acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +3 -3
- datahub/api/entities/datajob/datajob.py +7 -4
- datahub/api/entities/dataset/dataset.py +9 -11
- datahub/api/entities/forms/forms.py +34 -34
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/operation.py +4 -4
- datahub/cli/check_cli.py +3 -2
- datahub/cli/config_utils.py +2 -2
- datahub/cli/delete_cli.py +6 -5
- datahub/cli/docker_cli.py +2 -2
- datahub/cli/exists_cli.py +2 -1
- datahub/cli/get_cli.py +2 -1
- datahub/cli/iceberg_cli.py +6 -5
- datahub/cli/ingest_cli.py +9 -6
- datahub/cli/migrate.py +4 -3
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +3 -2
- datahub/cli/specific/assertions_cli.py +2 -1
- datahub/cli/specific/datacontract_cli.py +3 -2
- datahub/cli/specific/dataproduct_cli.py +10 -9
- datahub/cli/specific/dataset_cli.py +4 -3
- datahub/cli/specific/forms_cli.py +2 -1
- datahub/cli/specific/group_cli.py +2 -1
- datahub/cli/specific/structuredproperties_cli.py +4 -3
- datahub/cli/specific/user_cli.py +2 -1
- datahub/cli/state_cli.py +2 -1
- datahub/cli/timeline_cli.py +2 -1
- datahub/configuration/common.py +5 -0
- datahub/configuration/source_common.py +1 -1
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/request_helper.py +116 -3
- datahub/emitter/rest_emitter.py +163 -93
- datahub/entrypoints.py +2 -1
- datahub/errors.py +4 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
- datahub/ingestion/api/source.py +2 -5
- datahub/ingestion/api/source_helpers.py +1 -0
- datahub/ingestion/glossary/classification_mixin.py +4 -2
- datahub/ingestion/graph/client.py +33 -8
- datahub/ingestion/graph/config.py +14 -0
- datahub/ingestion/graph/filters.py +1 -1
- datahub/ingestion/graph/links.py +53 -0
- datahub/ingestion/run/pipeline.py +9 -6
- datahub/ingestion/run/pipeline_config.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +5 -6
- datahub/ingestion/source/apply/datahub_apply.py +2 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
- datahub/ingestion/source/common/subtypes.py +3 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
- datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
- datahub/ingestion/source/dbt/dbt_common.py +10 -2
- datahub/ingestion/source/dbt/dbt_core.py +82 -42
- datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
- datahub/ingestion/source/feast.py +4 -4
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
- datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
- datahub/ingestion/source/ge_data_profiler.py +27 -1
- datahub/ingestion/source/hex/api.py +1 -20
- datahub/ingestion/source/hex/query_fetcher.py +4 -1
- datahub/ingestion/source/iceberg/iceberg.py +20 -4
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +17 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
- datahub/ingestion/source/looker/looker_source.py +34 -5
- datahub/ingestion/source/looker/lookml_source.py +7 -1
- datahub/ingestion/source/metadata/lineage.py +2 -1
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/mode.py +74 -28
- datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
- datahub/ingestion/source/powerbi/config.py +13 -1
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/redshift/usage.py +10 -9
- datahub/ingestion/source/sigma/config.py +74 -6
- datahub/ingestion/source/sigma/sigma.py +16 -1
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +4 -52
- datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
- datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +5 -1
- datahub/ingestion/source/sql/druid.py +7 -2
- datahub/ingestion/source/sql/hive.py +7 -2
- datahub/ingestion/source/sql/hive_metastore.py +5 -5
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/oracle.py +6 -2
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +31 -6
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +2 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
- datahub/ingestion/source/vertexai/vertexai.py +316 -4
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
- datahub/integrations/assertion/common.py +3 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
- datahub/metadata/_urns/urn_defs.py +1819 -1763
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +17296 -16883
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- datahub/metadata/schemas/QueryProperties.avsc +4 -2
- datahub/metadata/schemas/SystemMetadata.avsc +86 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/_all_entities.py +4 -0
- datahub/sdk/_shared.py +142 -4
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/dataset.py +2 -2
- datahub/sdk/entity_client.py +8 -0
- datahub/sdk/lineage_client.py +235 -0
- datahub/sdk/main_client.py +6 -3
- datahub/sdk/mlmodel.py +301 -0
- datahub/sdk/mlmodelgroup.py +233 -0
- datahub/secret/datahub_secret_store.py +2 -1
- datahub/specific/dataset.py +12 -0
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
- datahub/sql_parsing/sqlglot_utils.py +18 -14
- datahub/telemetry/telemetry.py +2 -2
- datahub/testing/check_imports.py +1 -1
- datahub/testing/mcp_diff.py +15 -2
- datahub/upgrade/upgrade.py +10 -12
- datahub/utilities/logging_manager.py +8 -1
- datahub/utilities/server_config_util.py +350 -10
- datahub/utilities/sqlalchemy_query_combiner.py +4 -5
- datahub/utilities/urn_encoder.py +1 -1
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
datahub/cli/put_cli.py
CHANGED
|
@@ -8,6 +8,7 @@ from datahub.cli.cli_utils import post_entity
|
|
|
8
8
|
from datahub.configuration.config_loader import load_config_file
|
|
9
9
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
10
10
|
from datahub.ingestion.graph.client import get_default_graph
|
|
11
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
11
12
|
from datahub.metadata.schema_classes import (
|
|
12
13
|
DataPlatformInfoClass as DataPlatformInfo,
|
|
13
14
|
PlatformTypeClass,
|
|
@@ -53,7 +54,7 @@ def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> No
|
|
|
53
54
|
aspect_data, allow_stdin=True, resolve_env_vars=False, process_directives=False
|
|
54
55
|
)
|
|
55
56
|
|
|
56
|
-
client = get_default_graph()
|
|
57
|
+
client = get_default_graph(ClientMode.CLI)
|
|
57
58
|
|
|
58
59
|
system_metadata: Union[None, SystemMetadataClass] = None
|
|
59
60
|
if run_id:
|
|
@@ -118,7 +119,7 @@ def platform(
|
|
|
118
119
|
displayName=display_name or platform_name,
|
|
119
120
|
logoUrl=logo,
|
|
120
121
|
)
|
|
121
|
-
datahub_graph = get_default_graph()
|
|
122
|
+
datahub_graph = get_default_graph(ClientMode.CLI)
|
|
122
123
|
mcp = MetadataChangeProposalWrapper(
|
|
123
124
|
entityUrn=str(platform_urn),
|
|
124
125
|
aspect=data_platform_info,
|
datahub/cli/specific/assertions_cli.py
CHANGED
|
@@ -15,6 +15,7 @@ from datahub.api.entities.assertion.compiler_interface import (
|
|
|
15
15
|
from datahub.emitter.mce_builder import make_assertion_urn
|
|
16
16
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
17
|
from datahub.ingestion.graph.client import get_default_graph
|
|
18
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
18
19
|
from datahub.integrations.assertion.registry import ASSERTION_PLATFORMS
|
|
19
20
|
from datahub.telemetry import telemetry
|
|
20
21
|
from datahub.upgrade import upgrade
|
|
@@ -39,7 +40,7 @@ def upsert(file: str) -> None:
|
|
|
39
40
|
|
|
40
41
|
assertions_spec: AssertionsConfigSpec = AssertionsConfigSpec.from_yaml(file)
|
|
41
42
|
|
|
42
|
-
with get_default_graph() as graph:
|
|
43
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
43
44
|
for assertion_spec in assertions_spec.assertions:
|
|
44
45
|
try:
|
|
45
46
|
mcp = MetadataChangeProposalWrapper(
|
datahub/cli/specific/datacontract_cli.py
CHANGED
|
@@ -6,6 +6,7 @@ from click_default_group import DefaultGroup
|
|
|
6
6
|
|
|
7
7
|
from datahub.api.entities.datacontract.datacontract import DataContract
|
|
8
8
|
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
9
10
|
from datahub.telemetry import telemetry
|
|
10
11
|
from datahub.upgrade import upgrade
|
|
11
12
|
|
|
@@ -28,7 +29,7 @@ def upsert(file: str) -> None:
|
|
|
28
29
|
data_contract: DataContract = DataContract.from_yaml(file)
|
|
29
30
|
urn = data_contract.urn
|
|
30
31
|
|
|
31
|
-
with get_default_graph() as graph:
|
|
32
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
32
33
|
if not graph.exists(data_contract.entity):
|
|
33
34
|
raise ValueError(
|
|
34
35
|
f"Cannot define a data contract for non-existent entity {data_contract.entity}"
|
|
@@ -72,7 +73,7 @@ def delete(urn: Optional[str], file: Optional[str], hard: bool) -> None:
|
|
|
72
73
|
data_contract = DataContract.from_yaml(file)
|
|
73
74
|
urn = data_contract.urn
|
|
74
75
|
|
|
75
|
-
with get_default_graph() as graph:
|
|
76
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
76
77
|
if not graph.exists(urn):
|
|
77
78
|
raise ValueError(f"Data Contract {urn} does not exist")
|
|
78
79
|
|
datahub/cli/specific/dataproduct_cli.py
CHANGED
|
@@ -20,6 +20,7 @@ from datahub.emitter.mce_builder import (
|
|
|
20
20
|
validate_ownership_type,
|
|
21
21
|
)
|
|
22
22
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
23
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
23
24
|
from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass
|
|
24
25
|
from datahub.specific.dataproduct import DataProductPatchBuilder
|
|
25
26
|
from datahub.telemetry import telemetry
|
|
@@ -81,7 +82,7 @@ def mutate(file: Path, validate_assets: bool, external_url: str, upsert: bool) -
|
|
|
81
82
|
|
|
82
83
|
config_dict = load_file(pathlib.Path(file))
|
|
83
84
|
id = config_dict.get("id") if isinstance(config_dict, dict) else None
|
|
84
|
-
with get_default_graph() as graph:
|
|
85
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
85
86
|
data_product: DataProduct = DataProduct.from_yaml(file, graph)
|
|
86
87
|
external_url_override = (
|
|
87
88
|
external_url
|
|
@@ -162,7 +163,7 @@ def upsert(file: Path, validate_assets: bool, external_url: str) -> None:
|
|
|
162
163
|
def diff(file: Path, update: bool) -> None:
|
|
163
164
|
"""Diff a Data Product file with its twin in DataHub"""
|
|
164
165
|
|
|
165
|
-
with get_default_graph() as emitter:
|
|
166
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
166
167
|
id: Optional[str] = None
|
|
167
168
|
try:
|
|
168
169
|
data_product_local: DataProduct = DataProduct.from_yaml(file, emitter)
|
|
@@ -216,7 +217,7 @@ def delete(urn: str, file: Path, hard: bool) -> None:
|
|
|
216
217
|
raise click.Abort()
|
|
217
218
|
|
|
218
219
|
graph: DataHubGraph
|
|
219
|
-
with get_default_graph() as graph:
|
|
220
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
220
221
|
data_product_urn = (
|
|
221
222
|
urn if urn.startswith("urn:li:dataProduct") else f"urn:li:dataProduct:{urn}"
|
|
222
223
|
)
|
|
@@ -248,7 +249,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
248
249
|
if not urn.startswith("urn:li:dataProduct:"):
|
|
249
250
|
urn = f"urn:li:dataProduct:{urn}"
|
|
250
251
|
|
|
251
|
-
with get_default_graph() as graph:
|
|
252
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
252
253
|
if graph.exists(urn):
|
|
253
254
|
dataproduct: DataProduct = DataProduct.from_datahub(graph=graph, id=urn)
|
|
254
255
|
click.secho(
|
|
@@ -306,7 +307,7 @@ def set_description(urn: str, description: str, md_file: Path) -> None:
|
|
|
306
307
|
|
|
307
308
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
308
309
|
dataproduct_patcher.set_description(description)
|
|
309
|
-
with get_default_graph() as graph:
|
|
310
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
310
311
|
_abort_if_non_existent_urn(graph, urn, "set description")
|
|
311
312
|
for mcp in dataproduct_patcher.build():
|
|
312
313
|
graph.emit(mcp)
|
|
@@ -342,7 +343,7 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None:
|
|
|
342
343
|
owner=_get_owner_urn(owner), type=owner_type, typeUrn=owner_type_urn
|
|
343
344
|
)
|
|
344
345
|
)
|
|
345
|
-
with get_default_graph() as graph:
|
|
346
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
346
347
|
_abort_if_non_existent_urn(graph, urn, "add owners")
|
|
347
348
|
for mcp in dataproduct_patcher.build():
|
|
348
349
|
graph.emit(mcp)
|
|
@@ -360,7 +361,7 @@ def remove_owner(urn: str, owner_urn: str) -> None:
|
|
|
360
361
|
urn = f"urn:li:dataProduct:{urn}"
|
|
361
362
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
362
363
|
dataproduct_patcher.remove_owner(owner=_get_owner_urn(owner_urn))
|
|
363
|
-
with get_default_graph() as graph:
|
|
364
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
364
365
|
_abort_if_non_existent_urn(graph, urn, "remove owners")
|
|
365
366
|
for mcp in dataproduct_patcher.build():
|
|
366
367
|
click.echo(json.dumps(mcp.to_obj()))
|
|
@@ -382,7 +383,7 @@ def add_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
382
383
|
urn = f"urn:li:dataProduct:{urn}"
|
|
383
384
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
384
385
|
dataproduct_patcher.add_asset(asset)
|
|
385
|
-
with get_default_graph() as graph:
|
|
386
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
386
387
|
_abort_if_non_existent_urn(graph, urn, "add assets")
|
|
387
388
|
if validate_assets:
|
|
388
389
|
_abort_if_non_existent_urn(
|
|
@@ -409,7 +410,7 @@ def remove_asset(urn: str, asset: str, validate_assets: bool) -> None:
|
|
|
409
410
|
urn = f"urn:li:dataProduct:{urn}"
|
|
410
411
|
dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn)
|
|
411
412
|
dataproduct_patcher.remove_asset(asset)
|
|
412
|
-
with get_default_graph() as graph:
|
|
413
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
413
414
|
_abort_if_non_existent_urn(graph, urn, "remove assets")
|
|
414
415
|
if validate_assets:
|
|
415
416
|
_abort_if_non_existent_urn(
|
datahub/cli/specific/dataset_cli.py
CHANGED
|
@@ -12,6 +12,7 @@ from click_default_group import DefaultGroup
|
|
|
12
12
|
from datahub.api.entities.dataset.dataset import Dataset, DatasetRetrievalConfig
|
|
13
13
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
14
14
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
15
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
15
16
|
from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings
|
|
16
17
|
from datahub.telemetry import telemetry
|
|
17
18
|
from datahub.upgrade import upgrade
|
|
@@ -54,7 +55,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
54
55
|
if not urn.startswith("urn:li:dataset:"):
|
|
55
56
|
urn = f"urn:li:dataset:{urn}"
|
|
56
57
|
|
|
57
|
-
with get_default_graph() as graph:
|
|
58
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
58
59
|
if graph.exists(urn):
|
|
59
60
|
dataset: Dataset = Dataset.from_datahub(graph=graph, urn=urn)
|
|
60
61
|
click.secho(
|
|
@@ -82,7 +83,7 @@ def add_sibling(urn: str, sibling_urns: Tuple[str]) -> None:
|
|
|
82
83
|
all_urns.add(urn)
|
|
83
84
|
for sibling_urn in sibling_urns:
|
|
84
85
|
all_urns.add(sibling_urn)
|
|
85
|
-
with get_default_graph() as graph:
|
|
86
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
86
87
|
for _urn in all_urns:
|
|
87
88
|
_emit_sibling(graph, urn, _urn, all_urns)
|
|
88
89
|
|
|
@@ -181,7 +182,7 @@ def sync(file: str, to_datahub: bool, dry_run: bool) -> None:
|
|
|
181
182
|
dry_run_prefix = "[dry-run]: " if dry_run else "" # prefix to use in messages
|
|
182
183
|
|
|
183
184
|
failures: List[str] = []
|
|
184
|
-
with get_default_graph() as graph:
|
|
185
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
185
186
|
datasets = Dataset.from_yaml(file)
|
|
186
187
|
for dataset in datasets:
|
|
187
188
|
assert (
|
datahub/cli/specific/forms_cli.py
CHANGED
|
@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup
|
|
|
7
7
|
|
|
8
8
|
from datahub.api.entities.forms.forms import Forms
|
|
9
9
|
from datahub.ingestion.graph.client import get_default_graph
|
|
10
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
10
11
|
from datahub.telemetry import telemetry
|
|
11
12
|
from datahub.upgrade import upgrade
|
|
12
13
|
|
|
@@ -40,7 +41,7 @@ def upsert(file: Path) -> None:
|
|
|
40
41
|
@telemetry.with_telemetry()
|
|
41
42
|
def get(urn: str, to_file: str) -> None:
|
|
42
43
|
"""Get form from DataHub"""
|
|
43
|
-
with get_default_graph() as graph:
|
|
44
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
44
45
|
if graph.exists(urn):
|
|
45
46
|
form: Forms = Forms.from_datahub(graph=graph, urn=urn)
|
|
46
47
|
click.secho(
|
datahub/cli/specific/group_cli.py
CHANGED
|
@@ -10,6 +10,7 @@ from datahub.api.entities.corpgroup.corpgroup import (
|
|
|
10
10
|
)
|
|
11
11
|
from datahub.cli.specific.file_loader import load_file
|
|
12
12
|
from datahub.ingestion.graph.client import get_default_graph
|
|
13
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
13
14
|
from datahub.telemetry import telemetry
|
|
14
15
|
from datahub.upgrade import upgrade
|
|
15
16
|
|
|
@@ -40,7 +41,7 @@ def upsert(file: Path, override_editable: bool) -> None:
|
|
|
40
41
|
|
|
41
42
|
config_dict = load_file(file)
|
|
42
43
|
group_configs = config_dict if isinstance(config_dict, list) else [config_dict]
|
|
43
|
-
with get_default_graph() as emitter:
|
|
44
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
44
45
|
for group_config in group_configs:
|
|
45
46
|
try:
|
|
46
47
|
datahub_group = CorpGroup.parse_obj(group_config)
|
datahub/cli/specific/structuredproperties_cli.py
CHANGED
|
@@ -11,6 +11,7 @@ from datahub.api.entities.structuredproperties.structuredproperties import (
|
|
|
11
11
|
StructuredProperties,
|
|
12
12
|
)
|
|
13
13
|
from datahub.ingestion.graph.client import get_default_graph
|
|
14
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
14
15
|
from datahub.telemetry import telemetry
|
|
15
16
|
from datahub.upgrade import upgrade
|
|
16
17
|
from datahub.utilities.urns.urn import Urn
|
|
@@ -33,7 +34,7 @@ def properties() -> None:
|
|
|
33
34
|
def upsert(file: Path) -> None:
|
|
34
35
|
"""Upsert structured properties in DataHub."""
|
|
35
36
|
|
|
36
|
-
with get_default_graph() as graph:
|
|
37
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
37
38
|
StructuredProperties.create(str(file), graph)
|
|
38
39
|
|
|
39
40
|
|
|
@@ -48,7 +49,7 @@ def get(urn: str, to_file: str) -> None:
|
|
|
48
49
|
"""Get structured properties from DataHub"""
|
|
49
50
|
urn = Urn.make_structured_property_urn(urn)
|
|
50
51
|
|
|
51
|
-
with get_default_graph() as graph:
|
|
52
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
52
53
|
if graph.exists(urn):
|
|
53
54
|
structuredproperties: StructuredProperties = (
|
|
54
55
|
StructuredProperties.from_datahub(graph=graph, urn=urn)
|
|
@@ -117,7 +118,7 @@ def list(details: bool, to_file: str) -> None:
|
|
|
117
118
|
with open(file, "w") as fp:
|
|
118
119
|
yaml.dump(serialized_objects, fp)
|
|
119
120
|
|
|
120
|
-
with get_default_graph() as graph:
|
|
121
|
+
with get_default_graph(ClientMode.CLI) as graph:
|
|
121
122
|
if details:
|
|
122
123
|
logger.info(
|
|
123
124
|
"Listing structured properties with details. Use --no-details for urns only"
|
datahub/cli/specific/user_cli.py
CHANGED
|
@@ -8,6 +8,7 @@ from click_default_group import DefaultGroup
|
|
|
8
8
|
from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
|
|
9
9
|
from datahub.cli.specific.file_loader import load_file
|
|
10
10
|
from datahub.ingestion.graph.client import get_default_graph
|
|
11
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
11
12
|
from datahub.telemetry import telemetry
|
|
12
13
|
from datahub.upgrade import upgrade
|
|
13
14
|
|
|
@@ -38,7 +39,7 @@ def upsert(file: Path, override_editable: bool) -> None:
|
|
|
38
39
|
|
|
39
40
|
config_dict = load_file(pathlib.Path(file))
|
|
40
41
|
user_configs = config_dict if isinstance(config_dict, list) else [config_dict]
|
|
41
|
-
with get_default_graph() as emitter:
|
|
42
|
+
with get_default_graph(ClientMode.CLI) as emitter:
|
|
42
43
|
for user_config in user_configs:
|
|
43
44
|
try:
|
|
44
45
|
datahub_user: CorpUser = CorpUser.parse_obj(user_config)
|
datahub/cli/state_cli.py
CHANGED
|
@@ -5,6 +5,7 @@ import click
|
|
|
5
5
|
from click_default_group import DefaultGroup
|
|
6
6
|
|
|
7
7
|
from datahub.ingestion.graph.client import get_default_graph
|
|
8
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
8
9
|
from datahub.telemetry import telemetry
|
|
9
10
|
from datahub.upgrade import upgrade
|
|
10
11
|
|
|
@@ -28,7 +29,7 @@ def inspect(pipeline_name: str, platform: str) -> None:
|
|
|
28
29
|
Only works for state entity removal for now.
|
|
29
30
|
"""
|
|
30
31
|
|
|
31
|
-
datahub_graph = get_default_graph()
|
|
32
|
+
datahub_graph = get_default_graph(ClientMode.CLI)
|
|
32
33
|
checkpoint = datahub_graph.get_latest_pipeline_checkpoint(pipeline_name, platform)
|
|
33
34
|
if not checkpoint:
|
|
34
35
|
click.secho("No ingestion state found.", fg="red")
|
datahub/cli/timeline_cli.py
CHANGED
|
@@ -9,6 +9,7 @@ from requests import Response
|
|
|
9
9
|
|
|
10
10
|
from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
|
|
11
11
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
12
|
+
from datahub.ingestion.graph.config import ClientMode
|
|
12
13
|
from datahub.telemetry import telemetry
|
|
13
14
|
from datahub.upgrade import upgrade
|
|
14
15
|
from datahub.utilities.urns.urn import Urn
|
|
@@ -63,7 +64,7 @@ def get_timeline(
|
|
|
63
64
|
diff: bool,
|
|
64
65
|
graph: Optional[DataHubGraph] = None,
|
|
65
66
|
) -> Any:
|
|
66
|
-
client = graph if graph else get_default_graph()
|
|
67
|
+
client = graph if graph else get_default_graph(ClientMode.CLI)
|
|
67
68
|
session = client._session
|
|
68
69
|
host = client.config.server
|
|
69
70
|
if urn.startswith("urn%3A"):
|
datahub/configuration/common.py
CHANGED
|
datahub/configuration/source_common.py
CHANGED
|
@@ -16,7 +16,7 @@ class PlatformInstanceConfigMixin(ConfigModel):
|
|
|
16
16
|
default=None,
|
|
17
17
|
description="The instance of the platform that all assets produced by this recipe belong to. "
|
|
18
18
|
"This should be unique within the platform. "
|
|
19
|
-
"See https://
|
|
19
|
+
"See https://docs.datahub.com/docs/platform-instances/ for more details.",
|
|
20
20
|
)
|
|
21
21
|
|
|
22
22
|
|
datahub/emitter/mcp.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import json
|
|
3
|
-
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
|
4
5
|
|
|
5
6
|
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
|
|
6
7
|
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
|
|
8
|
+
from datahub.errors import DataHubDeprecationWarning
|
|
7
9
|
from datahub.metadata.schema_classes import (
|
|
8
10
|
ChangeTypeClass,
|
|
9
11
|
DictWrapper,
|
|
@@ -69,18 +71,28 @@ class MetadataChangeProposalWrapper:
|
|
|
69
71
|
aspectName: Union[None, str] = None
|
|
70
72
|
aspect: Union[None, _Aspect] = None
|
|
71
73
|
systemMetadata: Union[None, SystemMetadataClass] = None
|
|
74
|
+
headers: Union[None, Dict[str, str]] = None
|
|
72
75
|
|
|
73
76
|
def __post_init__(self) -> None:
|
|
74
77
|
if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
|
|
75
78
|
self.entityType = guess_entity_type(self.entityUrn)
|
|
76
79
|
elif self.entityUrn and self.entityType:
|
|
77
|
-
guessed_entity_type = guess_entity_type(self.entityUrn)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
if self.entityType.lower() != guessed_entity_type:
|
|
80
|
+
guessed_entity_type = guess_entity_type(self.entityUrn)
|
|
81
|
+
if self.entityType.lower() != guessed_entity_type.lower():
|
|
82
|
+
# If they aren't a case-ignored match, raise an error.
|
|
81
83
|
raise ValueError(
|
|
82
84
|
f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
|
|
83
85
|
)
|
|
86
|
+
elif self.entityType != guessed_entity_type:
|
|
87
|
+
# If they only differ in case, normalize and print a warning.
|
|
88
|
+
self.entityType = guessed_entity_type
|
|
89
|
+
warnings.warn(
|
|
90
|
+
f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
|
|
91
|
+
"This will be automatically corrected for now, but will become an error in a future release. "
|
|
92
|
+
"Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
|
|
93
|
+
DataHubDeprecationWarning,
|
|
94
|
+
stacklevel=3,
|
|
95
|
+
)
|
|
84
96
|
elif self.entityType == _ENTITY_TYPE_UNSET:
|
|
85
97
|
raise ValueError("entityType must be set if entityUrn is not set")
|
|
86
98
|
|
|
@@ -112,6 +124,7 @@ class MetadataChangeProposalWrapper:
|
|
|
112
124
|
auditHeader=self.auditHeader,
|
|
113
125
|
aspectName=self.aspectName,
|
|
114
126
|
systemMetadata=self.systemMetadata,
|
|
127
|
+
headers=self.headers,
|
|
115
128
|
)
|
|
116
129
|
|
|
117
130
|
def make_mcp(self) -> MetadataChangeProposalClass:
|
|
@@ -211,6 +224,7 @@ class MetadataChangeProposalWrapper:
|
|
|
211
224
|
aspectName=mcpc.aspectName,
|
|
212
225
|
aspect=aspect,
|
|
213
226
|
systemMetadata=mcpc.systemMetadata,
|
|
227
|
+
headers=mcpc.headers,
|
|
214
228
|
)
|
|
215
229
|
else:
|
|
216
230
|
return None
|
|
@@ -228,6 +242,7 @@ class MetadataChangeProposalWrapper:
|
|
|
228
242
|
changeType=mcl.changeType,
|
|
229
243
|
auditHeader=mcl.auditHeader,
|
|
230
244
|
systemMetadata=mcl.systemMetadata,
|
|
245
|
+
headers=mcl.headers,
|
|
231
246
|
)
|
|
232
247
|
return cls.try_from_mcpc(mcpc) or mcpc
|
|
233
248
|
|
datahub/emitter/request_helper.py
CHANGED
|
@@ -1,14 +1,31 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import shlex
|
|
2
|
-
from
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
5
|
|
|
4
6
|
import requests
|
|
5
7
|
from requests.auth import HTTPBasicAuth
|
|
6
8
|
|
|
9
|
+
from datahub.emitter.aspect import JSON_CONTENT_TYPE, JSON_PATCH_CONTENT_TYPE
|
|
10
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
11
|
+
from datahub.emitter.serialization_helper import pre_json_transform
|
|
12
|
+
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
13
|
+
MetadataChangeProposal,
|
|
14
|
+
)
|
|
15
|
+
from datahub.metadata.schema_classes import ChangeTypeClass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _decode_bytes(value: Union[str, bytes]) -> str:
|
|
19
|
+
"""Decode bytes to string, if necessary."""
|
|
20
|
+
if isinstance(value, bytes):
|
|
21
|
+
return value.decode()
|
|
22
|
+
return value
|
|
23
|
+
|
|
7
24
|
|
|
8
25
|
def _format_header(name: str, value: Union[str, bytes]) -> str:
|
|
9
26
|
if name == "Authorization":
|
|
10
27
|
return f"{name!s}: <redacted>"
|
|
11
|
-
return f"{name!s}: {value
|
|
28
|
+
return f"{name!s}: {_decode_bytes(value)}"
|
|
12
29
|
|
|
13
30
|
|
|
14
31
|
def make_curl_command(
|
|
@@ -21,7 +38,9 @@ def make_curl_command(
|
|
|
21
38
|
|
|
22
39
|
if session.auth:
|
|
23
40
|
if isinstance(session.auth, HTTPBasicAuth):
|
|
24
|
-
fragments.extend(
|
|
41
|
+
fragments.extend(
|
|
42
|
+
["-u", f"{_decode_bytes(session.auth.username)}:<redacted>"]
|
|
43
|
+
)
|
|
25
44
|
else:
|
|
26
45
|
# For other auth types, they should be handled via headers
|
|
27
46
|
fragments.extend(["-H", "<unknown auth type>"])
|
|
@@ -31,3 +50,97 @@ def make_curl_command(
|
|
|
31
50
|
|
|
32
51
|
fragments.append(url)
|
|
33
52
|
return shlex.join(fragments)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
|
|
56
|
+
class OpenApiRequest:
|
|
57
|
+
"""Represents an OpenAPI request for entity operations."""
|
|
58
|
+
|
|
59
|
+
method: str
|
|
60
|
+
url: str
|
|
61
|
+
payload: List[Dict[str, Any]]
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def from_mcp(
|
|
65
|
+
cls,
|
|
66
|
+
mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
|
|
67
|
+
gms_server: str,
|
|
68
|
+
async_flag: Optional[bool] = None,
|
|
69
|
+
async_default: bool = False,
|
|
70
|
+
) -> Optional["OpenApiRequest"]:
|
|
71
|
+
"""Factory method to create an OpenApiRequest from a MetadataChangeProposal."""
|
|
72
|
+
if not mcp.aspectName or (
|
|
73
|
+
mcp.changeType != ChangeTypeClass.DELETE and not mcp.aspect
|
|
74
|
+
):
|
|
75
|
+
return None
|
|
76
|
+
|
|
77
|
+
resolved_async_flag = async_flag if async_flag is not None else async_default
|
|
78
|
+
|
|
79
|
+
method = "post"
|
|
80
|
+
url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}?async={'true' if resolved_async_flag else 'false'}"
|
|
81
|
+
payload = []
|
|
82
|
+
|
|
83
|
+
if mcp.changeType == ChangeTypeClass.DELETE:
|
|
84
|
+
method = "delete"
|
|
85
|
+
url = f"{gms_server}/openapi/v3/entity/{mcp.entityType}/{mcp.entityUrn}"
|
|
86
|
+
else:
|
|
87
|
+
if mcp.aspect:
|
|
88
|
+
if mcp.changeType == ChangeTypeClass.PATCH:
|
|
89
|
+
method = "patch"
|
|
90
|
+
obj = mcp.aspect.to_obj()
|
|
91
|
+
content_type = obj.get("contentType")
|
|
92
|
+
if obj.get("value") and content_type == JSON_PATCH_CONTENT_TYPE:
|
|
93
|
+
# Undo double serialization.
|
|
94
|
+
obj = json.loads(obj["value"])
|
|
95
|
+
patch_value = obj
|
|
96
|
+
else:
|
|
97
|
+
raise NotImplementedError(
|
|
98
|
+
f"ChangeType {mcp.changeType} only supports context type {JSON_PATCH_CONTENT_TYPE}, found {content_type}."
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
if isinstance(patch_value, list):
|
|
102
|
+
patch_value = {"patch": patch_value}
|
|
103
|
+
|
|
104
|
+
payload = [
|
|
105
|
+
{
|
|
106
|
+
"urn": mcp.entityUrn,
|
|
107
|
+
mcp.aspectName: {
|
|
108
|
+
"value": patch_value,
|
|
109
|
+
"systemMetadata": mcp.systemMetadata.to_obj()
|
|
110
|
+
if mcp.systemMetadata
|
|
111
|
+
else None,
|
|
112
|
+
},
|
|
113
|
+
}
|
|
114
|
+
]
|
|
115
|
+
else:
|
|
116
|
+
if isinstance(mcp, MetadataChangeProposalWrapper):
|
|
117
|
+
aspect_value = pre_json_transform(
|
|
118
|
+
mcp.to_obj(simplified_structure=True)
|
|
119
|
+
)["aspect"]["json"]
|
|
120
|
+
else:
|
|
121
|
+
obj = mcp.aspect.to_obj()
|
|
122
|
+
content_type = obj.get("contentType")
|
|
123
|
+
if obj.get("value") and content_type == JSON_CONTENT_TYPE:
|
|
124
|
+
# Undo double serialization.
|
|
125
|
+
obj = json.loads(obj["value"])
|
|
126
|
+
elif content_type == JSON_PATCH_CONTENT_TYPE:
|
|
127
|
+
raise NotImplementedError(
|
|
128
|
+
f"ChangeType {mcp.changeType} does not support patch."
|
|
129
|
+
)
|
|
130
|
+
aspect_value = pre_json_transform(obj)
|
|
131
|
+
|
|
132
|
+
payload = [
|
|
133
|
+
{
|
|
134
|
+
"urn": mcp.entityUrn,
|
|
135
|
+
mcp.aspectName: {
|
|
136
|
+
"value": aspect_value,
|
|
137
|
+
"systemMetadata": mcp.systemMetadata.to_obj()
|
|
138
|
+
if mcp.systemMetadata
|
|
139
|
+
else None,
|
|
140
|
+
},
|
|
141
|
+
}
|
|
142
|
+
]
|
|
143
|
+
else:
|
|
144
|
+
raise ValueError(f"ChangeType {mcp.changeType} requires a value.")
|
|
145
|
+
|
|
146
|
+
return cls(method=method, url=url, payload=payload)
|