acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/METADATA +2566 -2514
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/RECORD +159 -149
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
- datahub/api/entities/datacontract/datacontract.py +35 -3
- datahub/api/entities/datajob/dataflow.py +3 -3
- datahub/api/entities/datajob/datajob.py +7 -4
- datahub/api/entities/dataset/dataset.py +9 -11
- datahub/api/entities/forms/forms.py +34 -34
- datahub/api/graphql/assertion.py +1 -1
- datahub/api/graphql/operation.py +4 -4
- datahub/cli/check_cli.py +3 -2
- datahub/cli/config_utils.py +2 -2
- datahub/cli/delete_cli.py +6 -5
- datahub/cli/docker_cli.py +2 -2
- datahub/cli/exists_cli.py +2 -1
- datahub/cli/get_cli.py +2 -1
- datahub/cli/iceberg_cli.py +6 -5
- datahub/cli/ingest_cli.py +9 -6
- datahub/cli/migrate.py +4 -3
- datahub/cli/migration_utils.py +4 -3
- datahub/cli/put_cli.py +3 -2
- datahub/cli/specific/assertions_cli.py +2 -1
- datahub/cli/specific/datacontract_cli.py +3 -2
- datahub/cli/specific/dataproduct_cli.py +10 -9
- datahub/cli/specific/dataset_cli.py +4 -3
- datahub/cli/specific/forms_cli.py +2 -1
- datahub/cli/specific/group_cli.py +2 -1
- datahub/cli/specific/structuredproperties_cli.py +4 -3
- datahub/cli/specific/user_cli.py +2 -1
- datahub/cli/state_cli.py +2 -1
- datahub/cli/timeline_cli.py +2 -1
- datahub/configuration/common.py +5 -0
- datahub/configuration/source_common.py +1 -1
- datahub/emitter/mcp.py +20 -5
- datahub/emitter/request_helper.py +116 -3
- datahub/emitter/rest_emitter.py +163 -93
- datahub/entrypoints.py +2 -1
- datahub/errors.py +4 -0
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
- datahub/ingestion/api/source.py +2 -5
- datahub/ingestion/api/source_helpers.py +1 -0
- datahub/ingestion/glossary/classification_mixin.py +4 -2
- datahub/ingestion/graph/client.py +33 -8
- datahub/ingestion/graph/config.py +14 -0
- datahub/ingestion/graph/filters.py +1 -1
- datahub/ingestion/graph/links.py +53 -0
- datahub/ingestion/run/pipeline.py +9 -6
- datahub/ingestion/run/pipeline_config.py +1 -1
- datahub/ingestion/sink/datahub_rest.py +5 -6
- datahub/ingestion/source/apply/datahub_apply.py +2 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery.py +24 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +4 -62
- datahub/ingestion/source/bigquery_v2/bigquery_connection.py +70 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +25 -24
- datahub/ingestion/source/common/subtypes.py +3 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +12 -11
- datahub/ingestion/source/dbt/dbt_cloud.py +2 -6
- datahub/ingestion/source/dbt/dbt_common.py +10 -2
- datahub/ingestion/source/dbt/dbt_core.py +82 -42
- datahub/ingestion/source/dynamodb/dynamodb.py +7 -4
- datahub/ingestion/source/feast.py +4 -4
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/fivetran/fivetran_log_api.py +7 -3
- datahub/ingestion/source/fivetran/fivetran_query.py +16 -16
- datahub/ingestion/source/ge_data_profiler.py +27 -1
- datahub/ingestion/source/hex/api.py +1 -20
- datahub/ingestion/source/hex/query_fetcher.py +4 -1
- datahub/ingestion/source/iceberg/iceberg.py +20 -4
- datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
- datahub/ingestion/source/ldap.py +1 -1
- datahub/ingestion/source/looker/looker_common.py +17 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +1 -1
- datahub/ingestion/source/looker/looker_source.py +34 -5
- datahub/ingestion/source/looker/lookml_source.py +7 -1
- datahub/ingestion/source/metadata/lineage.py +2 -1
- datahub/ingestion/source/mlflow.py +19 -6
- datahub/ingestion/source/mode.py +74 -28
- datahub/ingestion/source/neo4j/neo4j_source.py +85 -55
- datahub/ingestion/source/powerbi/config.py +13 -1
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +153 -0
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
- datahub/ingestion/source/redshift/usage.py +10 -9
- datahub/ingestion/source/sigma/config.py +74 -6
- datahub/ingestion/source/sigma/sigma.py +16 -1
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/slack/slack.py +4 -52
- datahub/ingestion/source/snowflake/snowflake_config.py +2 -12
- datahub/ingestion/source/snowflake/snowflake_connection.py +24 -18
- datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
- datahub/ingestion/source/snowflake/snowflake_query.py +9 -63
- datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +5 -1
- datahub/ingestion/source/sql/druid.py +7 -2
- datahub/ingestion/source/sql/hive.py +7 -2
- datahub/ingestion/source/sql/hive_metastore.py +5 -5
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/oracle.py +6 -2
- datahub/ingestion/source/sql/sql_config.py +1 -34
- datahub/ingestion/source/sql/sqlalchemy_uri.py +36 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +1 -1
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
- datahub/ingestion/source/tableau/tableau.py +31 -6
- datahub/ingestion/source/tableau/tableau_validation.py +1 -1
- datahub/ingestion/source/unity/config.py +2 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +7 -3
- datahub/ingestion/source/usage/starburst_trino_usage.py +5 -3
- datahub/ingestion/source/vertexai/vertexai.py +316 -4
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +23 -2
- datahub/integrations/assertion/common.py +3 -2
- datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +538 -493
- datahub/metadata/_urns/urn_defs.py +1819 -1763
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +17296 -16883
- datahub/metadata/schema_classes.py +3 -3
- datahub/metadata/schemas/DataContractKey.avsc +2 -1
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- datahub/metadata/schemas/QueryProperties.avsc +4 -2
- datahub/metadata/schemas/SystemMetadata.avsc +86 -0
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/_all_entities.py +4 -0
- datahub/sdk/_shared.py +142 -4
- datahub/sdk/_utils.py +4 -0
- datahub/sdk/dataset.py +2 -2
- datahub/sdk/entity_client.py +8 -0
- datahub/sdk/lineage_client.py +235 -0
- datahub/sdk/main_client.py +6 -3
- datahub/sdk/mlmodel.py +301 -0
- datahub/sdk/mlmodelgroup.py +233 -0
- datahub/secret/datahub_secret_store.py +2 -1
- datahub/specific/dataset.py +12 -0
- datahub/sql_parsing/fingerprint_utils.py +6 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +48 -34
- datahub/sql_parsing/sqlglot_utils.py +18 -14
- datahub/telemetry/telemetry.py +2 -2
- datahub/testing/check_imports.py +1 -1
- datahub/testing/mcp_diff.py +15 -2
- datahub/upgrade/upgrade.py +10 -12
- datahub/utilities/logging_manager.py +8 -1
- datahub/utilities/server_config_util.py +350 -10
- datahub/utilities/sqlalchemy_query_combiner.py +4 -5
- datahub/utilities/urn_encoder.py +1 -1
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3.dist-info}/top_level.txt +0 -0
datahub/_version.py
CHANGED
datahub/api/circuit_breaker/operation_circuit_breaker.py
CHANGED

@@ -55,9 +55,9 @@ class OperationCircuitBreaker(AbstractCircuitBreaker):
         which is set as Airflow connection.
         :param partition: The partition to check the operation.
         :param source_type: The source type to filter on. If not set it will accept any source type.
-            See valid types here: https://
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param operation_type: The operation type to filter on. If not set it will accept any source type.
-            See valid types here: https://
+            See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
         """

         start_time_millis: int = int(
datahub/api/entities/datacontract/datacontract.py
CHANGED

@@ -1,5 +1,5 @@
 import collections
-from typing import Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, Union

 from ruamel.yaml import YAML
 from typing_extensions import Literal

@@ -25,6 +25,8 @@ from datahub.metadata.schema_classes import (
     FreshnessContractClass,
     SchemaContractClass,
     StatusClass,
+    StructuredPropertiesClass,
+    StructuredPropertyValueAssignmentClass,
 )
 from datahub.utilities.urns.urn import guess_entity_type

@@ -47,8 +49,12 @@ class DataContract(v1_ConfigModel):
     entity: str = v1_Field(
         description="The entity urn that the Data Contract is associated with"
     )
-
-
+    properties: Optional[Dict[str, Union[str, float, List[Union[str, float]]]]] = (
+        v1_Field(
+            default=None,
+            description="Structured properties associated with the data contract.",
+        )
+    )

     schema_field: Optional[SchemaAssertion] = v1_Field(default=None, alias="schema")

@@ -172,6 +178,30 @@ class DataContract(v1_ConfigModel):
         )
         yield from dq_assertion_mcps

+        # Construct the structured properties aspect if properties are defined
+        structured_properties_aspect: Optional[StructuredPropertiesClass] = None
+        if self.properties:
+            property_assignments: List[StructuredPropertyValueAssignmentClass] = []
+            for key, value in self.properties.items():
+                # Use f-string formatting for the property URN, like in dataset.py
+                prop_urn = f"urn:li:structuredProperty:{key}"
+                # Ensure value is a list for StructuredPropertyValueAssignmentClass
+                values_list = value if isinstance(value, list) else [value]
+                property_assignments.append(
+                    StructuredPropertyValueAssignmentClass(
+                        propertyUrn=prop_urn,
+                        values=[
+                            str(v) for v in values_list
+                        ],  # Ensure all values are strings
+                    )
+                )
+            if (
+                property_assignments
+            ):  # Only create aspect if there are valid assignments
+                structured_properties_aspect = StructuredPropertiesClass(
+                    properties=property_assignments
+                )
+
         # Now that we've generated the assertions, we can generate
         # the actual data contract.
         yield from MetadataChangeProposalWrapper.construct_many(
@@ -202,6 +232,8 @@ class DataContract(v1_ConfigModel):
                     if True
                     else None
                 ),
+                # Add structured properties aspect if defined
+                structured_properties_aspect,
             ],
         )

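The new properties field maps contract-level key/value pairs onto the structured-properties aspect. A minimal sketch of that mapping, using the same classes and normalization logic as the diff above (the property keys and values are made-up examples):

from datahub.metadata.schema_classes import (
    StructuredPropertiesClass,
    StructuredPropertyValueAssignmentClass,
)

# Hypothetical contract-level properties, scalar or list-valued
properties = {"retention": "90d", "certifications": ["gold", "pii-reviewed"]}

assignments = []
for key, value in properties.items():
    values_list = value if isinstance(value, list) else [value]  # normalize scalars
    assignments.append(
        StructuredPropertyValueAssignmentClass(
            propertyUrn=f"urn:li:structuredProperty:{key}",
            values=[str(v) for v in values_list],  # values are serialized as strings
        )
    )
aspect = StructuredPropertiesClass(properties=assignments)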
datahub/api/entities/datajob/dataflow.py
CHANGED

@@ -30,7 +30,7 @@ class DataFlow:
     """The DataHub representation of data-flow.

     Args:
-        urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://
+        urn (int): Unique identifier of the DataFlow in DataHub. For more detail refer https://docs.datahub.com/docs/what/urn/.
         id (str): Identifier of DataFlow in orchestrator.
         orchestrator (str): orchestrator. for example airflow.
         cluster (Optional[str]): [deprecated] Please use env.

@@ -40,8 +40,8 @@ class DataFlow:
         url (Optional[str]): URL pointing to DataFlow.
         tags (Set[str]): tags that need to be apply on DataFlow.
         owners (Set[str]): owners that need to be apply on DataFlow.
-        platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://
-        env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://
+        platform_instance (Optional[str]): The instance of the platform that all assets produced by this orchestrator belong to. For more detail refer https://docs.datahub.com/docs/platform-instances/.
+        env (Optional[str]): The environment that all assets produced by this orchestrator belong to. For more detail and possible values refer https://docs.datahub.com/docs/graphql/enums/#fabrictype.
     """

     urn: DataFlowUrn = field(init=False)
datahub/api/entities/datajob/datajob.py
CHANGED

@@ -108,7 +108,9 @@ class DataJob:
         return [tags]

     def generate_mcp(
-        self,
+        self,
+        generate_lineage: bool = True,
+        materialize_iolets: bool = True,
     ) -> Iterable[MetadataChangeProposalWrapper]:
         env: Optional[str] = None
         if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:

@@ -152,9 +154,10 @@ class DataJob:
         )
         yield mcp

-
-
-
+        if generate_lineage:
+            yield from self.generate_data_input_output_mcp(
+                materialize_iolets=materialize_iolets
+            )

         for owner in self.generate_ownership_aspect():
             mcp = MetadataChangeProposalWrapper(
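DataJob.generate_mcp now takes generate_lineage and materialize_iolets flags, so callers can emit a job's aspects without also emitting lineage. A minimal usage sketch, assuming the package-level imports and constructor arguments shown in these files (the flow and job ids are made up):

from datahub.api.entities.datajob import DataFlow, DataJob

flow = DataFlow(orchestrator="airflow", id="demo_dag", env="PROD")
job = DataJob(id="demo_task", flow_urn=flow.urn)

# Emit the job's aspects while skipping lineage and iolet materialization
mcps = list(job.generate_mcp(generate_lineage=False, materialize_iolets=False))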
datahub/api/entities/dataset/dataset.py
CHANGED

@@ -509,16 +509,14 @@ class Dataset(StrictModel):
     def generate_mcp(
         self,
     ) -> Iterable[Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper]]:
-
-
-
-
-
-
-
-
-        )
-        yield mcp
+        patch_builder = self.patch_builder()
+
+        patch_builder.set_custom_properties(self.properties or {})
+        patch_builder.set_description(self.description)
+        patch_builder.set_display_name(self.name)
+        patch_builder.set_external_url(self.external_url)
+
+        yield from patch_builder.build()

         if self.schema_metadata:
             schema_fields = set()
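Dataset.generate_mcp now routes name, description, external URL, and custom properties through a patch builder instead of emitting a full-aspect MCP. A sketch of the underlying builder usage (the urn and values are made-up examples; set_display_name and set_external_url are among the additions to datahub/specific/dataset.py listed in this release):

from datahub.specific.dataset import DatasetPatchBuilder

patch_builder = DatasetPatchBuilder(
    "urn:li:dataset:(urn:li:dataPlatform:hive,example.orders,PROD)"  # hypothetical urn
)
patch_builder.set_custom_properties({"team": "analytics"})
patch_builder.set_description("Orders fact table")
patch_builder.set_display_name("orders")

# build() yields patch MCPs that update only the touched fields
for mcp in patch_builder.build():
    print(mcp.entityUrn, mcp.aspectName)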
@@ -981,7 +979,7 @@ class Dataset(StrictModel):

     def model_dump(self, **kwargs):
         """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
-        exclude = kwargs.pop("exclude", set())
+        exclude = kwargs.pop("exclude", None) or set()

         # If id and name are identical, exclude name from the output
         if self.id == self.name and self.id is not None:
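The model_dump fix matters when a caller passes exclude=None explicitly: dict.pop's default only applies when the key is absent, so the old code let None through to set-based handling. A minimal repro of the corrected behavior:

kwargs = {"exclude": None}
exclude = kwargs.pop("exclude", None) or set()  # fixed: None normalizes to an empty set
assert exclude == set()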
datahub/api/entities/forms/forms.py
CHANGED

@@ -26,6 +26,7 @@ from datahub.emitter.mce_builder import (
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     FormActorAssignmentClass,
     FormInfoClass,
@@ -133,47 +134,46 @@ class Forms(ConfigModel):
     def create(file: str) -> None:
         emitter: DataHubGraph

-        with get_default_graph() as emitter:
-
-
-
-            form = Forms.parse_obj(form_raw)
+        with get_default_graph(ClientMode.CLI) as emitter, open(file) as fp:
+            forms: List[dict] = yaml.safe_load(fp)
+            for form_raw in forms:
+                form = Forms.parse_obj(form_raw)

-
-
-
-
-            )
-
-            mcp = MetadataChangeProposalWrapper(
-                entityUrn=form.urn,
-                aspect=FormInfoClass(
-                    name=form.name,
-                    description=form.description,
-                    prompts=form.validate_prompts(emitter),
-                    type=form.type,
-                    actors=form.create_form_actors(form.actors),
-                ),
+                try:
+                    if not FormType.has_value(form.type):
+                        logger.error(
+                            f"Form type {form.type} does not exist. Please try again with a valid type."
                         )
-            emitter.emit_mcp(mcp)

-
+                    mcp = MetadataChangeProposalWrapper(
+                        entityUrn=form.urn,
+                        aspect=FormInfoClass(
+                            name=form.name,
+                            description=form.description,
+                            prompts=form.validate_prompts(emitter),
+                            type=form.type,
+                            actors=form.create_form_actors(form.actors),
+                        ),
+                    )
+                    emitter.emit_mcp(mcp)
+
+                    logger.info(f"Created form {form.urn}")

-
-
+                    if form.owners or form.group_owners:
+                        form.add_owners(emitter)

-
-
-
-
+                    if form.entities:
+                        if form.entities.urns:
+                            # Associate specific entities with a form
+                            form.upload_entities_for_form(emitter)

-
-
-
+                        if form.entities.filters:
+                            # Associate groups of entities with a form based on filters
+                            form.create_form_filters(emitter)

-
-
-
+                except Exception as e:
+                    logger.error(e)
+                    return

     def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]:
         prompts = []
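The restructured create opens the YAML file inside the same with block as the graph client and wraps each form in a try/except, so one bad entry no longer aborts the rest. A sketch of the input it parses, assuming field names from the diff above (the urn and the VERIFICATION type value are illustrative, not confirmed):

import yaml

raw = """
- urn: urn:li:form:metadata_initiative_1   # hypothetical urn
  name: Metadata Initiative
  description: Please complete the required documentation.
  type: VERIFICATION                       # must satisfy FormType.has_value()
"""
forms = yaml.safe_load(raw)
print(forms[0]["name"])  # each entry is passed to Forms.parse_obj()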
datahub/api/graphql/assertion.py
CHANGED

@@ -65,7 +65,7 @@ query dataset($urn: String!, $start: Int, $count: Int, $status: AssertionRunStat

         :param urn: The DataHub dataset unique identifier.
         :param status: The assertion status to filter for. Every status will be accepted if it is not set.
-            See valid status at https://
+            See valid status at https://docs.datahub.com/docs/graphql/enums#assertionrunstatus
         :param start_time_millis: The start time in milliseconds from the assertions will be queried.
         :param end_time_millis: The end time in milliseconds until the assertions will be queried.
         :param filter: Additional key value filters which will be applied as AND query
datahub/api/graphql/operation.py
CHANGED

@@ -55,10 +55,10 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         Report operation metadata for a dataset.
         :param source_type: The source type to filter on. If not set it will accept any source type.
             Default value: DATA_PROCESS
-            See valid types here: https://
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param operation_type: The operation type to filter on. If not set it will accept any source type.
             Default value: "UPDATE"
-            See valid types here: https://
+            See valid types here: https://docs.datahub.com/docs/graphql/enums/#operationtype
         :param partition: The partition to set the operation.
         :param num_affected_rows: The number of rows affected by this operation.
         :param custom_properties: Key/value pair of custom propertis

@@ -103,9 +103,9 @@ mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $oper
         :param end_time_millis: The end time in milliseconds until the operations will be queried.
         :param limit: The maximum number of items to return.
         :param source_type: The source type to filter on. If not set it will accept any source type.
-            See valid types here: https://
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param operation_type: The operation type to filter on. If not set it will accept any source type.
-            See valid types here: https://
+            See valid types here: https://docs.datahub.com/docs/graphql/enums#operationsourcetype
         :param partition: The partition to check the operation.
         """

datahub/cli/check_cli.py
CHANGED

@@ -16,6 +16,7 @@ from datahub.configuration import config_loader
 from datahub.configuration.common import AllowDenyPattern
 from datahub.emitter.mce_builder import DEFAULT_ENV
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.ingestion.sink.sink_registry import sink_registry
 from datahub.ingestion.source.source_registry import source_registry

@@ -259,7 +260,7 @@ def sql_lineage(

     graph = None
     if online:
-        graph = get_default_graph()
+        graph = get_default_graph(ClientMode.CLI)

     lineage = create_lineage_sql_parsed_result(
         sql,

@@ -472,7 +473,7 @@ WHERE
 @check.command()
 def server_config() -> None:
     """Print the server config."""
-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)

     server_config = graph.get_server_config()

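This is the pattern repeated across the CLI modules that follow: every command that previously called get_default_graph() now passes ClientMode.CLI, presumably so requests can be attributed to CLI usage (see the datahub/ingestion/graph/config.py and datahub/emitter/request_helper.py entries in the file list). A minimal sketch using only calls that appear in these diffs:

from datahub.ingestion.graph.client import get_default_graph
from datahub.ingestion.graph.config import ClientMode

with get_default_graph(ClientMode.CLI) as graph:
    server_config = graph.get_server_config()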
datahub/cli/config_utils.py
CHANGED

@@ -17,8 +17,8 @@ from datahub.ingestion.graph.config import DatahubClientConfig
 logger = logging.getLogger(__name__)

 CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
-DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
-DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
+DATAHUB_CONFIG_PATH: str = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
+DATAHUB_ROOT_FOLDER: str = os.path.expanduser("~/.datahub")
 ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"

 ENV_DATAHUB_SYSTEM_CLIENT_ID = "DATAHUB_SYSTEM_CLIENT_ID"
datahub/cli/delete_cli.py
CHANGED

@@ -15,6 +15,7 @@ from datahub.cli import cli_utils
 from datahub.configuration.datetimes import ClickDatetime
 from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.graph.filters import RemovedStatusFilter
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -48,7 +49,7 @@ def delete() -> None:

    See `datahub delete by-filter` for the list of available filters.

-    See https://
+    See https://docs.datahub.com/docs/how/delete-metadata for more detailed docs.
    """
    pass

@@ -124,7 +125,7 @@ def by_registry(
    Delete all metadata written using the given registry id and version pair.
    """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    if soft and not dry_run:
        raise click.UsageError(

@@ -175,7 +176,7 @@ def references(urn: str, dry_run: bool, force: bool) -> None:
    Delete all references to an entity (but not the entity itself).
    """

-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
    logger.info(f"Using graph: {graph}")

    references_count, related_aspects = graph.delete_references_to_urn(

@@ -238,7 +239,7 @@ def undo_by_filter(
    """
    Undo soft deletion by filters
    """
-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
    logger.info(f"Using {graph}")
    if urn:
        graph.set_soft_delete_status(urn=urn, delete=False)

@@ -410,7 +411,7 @@ def by_filter(
        abort=True,
    )

-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)
    logger.info(f"Using {graph}")

    # Determine which urns to delete.
datahub/cli/docker_cli.py
CHANGED

@@ -811,7 +811,7 @@ def quickstart(
         raise status.to_exception(
             header="Unable to run quickstart - the following issues were detected:",
             footer="If you think something went wrong, please file an issue at https://github.com/datahub-project/datahub/issues\n"
-            "or send a message in our Slack https://slack
+            "or send a message in our Slack https://datahub.com/slack/\n"
             f"Be sure to attach the logs from {log_file.name}",
         )

@@ -824,7 +824,7 @@ def quickstart(
         fg="green",
     )
     click.secho(
-        "Need support? Get in touch on Slack: https://
+        "Need support? Get in touch on Slack: https://datahub.com/slack/",
         fg="magenta",
     )

datahub/cli/exists_cli.py
CHANGED

@@ -6,6 +6,7 @@ import click
 from click_default_group import DefaultGroup

 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -36,4 +37,4 @@ def urn(ctx: Any, urn: Optional[str]) -> None:
            raise click.UsageError("Nothing for me to get. Maybe provide an urn?")
        urn = ctx.args[0]
        logger.debug(f"Using urn from args {urn}")
-    click.echo(json.dumps(get_default_graph().exists(urn)))
+    click.echo(json.dumps(get_default_graph(ClientMode.CLI).exists(urn)))
datahub/cli/get_cli.py
CHANGED

@@ -7,6 +7,7 @@ from click_default_group import DefaultGroup

 from datahub.cli.cli_utils import get_aspects_for_entity
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.telemetry import telemetry
 from datahub.upgrade import upgrade

@@ -46,7 +47,7 @@ def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
        urn = ctx.args[0]
        logger.debug(f"Using urn from args {urn}")

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    if aspect:
        # If aspects are specified, we need to do the existence check first.
datahub/cli/iceberg_cli.py
CHANGED

@@ -13,6 +13,7 @@ import datahub.metadata.schema_classes
 from datahub.cli.cli_utils import post_entity
 from datahub.configuration.common import GraphError
 from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import SystemMetadataClass
 from datahub.telemetry import telemetry

@@ -178,7 +179,7 @@ def create(
    Create an iceberg warehouse.
    """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    urn = iceberg_data_platform_instance_urn(warehouse)

@@ -331,7 +332,7 @@ def update(
    Update iceberg warehouses. Can only update credentials, and role. Cannot update region
    """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    urn = iceberg_data_platform_instance_urn(warehouse)

@@ -407,7 +408,7 @@ def list() -> None:
    List iceberg warehouses
    """

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    for warehouse in get_all_warehouses(client):
        click.echo(warehouse)

@@ -420,7 +421,7 @@ def list() -> None:
 @telemetry.with_telemetry()
 def get(warehouse: str) -> None:
    """Fetches the details of the specified iceberg warehouse"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    urn = iceberg_data_platform_instance_urn(warehouse)

    if client.exists(urn):

@@ -455,7 +456,7 @@ def delete(warehouse: str, dry_run: bool, force: bool) -> None:

    urn = iceberg_data_platform_instance_urn(warehouse)

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    if not client.exists(urn):
        raise click.ClickException(f"urn {urn} not found")
datahub/cli/ingest_cli.py
CHANGED

@@ -14,10 +14,11 @@ from tabulate import tabulate

 from datahub._version import nice_version_name
 from datahub.cli import cli_utils
-from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH
+from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH, load_client_config
 from datahub.configuration.common import GraphError
 from datahub.configuration.config_loader import load_config_file
 from datahub.ingestion.graph.client import get_default_graph
+from datahub.ingestion.graph.config import ClientMode
 from datahub.ingestion.run.connection import ConnectionManager
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.telemetry import telemetry

@@ -269,7 +270,7 @@ def deploy(
    urn:li:dataHubIngestionSource:<name>
    """

-    datahub_graph = get_default_graph()
+    datahub_graph = get_default_graph(ClientMode.CLI)

    variables = deploy_source_vars(
        name=name,

@@ -360,6 +361,7 @@ def mcps(path: str) -> None:
    """

    click.echo("Starting ingestion...")
+    datahub_config = load_client_config()
    recipe: dict = {
        "source": {
            "type": "file",

@@ -367,6 +369,7 @@ def mcps(path: str) -> None:
                "path": path,
            },
        },
+        "datahub_api": datahub_config,
    }

    pipeline = Pipeline.create(recipe, report_to=None)
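With this change, datahub ingest mcps attaches the CLI's own client config to the recipe, so the generated pipeline emits to whichever server the CLI is configured for. A sketch of the resulting recipe shape (the config/path nesting is assumed from the surrounding context, and the path is made up):

from datahub.cli.config_utils import load_client_config

datahub_config = load_client_config()
recipe: dict = {
    "source": {
        "type": "file",
        "config": {"path": "metadata.json"},  # hypothetical MCP file
    },
    "datahub_api": datahub_config,
}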
@@ -422,7 +425,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
        }
    }

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    session = client._session
    gms_host = client.config.server

@@ -508,7 +511,7 @@ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) ->
 def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None:
    """List recent ingestion runs to datahub"""

-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    session = client._session
    gms_host = client.config.server

@@ -559,7 +562,7 @@ def show(
    run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool
 ) -> None:
    """Describe a provided ingestion run to datahub"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    session = client._session
    gms_host = client.config.server

@@ -609,7 +612,7 @@ def rollback(
    run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
 ) -> None:
    """Rollback a provided ingestion run to datahub"""
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)

    if not force and not dry_run:
        click.confirm(
datahub/cli/migrate.py
CHANGED

@@ -25,6 +25,7 @@ from datahub.emitter.mcp_builder import (
 )
 from datahub.emitter.rest_emitter import DatahubRestEmitter
 from datahub.ingestion.graph.client import (
+    ClientMode,
     DataHubGraph,
     RelatedEntity,
     get_default_graph,

@@ -147,7 +148,7 @@ def dataplatform2instance_func(
    migration_report = MigrationReport(run_id, dry_run, keep)
    system_metadata = SystemMetadataClass(runId=run_id)

-    graph = get_default_graph()
+    graph = get_default_graph(ClientMode.CLI)

    urns_to_migrate: List[str] = []

@@ -386,7 +387,7 @@ def migrate_containers(


 def get_containers_for_migration(env: str) -> List[Any]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    containers_to_migrate = list(
        client.get_urns_by_filter(entity_types=["container"], env=env)
    )

@@ -445,7 +446,7 @@ def process_container_relationships(
    relationships: Iterable[RelatedEntity] = migration_utils.get_incoming_relationships(
        urn=src_urn
    )
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    for relationship in relationships:
        log.debug(f"Incoming Relationship: {relationship}")
        target_urn: str = relationship.urn
datahub/cli/migration_utils.py
CHANGED

@@ -12,6 +12,7 @@ from datahub.ingestion.graph.client import (
     RelatedEntity,
     get_default_graph,
 )
+from datahub.ingestion.graph.config import ClientMode
 from datahub.metadata.schema_classes import (
     ChartInfoClass,
     ContainerClass,

@@ -243,7 +244,7 @@ def clone_aspect(
    run_id: str = str(uuid.uuid4()),
    dry_run: bool = False,
 ) -> Iterable[MetadataChangeProposalWrapper]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    aspect_map = cli_utils.get_aspects_for_entity(
        client._session,
        client.config.server,

@@ -274,7 +275,7 @@ def clone_aspect(


 def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    yield from client.get_related_entities(
        entity_urn=urn,
        relationship_types=[

@@ -290,7 +291,7 @@ def get_incoming_relationships(urn: str) -> Iterable[RelatedEntity]:


 def get_outgoing_relationships(urn: str) -> Iterable[RelatedEntity]:
-    client = get_default_graph()
+    client = get_default_graph(ClientMode.CLI)
    yield from client.get_related_entities(
        entity_urn=urn,
        relationship_types=[