acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -16,6 +16,7 @@ from typing import (
|
|
|
16
16
|
List,
|
|
17
17
|
Literal,
|
|
18
18
|
Optional,
|
|
19
|
+
Sequence,
|
|
19
20
|
Tuple,
|
|
20
21
|
Type,
|
|
21
22
|
Union,
|
|
@@ -31,9 +32,15 @@ from datahub.configuration.common import ConfigModel, GraphError, OperationalErr
|
|
|
31
32
|
from datahub.emitter.aspect import TIMESERIES_ASPECT_MAP
|
|
32
33
|
from datahub.emitter.mce_builder import DEFAULT_ENV, Aspect
|
|
33
34
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
34
|
-
from datahub.emitter.rest_emitter import
|
|
35
|
+
from datahub.emitter.rest_emitter import (
|
|
36
|
+
DEFAULT_REST_SINK_ENDPOINT,
|
|
37
|
+
DEFAULT_REST_TRACE_MODE,
|
|
38
|
+
DatahubRestEmitter,
|
|
39
|
+
RestSinkEndpoint,
|
|
40
|
+
RestTraceMode,
|
|
41
|
+
)
|
|
35
42
|
from datahub.emitter.serialization_helper import post_json_transform
|
|
36
|
-
from datahub.ingestion.graph.config import (
|
|
43
|
+
from datahub.ingestion.graph.config import (
|
|
37
44
|
DatahubClientConfig as DatahubClientConfig,
|
|
38
45
|
)
|
|
39
46
|
from datahub.ingestion.graph.connections import (
|
|
@@ -42,8 +49,8 @@ from datahub.ingestion.graph.connections import (
|
|
|
42
49
|
)
|
|
43
50
|
from datahub.ingestion.graph.entity_versioning import EntityVersioningAPI
|
|
44
51
|
from datahub.ingestion.graph.filters import (
|
|
52
|
+
RawSearchFilterRule,
|
|
45
53
|
RemovedStatusFilter,
|
|
46
|
-
SearchFilterRule,
|
|
47
54
|
generate_filter,
|
|
48
55
|
)
|
|
49
56
|
from datahub.ingestion.source.state.checkpoint import Checkpoint
|
|
@@ -105,7 +112,7 @@ class RelatedEntity:
|
|
|
105
112
|
via: Optional[str] = None
|
|
106
113
|
|
|
107
114
|
|
|
108
|
-
def
|
|
115
|
+
def entity_type_to_graphql(entity_type: str) -> str:
|
|
109
116
|
"""Convert the entity types into GraphQL "EntityType" enum values."""
|
|
110
117
|
|
|
111
118
|
# Hard-coded special cases.
|
|
@@ -140,6 +147,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
140
147
|
ca_certificate_path=self.config.ca_certificate_path,
|
|
141
148
|
client_certificate_path=self.config.client_certificate_path,
|
|
142
149
|
disable_ssl_verification=self.config.disable_ssl_verification,
|
|
150
|
+
openapi_ingestion=DEFAULT_REST_SINK_ENDPOINT == RestSinkEndpoint.OPENAPI,
|
|
151
|
+
default_trace_mode=DEFAULT_REST_TRACE_MODE == RestTraceMode.ENABLED,
|
|
143
152
|
)
|
|
144
153
|
|
|
145
154
|
self.server_id = _MISSING_SERVER_ID
|
|
@@ -330,7 +339,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
330
339
|
aspect_type_name: Optional[str] = None,
|
|
331
340
|
version: int = 0,
|
|
332
341
|
) -> Optional[Aspect]:
|
|
333
|
-
assert aspect_type.ASPECT_NAME
|
|
342
|
+
assert aspect == aspect_type.ASPECT_NAME
|
|
334
343
|
return self.get_aspect(
|
|
335
344
|
entity_urn=entity_urn,
|
|
336
345
|
aspect_type=aspect_type,
|
|
@@ -781,9 +790,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
781
790
|
results: Dict = self._post_generic(url, search_body)
|
|
782
791
|
num_entities = results["value"]["numEntities"]
|
|
783
792
|
logger.debug(f"Matched {num_entities} containers")
|
|
784
|
-
entities_yielded: int = 0
|
|
785
793
|
for x in results["value"]["entities"]:
|
|
786
|
-
entities_yielded += 1
|
|
787
794
|
logger.debug(f"yielding {x['entity']}")
|
|
788
795
|
yield x["entity"]
|
|
789
796
|
|
|
@@ -797,13 +804,13 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
797
804
|
container: Optional[str] = None,
|
|
798
805
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
799
806
|
batch_size: int = 100,
|
|
800
|
-
extraFilters: Optional[List[
|
|
807
|
+
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
801
808
|
) -> Iterable[Tuple[str, "GraphQLSchemaMetadata"]]:
|
|
802
809
|
"""Fetch schema info for datasets that match all of the given filters.
|
|
803
810
|
|
|
804
811
|
:return: An iterable of (urn, schema info) tuple that match the filters.
|
|
805
812
|
"""
|
|
806
|
-
types = [
|
|
813
|
+
types = [entity_type_to_graphql("dataset")]
|
|
807
814
|
|
|
808
815
|
# Add the query default of * if no query is specified.
|
|
809
816
|
query = query or "*"
|
|
@@ -865,7 +872,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
865
872
|
def get_urns_by_filter(
|
|
866
873
|
self,
|
|
867
874
|
*,
|
|
868
|
-
entity_types: Optional[
|
|
875
|
+
entity_types: Optional[Sequence[str]] = None,
|
|
869
876
|
platform: Optional[str] = None,
|
|
870
877
|
platform_instance: Optional[str] = None,
|
|
871
878
|
env: Optional[str] = None,
|
|
@@ -873,8 +880,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
873
880
|
container: Optional[str] = None,
|
|
874
881
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
875
882
|
batch_size: int = 10000,
|
|
876
|
-
extraFilters: Optional[List[
|
|
877
|
-
extra_or_filters: Optional[List[Dict[str, List[
|
|
883
|
+
extraFilters: Optional[List[RawSearchFilterRule]] = None,
|
|
884
|
+
extra_or_filters: Optional[List[Dict[str, List[RawSearchFilterRule]]]] = None,
|
|
878
885
|
) -> Iterable[str]:
|
|
879
886
|
"""Fetch all urns that match all of the given filters.
|
|
880
887
|
|
|
@@ -965,8 +972,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
965
972
|
container: Optional[str] = None,
|
|
966
973
|
status: RemovedStatusFilter = RemovedStatusFilter.NOT_SOFT_DELETED,
|
|
967
974
|
batch_size: int = 10000,
|
|
968
|
-
extra_and_filters: Optional[List[
|
|
969
|
-
extra_or_filters: Optional[List[Dict[str, List[
|
|
975
|
+
extra_and_filters: Optional[List[RawSearchFilterRule]] = None,
|
|
976
|
+
extra_or_filters: Optional[List[Dict[str, List[RawSearchFilterRule]]]] = None,
|
|
970
977
|
extra_source_fields: Optional[List[str]] = None,
|
|
971
978
|
skip_cache: bool = False,
|
|
972
979
|
) -> Iterable[dict]:
|
|
@@ -1109,7 +1116,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1109
1116
|
f"Scrolling to next scrollAcrossEntities page: {scroll_id}"
|
|
1110
1117
|
)
|
|
1111
1118
|
|
|
1112
|
-
|
|
1119
|
+
@classmethod
|
|
1120
|
+
def _get_types(cls, entity_types: Optional[Sequence[str]]) -> Optional[List[str]]:
|
|
1113
1121
|
types: Optional[List[str]] = None
|
|
1114
1122
|
if entity_types is not None:
|
|
1115
1123
|
if not entity_types:
|
|
@@ -1117,7 +1125,9 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1117
1125
|
"entity_types cannot be an empty list; use None for all entities"
|
|
1118
1126
|
)
|
|
1119
1127
|
|
|
1120
|
-
types = [
|
|
1128
|
+
types = [
|
|
1129
|
+
entity_type_to_graphql(entity_type) for entity_type in entity_types
|
|
1130
|
+
]
|
|
1121
1131
|
return types
|
|
1122
1132
|
|
|
1123
1133
|
def get_latest_pipeline_checkpoint(
|
|
@@ -1547,7 +1557,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1547
1557
|
return fragment
|
|
1548
1558
|
|
|
1549
1559
|
def _run_assertion_build_params(
|
|
1550
|
-
self, params: Optional[Dict[str, str]] =
|
|
1560
|
+
self, params: Optional[Dict[str, str]] = None
|
|
1551
1561
|
) -> List[Any]:
|
|
1552
1562
|
if params is None:
|
|
1553
1563
|
return []
|
|
@@ -1566,9 +1576,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1566
1576
|
self,
|
|
1567
1577
|
urn: str,
|
|
1568
1578
|
save_result: bool = True,
|
|
1569
|
-
parameters: Optional[Dict[str, str]] =
|
|
1579
|
+
parameters: Optional[Dict[str, str]] = None,
|
|
1570
1580
|
async_flag: bool = False,
|
|
1571
1581
|
) -> Dict:
|
|
1582
|
+
if parameters is None:
|
|
1583
|
+
parameters = {}
|
|
1572
1584
|
params = self._run_assertion_build_params(parameters)
|
|
1573
1585
|
graph_query: str = """
|
|
1574
1586
|
%s
|
|
@@ -1597,9 +1609,11 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1597
1609
|
self,
|
|
1598
1610
|
urns: List[str],
|
|
1599
1611
|
save_result: bool = True,
|
|
1600
|
-
parameters: Optional[Dict[str, str]] =
|
|
1612
|
+
parameters: Optional[Dict[str, str]] = None,
|
|
1601
1613
|
async_flag: bool = False,
|
|
1602
1614
|
) -> Dict:
|
|
1615
|
+
if parameters is None:
|
|
1616
|
+
parameters = {}
|
|
1603
1617
|
params = self._run_assertion_build_params(parameters)
|
|
1604
1618
|
graph_query: str = """
|
|
1605
1619
|
%s
|
|
@@ -1636,10 +1650,14 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1636
1650
|
def run_assertions_for_asset(
|
|
1637
1651
|
self,
|
|
1638
1652
|
urn: str,
|
|
1639
|
-
tag_urns: Optional[List[str]] =
|
|
1640
|
-
parameters: Optional[Dict[str, str]] =
|
|
1653
|
+
tag_urns: Optional[List[str]] = None,
|
|
1654
|
+
parameters: Optional[Dict[str, str]] = None,
|
|
1641
1655
|
async_flag: bool = False,
|
|
1642
1656
|
) -> Dict:
|
|
1657
|
+
if tag_urns is None:
|
|
1658
|
+
tag_urns = []
|
|
1659
|
+
if parameters is None:
|
|
1660
|
+
parameters = {}
|
|
1643
1661
|
params = self._run_assertion_build_params(parameters)
|
|
1644
1662
|
graph_query: str = """
|
|
1645
1663
|
%s
|
|
@@ -1677,9 +1695,10 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
|
|
|
1677
1695
|
self,
|
|
1678
1696
|
entity_name: str,
|
|
1679
1697
|
urns: List[str],
|
|
1680
|
-
aspects: List[str] =
|
|
1698
|
+
aspects: Optional[List[str]] = None,
|
|
1681
1699
|
with_system_metadata: bool = False,
|
|
1682
1700
|
) -> Dict[str, Any]:
|
|
1701
|
+
aspects = aspects or []
|
|
1683
1702
|
payload = {
|
|
1684
1703
|
"urns": urns,
|
|
1685
1704
|
"aspectNames": aspects,
|
|
@@ -93,7 +93,7 @@ class EntityVersioningAPI(DataHubGraphProtocol):
|
|
|
93
93
|
try:
|
|
94
94
|
return response["linkAssetVersion"]["urn"]
|
|
95
95
|
except KeyError:
|
|
96
|
-
raise ValueError(f"Unexpected response: {response}")
|
|
96
|
+
raise ValueError(f"Unexpected response: {response}") from None
|
|
97
97
|
|
|
98
98
|
def link_asset_to_versioned_asset(
|
|
99
99
|
self,
|
|
@@ -165,7 +165,7 @@ class EntityVersioningAPI(DataHubGraphProtocol):
|
|
|
165
165
|
try:
|
|
166
166
|
return response["unlinkAssetVersion"]["urn"]
|
|
167
167
|
except KeyError:
|
|
168
|
-
raise ValueError(f"Unexpected response: {response}")
|
|
168
|
+
raise ValueError(f"Unexpected response: {response}") from None
|
|
169
169
|
|
|
170
170
|
def unlink_latest_asset_from_version_set(
|
|
171
171
|
self, version_set_urn: str
|
|
@@ -198,4 +198,4 @@ class EntityVersioningAPI(DataHubGraphProtocol):
|
|
|
198
198
|
try:
|
|
199
199
|
return response["unlinkAssetVersion"]["urn"]
|
|
200
200
|
except KeyError:
|
|
201
|
-
raise ValueError(f"Unexpected response: {response}")
|
|
201
|
+
raise ValueError(f"Unexpected response: {response}") from None
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import dataclasses
|
|
1
2
|
import enum
|
|
2
3
|
from typing import Any, Dict, List, Optional
|
|
3
4
|
|
|
@@ -7,7 +8,31 @@ from datahub.emitter.mce_builder import (
|
|
|
7
8
|
)
|
|
8
9
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
9
10
|
|
|
10
|
-
|
|
11
|
+
RawSearchFilterRule = Dict[str, Any]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclasses.dataclass
|
|
15
|
+
class SearchFilterRule:
|
|
16
|
+
field: str
|
|
17
|
+
condition: str # TODO: convert to an enum
|
|
18
|
+
values: List[str]
|
|
19
|
+
negated: bool = False
|
|
20
|
+
|
|
21
|
+
def to_raw(self) -> RawSearchFilterRule:
|
|
22
|
+
return {
|
|
23
|
+
"field": self.field,
|
|
24
|
+
"condition": self.condition,
|
|
25
|
+
"values": self.values,
|
|
26
|
+
"negated": self.negated,
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
def negate(self) -> "SearchFilterRule":
|
|
30
|
+
return SearchFilterRule(
|
|
31
|
+
field=self.field,
|
|
32
|
+
condition=self.condition,
|
|
33
|
+
values=self.values,
|
|
34
|
+
negated=not self.negated,
|
|
35
|
+
)
|
|
11
36
|
|
|
12
37
|
|
|
13
38
|
class RemovedStatusFilter(enum.Enum):
|
|
@@ -29,9 +54,9 @@ def generate_filter(
|
|
|
29
54
|
env: Optional[str],
|
|
30
55
|
container: Optional[str],
|
|
31
56
|
status: RemovedStatusFilter,
|
|
32
|
-
extra_filters: Optional[List[
|
|
33
|
-
extra_or_filters: Optional[List[
|
|
34
|
-
) -> List[Dict[str, List[
|
|
57
|
+
extra_filters: Optional[List[RawSearchFilterRule]],
|
|
58
|
+
extra_or_filters: Optional[List[RawSearchFilterRule]] = None,
|
|
59
|
+
) -> List[Dict[str, List[RawSearchFilterRule]]]:
|
|
35
60
|
"""
|
|
36
61
|
Generate a search filter based on the provided parameters.
|
|
37
62
|
:param platform: The platform to filter by.
|
|
@@ -43,30 +68,32 @@ def generate_filter(
|
|
|
43
68
|
:param extra_or_filters: Extra OR filters to apply. These are combined with
|
|
44
69
|
the AND filters using an OR at the top level.
|
|
45
70
|
"""
|
|
46
|
-
and_filters: List[
|
|
71
|
+
and_filters: List[RawSearchFilterRule] = []
|
|
47
72
|
|
|
48
73
|
# Platform filter.
|
|
49
74
|
if platform:
|
|
50
|
-
and_filters.append(_get_platform_filter(platform))
|
|
75
|
+
and_filters.append(_get_platform_filter(platform).to_raw())
|
|
51
76
|
|
|
52
77
|
# Platform instance filter.
|
|
53
78
|
if platform_instance:
|
|
54
|
-
and_filters.append(
|
|
79
|
+
and_filters.append(
|
|
80
|
+
_get_platform_instance_filter(platform, platform_instance).to_raw()
|
|
81
|
+
)
|
|
55
82
|
|
|
56
83
|
# Browse path v2 filter.
|
|
57
84
|
if container:
|
|
58
|
-
and_filters.append(_get_container_filter(container))
|
|
85
|
+
and_filters.append(_get_container_filter(container).to_raw())
|
|
59
86
|
|
|
60
87
|
# Status filter.
|
|
61
88
|
status_filter = _get_status_filter(status)
|
|
62
89
|
if status_filter:
|
|
63
|
-
and_filters.append(status_filter)
|
|
90
|
+
and_filters.append(status_filter.to_raw())
|
|
64
91
|
|
|
65
92
|
# Extra filters.
|
|
66
93
|
if extra_filters:
|
|
67
94
|
and_filters += extra_filters
|
|
68
95
|
|
|
69
|
-
or_filters: List[Dict[str, List[
|
|
96
|
+
or_filters: List[Dict[str, List[RawSearchFilterRule]]] = [{"and": and_filters}]
|
|
70
97
|
|
|
71
98
|
# Env filter
|
|
72
99
|
if env:
|
|
@@ -89,7 +116,7 @@ def generate_filter(
|
|
|
89
116
|
return or_filters
|
|
90
117
|
|
|
91
118
|
|
|
92
|
-
def _get_env_filters(env: str) -> List[
|
|
119
|
+
def _get_env_filters(env: str) -> List[RawSearchFilterRule]:
|
|
93
120
|
# The env filter is a bit more tricky since it's not always stored
|
|
94
121
|
# in the same place in ElasticSearch.
|
|
95
122
|
return [
|
|
@@ -125,19 +152,19 @@ def _get_status_filter(status: RemovedStatusFilter) -> Optional[SearchFilterRule
|
|
|
125
152
|
# removed field is simply not present in the ElasticSearch document. Ideally this
|
|
126
153
|
# would be a "removed" : "false" filter, but that doesn't work. Instead, we need to
|
|
127
154
|
# use a negated filter.
|
|
128
|
-
return
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
155
|
+
return SearchFilterRule(
|
|
156
|
+
field="removed",
|
|
157
|
+
values=["true"],
|
|
158
|
+
condition="EQUAL",
|
|
159
|
+
negated=True,
|
|
160
|
+
)
|
|
134
161
|
|
|
135
162
|
elif status == RemovedStatusFilter.ONLY_SOFT_DELETED:
|
|
136
|
-
return
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
163
|
+
return SearchFilterRule(
|
|
164
|
+
field="removed",
|
|
165
|
+
values=["true"],
|
|
166
|
+
condition="EQUAL",
|
|
167
|
+
)
|
|
141
168
|
|
|
142
169
|
elif status == RemovedStatusFilter.ALL:
|
|
143
170
|
# We don't need to add a filter for this case.
|
|
@@ -152,11 +179,11 @@ def _get_container_filter(container: str) -> SearchFilterRule:
|
|
|
152
179
|
if guess_entity_type(container) != "container":
|
|
153
180
|
raise ValueError(f"Invalid container urn: {container}")
|
|
154
181
|
|
|
155
|
-
return
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
182
|
+
return SearchFilterRule(
|
|
183
|
+
field="browsePathV2",
|
|
184
|
+
values=[container],
|
|
185
|
+
condition="CONTAIN",
|
|
186
|
+
)
|
|
160
187
|
|
|
161
188
|
|
|
162
189
|
def _get_platform_instance_filter(
|
|
@@ -171,16 +198,16 @@ def _get_platform_instance_filter(
|
|
|
171
198
|
if guess_entity_type(platform_instance) != "dataPlatformInstance":
|
|
172
199
|
raise ValueError(f"Invalid data platform instance urn: {platform_instance}")
|
|
173
200
|
|
|
174
|
-
return
|
|
175
|
-
|
|
176
|
-
"
|
|
177
|
-
|
|
178
|
-
|
|
201
|
+
return SearchFilterRule(
|
|
202
|
+
field="platformInstance",
|
|
203
|
+
condition="EQUAL",
|
|
204
|
+
values=[platform_instance],
|
|
205
|
+
)
|
|
179
206
|
|
|
180
207
|
|
|
181
208
|
def _get_platform_filter(platform: str) -> SearchFilterRule:
|
|
182
|
-
return
|
|
183
|
-
|
|
184
|
-
"
|
|
185
|
-
|
|
186
|
-
|
|
209
|
+
return SearchFilterRule(
|
|
210
|
+
field="platform.keyword",
|
|
211
|
+
condition="EQUAL",
|
|
212
|
+
values=[make_data_platform_urn(platform)],
|
|
213
|
+
)
|
|
@@ -163,12 +163,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
|
|
|
163
163
|
key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
|
|
164
164
|
for key, value in obj.items()
|
|
165
165
|
}
|
|
166
|
-
elif isinstance(obj, list):
|
|
167
|
-
return [
|
|
168
|
-
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
169
|
-
for element in obj
|
|
170
|
-
]
|
|
171
|
-
elif isinstance(obj, set):
|
|
166
|
+
elif isinstance(obj, (list, set)):
|
|
172
167
|
return [
|
|
173
168
|
DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
|
|
174
169
|
for element in obj
|