acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -33,7 +33,9 @@ def _is_valid_hostname(hostname: str) -> bool:
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class PulsarSourceConfig(
|
|
36
|
-
StatefulIngestionConfigBase,
|
|
36
|
+
StatefulIngestionConfigBase,
|
|
37
|
+
PlatformInstanceConfigMixin,
|
|
38
|
+
EnvConfigMixin,
|
|
37
39
|
):
|
|
38
40
|
web_service_url: str = Field(
|
|
39
41
|
default="http://localhost:8080", description="The web URL for the cluster."
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import re
|
|
2
3
|
from typing import List, Optional, Set, cast
|
|
3
4
|
|
|
@@ -10,8 +11,11 @@ from datahub.metadata.schema_classes import (
|
|
|
10
11
|
OwnershipClass,
|
|
11
12
|
OwnershipTypeClass,
|
|
12
13
|
)
|
|
14
|
+
from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn
|
|
15
|
+
from datahub.utilities.urns._urn_base import Urn
|
|
16
|
+
from datahub.utilities.urns.error import InvalidUrnError
|
|
13
17
|
|
|
14
|
-
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
class PatternCleanUpOwnershipConfig(ConfigModel):
|
|
@@ -49,6 +53,11 @@ class PatternCleanUpOwnership(OwnershipTransformer):
|
|
|
49
53
|
else:
|
|
50
54
|
return set()
|
|
51
55
|
|
|
56
|
+
def _process_owner(self, name: str) -> str:
|
|
57
|
+
for value in self.config.pattern_for_cleanup:
|
|
58
|
+
name = re.sub(value, "", name)
|
|
59
|
+
return name
|
|
60
|
+
|
|
52
61
|
def transform_aspect(
|
|
53
62
|
self, entity_urn: str, aspect_name: str, aspect: Optional[builder.Aspect]
|
|
54
63
|
) -> Optional[builder.Aspect]:
|
|
@@ -58,14 +67,23 @@ class PatternCleanUpOwnership(OwnershipTransformer):
|
|
|
58
67
|
# clean all the owners based on the parameters received from config
|
|
59
68
|
cleaned_owner_urns: List[str] = []
|
|
60
69
|
for owner_urn in current_owner_urns:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
70
|
+
username = ""
|
|
71
|
+
try:
|
|
72
|
+
owner: Urn = Urn.from_string(owner_urn)
|
|
73
|
+
if isinstance(owner, CorpUserUrn):
|
|
74
|
+
username = str(CorpUserUrn(self._process_owner(owner.username)))
|
|
75
|
+
elif isinstance(owner, CorpGroupUrn):
|
|
76
|
+
username = str(CorpGroupUrn(self._process_owner(owner.name)))
|
|
77
|
+
else:
|
|
78
|
+
logger.warning(f"{owner_urn} is not a supported owner type.")
|
|
79
|
+
username = owner_urn
|
|
80
|
+
except InvalidUrnError:
|
|
81
|
+
logger.warning(f"Could not parse {owner_urn} from {entity_urn}")
|
|
82
|
+
username = owner_urn
|
|
83
|
+
cleaned_owner_urns.append(username)
|
|
66
84
|
|
|
67
85
|
ownership_type, ownership_type_urn = builder.validate_ownership_type(
|
|
68
|
-
OwnershipTypeClass.
|
|
86
|
+
OwnershipTypeClass.TECHNICAL_OWNER
|
|
69
87
|
)
|
|
70
88
|
owners = [
|
|
71
89
|
OwnerClass(
|
datahub/lite/duckdb_lite.py
CHANGED
|
@@ -284,9 +284,10 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
284
284
|
self,
|
|
285
285
|
query: str,
|
|
286
286
|
flavor: SearchFlavor,
|
|
287
|
-
aspects: List[str] = [],
|
|
287
|
+
aspects: Optional[List[str]] = None,
|
|
288
288
|
snippet: bool = True,
|
|
289
289
|
) -> Iterable[Searchable]:
|
|
290
|
+
aspects = aspects or []
|
|
290
291
|
if flavor == SearchFlavor.FREE_TEXT:
|
|
291
292
|
base_query = f"SELECT distinct(urn), 'urn', NULL from metadata_aspect_v2 where urn ILIKE '%{query}%' UNION SELECT urn, aspect_name, metadata from metadata_aspect_v2 where metadata->>'$.name' ILIKE '%{query}%'"
|
|
292
293
|
for r in self.duckdb_client.execute(base_query).fetchall():
|
|
@@ -759,15 +760,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
759
760
|
entity_id=[str(data_platform_urn), data_platform_instance],
|
|
760
761
|
)
|
|
761
762
|
self._create_edges_from_data_platform_instance(data_platform_instance_urn)
|
|
762
|
-
elif isinstance(aspect, ChartInfoClass):
|
|
763
|
-
urn = Urn.from_string(entity_urn)
|
|
764
|
-
self.add_edge(
|
|
765
|
-
entity_urn,
|
|
766
|
-
"name",
|
|
767
|
-
aspect.title + f" ({urn.get_entity_id()[-1]})",
|
|
768
|
-
remove_existing=True,
|
|
769
|
-
)
|
|
770
|
-
elif isinstance(aspect, DashboardInfoClass):
|
|
763
|
+
elif isinstance(aspect, (ChartInfoClass, DashboardInfoClass)):
|
|
771
764
|
urn = Urn.from_string(entity_urn)
|
|
772
765
|
self.add_edge(
|
|
773
766
|
entity_urn,
|
datahub/lite/lite_local.py
CHANGED
datahub/lite/lite_util.py
CHANGED
|
@@ -70,9 +70,10 @@ class DataHubLiteWrapper(DataHubLiteLocal):
|
|
|
70
70
|
self,
|
|
71
71
|
query: str,
|
|
72
72
|
flavor: SearchFlavor,
|
|
73
|
-
aspects: List[str] = [],
|
|
73
|
+
aspects: Optional[List[str]] = None,
|
|
74
74
|
snippet: bool = True,
|
|
75
75
|
) -> Iterable[Searchable]:
|
|
76
|
+
aspects = aspects or []
|
|
76
77
|
yield from self.lite.search(query, flavor, aspects, snippet)
|
|
77
78
|
|
|
78
79
|
def ls(self, path: str) -> List[Browseable]:
|
|
@@ -96,10 +97,10 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite
|
|
|
96
97
|
lite_type = lite_local_config.type
|
|
97
98
|
try:
|
|
98
99
|
lite_class = lite_registry.get(lite_type)
|
|
99
|
-
except KeyError:
|
|
100
|
+
except KeyError as e:
|
|
100
101
|
raise Exception(
|
|
101
102
|
f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping.keys()]}"
|
|
102
|
-
)
|
|
103
|
+
) from e
|
|
103
104
|
|
|
104
105
|
lite_specific_config = lite_class.get_config_class().parse_obj(
|
|
105
106
|
lite_local_config.config
|