acryl-datahub 0.14.1.13rc8__py3-none-any.whl → 0.15.0__py3-none-any.whl
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/METADATA +2506 -2456
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/RECORD +136 -131
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/entry_points.txt +2 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +123 -146
- datahub/cli/cli_utils.py +2 -0
- datahub/cli/delete_cli.py +103 -24
- datahub/cli/ingest_cli.py +110 -0
- datahub/cli/put_cli.py +1 -1
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/structuredproperties_cli.py +2 -1
- datahub/configuration/common.py +3 -3
- datahub/configuration/git.py +7 -1
- datahub/configuration/kafka_consumer_config.py +31 -1
- datahub/emitter/mcp_patch_builder.py +43 -0
- datahub/emitter/rest_emitter.py +17 -4
- datahub/ingestion/api/incremental_properties_helper.py +69 -0
- datahub/ingestion/api/source.py +6 -1
- datahub/ingestion/api/source_helpers.py +4 -2
- datahub/ingestion/graph/client.py +2 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +2 -2
- datahub/ingestion/run/pipeline.py +6 -5
- datahub/ingestion/run/pipeline_config.py +6 -0
- datahub/ingestion/sink/datahub_rest.py +15 -4
- datahub/ingestion/source/abs/source.py +4 -0
- datahub/ingestion/source/aws/aws_common.py +13 -1
- datahub/ingestion/source/aws/sagemaker.py +8 -0
- datahub/ingestion/source/aws/sagemaker_processors/common.py +6 -0
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +9 -4
- datahub/ingestion/source/aws/sagemaker_processors/jobs.py +12 -1
- datahub/ingestion/source/aws/sagemaker_processors/lineage.py +11 -4
- datahub/ingestion/source/aws/sagemaker_processors/models.py +30 -1
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +0 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +0 -21
- datahub/ingestion/source/bigquery_v2/profiler.py +0 -6
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/csv_enricher.py +1 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +41 -21
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/dbt/dbt_common.py +7 -61
- datahub/ingestion/source/dremio/dremio_api.py +204 -86
- datahub/ingestion/source/dremio/dremio_aspects.py +19 -15
- datahub/ingestion/source/dremio/dremio_config.py +5 -0
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +2 -0
- datahub/ingestion/source/dremio/dremio_entities.py +4 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +15 -0
- datahub/ingestion/source/dremio/dremio_source.py +7 -2
- datahub/ingestion/source/elastic_search.py +1 -1
- datahub/ingestion/source/feast.py +97 -6
- datahub/ingestion/source/gc/datahub_gc.py +46 -35
- datahub/ingestion/source/gc/dataprocess_cleanup.py +110 -50
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +7 -2
- datahub/ingestion/source/ge_data_profiler.py +46 -9
- datahub/ingestion/source/ge_profiling_config.py +5 -0
- datahub/ingestion/source/iceberg/iceberg.py +12 -5
- datahub/ingestion/source/kafka/kafka.py +39 -19
- datahub/ingestion/source/kafka/kafka_connect.py +81 -51
- datahub/ingestion/source/looker/looker_liquid_tag.py +8 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -2
- datahub/ingestion/source/looker/view_upstream.py +65 -30
- datahub/ingestion/source/metadata/business_glossary.py +35 -18
- datahub/ingestion/source/mode.py +0 -23
- datahub/ingestion/source/neo4j/__init__.py +0 -0
- datahub/ingestion/source/neo4j/neo4j_source.py +331 -0
- datahub/ingestion/source/powerbi/__init__.py +0 -1
- datahub/ingestion/source/powerbi/config.py +3 -3
- datahub/ingestion/source/powerbi/m_query/data_classes.py +36 -15
- datahub/ingestion/source/powerbi/m_query/parser.py +6 -3
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +912 -0
- datahub/ingestion/source/powerbi/m_query/resolver.py +23 -947
- datahub/ingestion/source/powerbi/m_query/tree_function.py +3 -3
- datahub/ingestion/source/powerbi/m_query/validator.py +9 -3
- datahub/ingestion/source/powerbi/powerbi.py +12 -6
- datahub/ingestion/source/preset.py +1 -0
- datahub/ingestion/source/pulsar.py +21 -2
- datahub/ingestion/source/qlik_sense/data_classes.py +1 -0
- datahub/ingestion/source/redash.py +13 -63
- datahub/ingestion/source/redshift/config.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +3 -0
- datahub/ingestion/source/s3/source.py +2 -3
- datahub/ingestion/source/sigma/data_classes.py +1 -0
- datahub/ingestion/source/sigma/sigma.py +101 -43
- datahub/ingestion/source/snowflake/snowflake_config.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_connection.py +28 -0
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +6 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +21 -4
- datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +28 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +41 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +46 -6
- datahub/ingestion/source/snowflake/snowflake_v2.py +6 -0
- datahub/ingestion/source/sql/athena.py +46 -22
- datahub/ingestion/source/sql/mssql/source.py +18 -6
- datahub/ingestion/source/sql/sql_common.py +34 -21
- datahub/ingestion/source/sql/sql_report.py +1 -0
- datahub/ingestion/source/sql/sql_types.py +85 -8
- datahub/ingestion/source/state/redundant_run_skip_handler.py +1 -1
- datahub/ingestion/source/superset.py +215 -65
- datahub/ingestion/source/tableau/tableau.py +237 -76
- datahub/ingestion/source/tableau/tableau_common.py +12 -6
- datahub/ingestion/source/tableau/tableau_constant.py +2 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +33 -0
- datahub/ingestion/source/tableau/tableau_validation.py +48 -0
- datahub/ingestion/source/unity/proxy_types.py +1 -0
- datahub/ingestion/source/unity/source.py +4 -0
- datahub/ingestion/source/unity/usage.py +20 -11
- datahub/ingestion/transformer/add_dataset_tags.py +1 -1
- datahub/ingestion/transformer/generic_aspect_transformer.py +1 -1
- datahub/integrations/assertion/common.py +1 -1
- datahub/lite/duckdb_lite.py +12 -17
- datahub/metadata/_schema_classes.py +512 -392
- datahub/metadata/_urns/urn_defs.py +1355 -1355
- datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py +2 -0
- datahub/metadata/schema.avsc +17222 -17499
- datahub/metadata/schemas/FormInfo.avsc +4 -0
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +1 -1
- datahub/metadata/schemas/StructuredPropertyKey.avsc +1 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +114 -0
- datahub/specific/chart.py +0 -39
- datahub/specific/dashboard.py +0 -39
- datahub/specific/datajob.py +7 -57
- datahub/sql_parsing/schema_resolver.py +23 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -2
- datahub/sql_parsing/sqlglot_lineage.py +55 -14
- datahub/sql_parsing/sqlglot_utils.py +8 -2
- datahub/telemetry/telemetry.py +23 -9
- datahub/testing/compare_metadata_json.py +1 -1
- datahub/testing/doctest.py +12 -0
- datahub/utilities/file_backed_collections.py +35 -2
- datahub/utilities/partition_executor.py +1 -1
- datahub/utilities/urn_encoder.py +2 -1
- datahub/utilities/urns/_urn_base.py +1 -1
- datahub/utilities/urns/structured_properties_urn.py +1 -1
- datahub/utilities/sql_lineage_parser_impl.py +0 -160
- datahub/utilities/sql_parser.py +0 -94
- datahub/utilities/sql_parser_base.py +0 -21
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.14.1.13rc8.dist-info → acryl_datahub-0.15.0.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/tableau/tableau_server_wrapper.py
NEW
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+
+from tableauserverclient import Server, UserItem
+
+from datahub.ingestion.source.tableau import tableau_constant as c
+
+
+@dataclass
+class UserInfo:
+    user_name: str
+    site_role: str
+    site_id: str
+
+    def is_site_administrator_explorer(self):
+        return self.site_role == c.SITE_ROLE
+
+    @staticmethod
+    def from_server(server: Server) -> "UserInfo":
+        assert server.user_id, "make the connection with tableau"
+
+        user: UserItem = server.users.get_by_id(server.user_id)
+
+        assert user.site_role, "site_role is not available"  # to silent the lint
+
+        assert user.name, "user name is not available"  # to silent the lint
+
+        assert server.site_id, "site identifier is not available"  # to silent the lint
+
+        return UserInfo(
+            user_name=user.name,
+            site_role=user.site_role,
+            site_id=server.site_id,
+        )
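The new UserInfo wrapper lets the Tableau connector inspect the signed-in user's role before ingesting. A minimal usage sketch (not part of the diff; the server URL, token names, and site are placeholders):

    import tableauserverclient as TSC

    from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo

    # Placeholders: substitute your own server URL, token, and site.
    server = TSC.Server("https://tableau.example.com", use_server_version=True)
    auth = TSC.PersonalAccessTokenAuth("token-name", "token-secret", site_id="my-site")

    with server.auth.sign_in(auth):
        # from_server() asserts the connection is live, then resolves the
        # signed-in user's name, role, and site via the users endpoint.
        me = UserInfo.from_server(server)
        print(me.user_name, me.site_role, me.is_site_administrator_explorer())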
datahub/ingestion/source/tableau/tableau_validation.py
NEW
@@ -0,0 +1,48 @@
+import logging
+from typing import Dict, Union
+
+from datahub.ingestion.api.source import CapabilityReport, SourceCapability
+from datahub.ingestion.source.tableau import tableau_constant as c
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+
+logger = logging.getLogger(__name__)
+
+
+def check_user_role(
+    logged_in_user: UserInfo,
+) -> Dict[Union[SourceCapability, str], CapabilityReport]:
+    capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = {
+        c.SITE_PERMISSION: CapabilityReport(
+            capable=True,
+        )
+    }
+
+    failure_reason: str = (
+        "The user does not have the `Site Administrator Explorer` role."
+    )
+
+    mitigation_message_prefix: str = (
+        "Assign `Site Administrator Explorer` role to the user"
+    )
+    mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"
+
+    try:
+        # TODO: Add check for `Enable Derived Permissions`
+        if not logged_in_user.is_site_administrator_explorer():
+            capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+                capable=False,
+                failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
+                mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}",
+            )
+
+        return capability_dict
+
+    except Exception as e:
+        logger.warning(msg=e, exc_info=e)
+        capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+            capable=False,
+            failure_reason="Failed to verify user role.",
+            mitigation_message=f"{mitigation_message_prefix}. {mitigation_message_suffix}",  # user is unknown
+        )
+
+        return capability_dict
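check_user_role feeds the connector's capability report during connection testing. A sketch of its behavior for a user lacking the required role (the UserInfo field values here are made up for illustration):

    from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
    from datahub.ingestion.source.tableau.tableau_validation import check_user_role

    report = check_user_role(
        UserInfo(user_name="jdoe", site_role="Explorer", site_id="site-1")
    )
    for capability, status in report.items():
        # For a non-SiteAdministratorExplorer role this yields capable=False,
        # with the failure_reason and mitigation_message strings built above.
        print(capability, status.capable, status.failure_reason)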
datahub/ingestion/source/unity/proxy_types.py
CHANGED
@@ -33,6 +33,7 @@ from datahub.metadata.schema_classes import (
 
 logger = logging.getLogger(__name__)
 
+# TODO: (maybe) Replace with standardized types in sql_types.py
 DATA_TYPE_REGISTRY: dict = {
     ColumnTypeName.BOOLEAN: BooleanTypeClass,
     ColumnTypeName.BYTE: BytesTypeClass,
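DATA_TYPE_REGISTRY maps Databricks SDK column types to DataHub schema type classes. A hypothetical lookup sketch, assuming only the two entries visible in this hunk:

    from databricks.sdk.service.catalog import ColumnTypeName

    from datahub.ingestion.source.unity.proxy_types import DATA_TYPE_REGISTRY
    from datahub.metadata.schema_classes import BooleanTypeClass

    # Resolve a Unity Catalog column type to its DataHub type class.
    type_class = DATA_TYPE_REGISTRY.get(ColumnTypeName.BOOLEAN)
    assert type_class is BooleanTypeClass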
datahub/ingestion/source/unity/source.py
CHANGED
@@ -556,6 +556,8 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
         )
 
         if table_props:
+            # TODO: use auto_incremental_properties workunit processor instead
+            # Consider enabling incremental_properties by default
             patch_builder = create_dataset_props_patch_builder(dataset_urn, table_props)
             for patch_mcp in patch_builder.build():
                 yield MetadataWorkUnit(
@@ -974,6 +976,8 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             )
         else:
             self.report.num_view_definitions_parsed += 1
+            if raw_lineage.out_tables != [view_urn]:
+                self.report.num_view_definitions_view_urn_mismatch += 1
         return view_definition_lineage_helper(raw_lineage, view_urn)
 
     def get_view_lineage(self) -> Iterable[MetadataWorkUnit]:
datahub/ingestion/source/unity/usage.py
CHANGED
@@ -7,7 +7,6 @@ from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, Set,
 
 import pyspark
 from databricks.sdk.service.sql import QueryStatementType
-from sqllineage.runner import LineageRunner
 
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.source_helpers import auto_empty_dataset_usage_statistics
@@ -22,7 +21,9 @@ from datahub.ingestion.source.unity.proxy_types import (
 from datahub.ingestion.source.unity.report import UnityCatalogReport
 from datahub.ingestion.source.usage.usage_common import UsageAggregator
 from datahub.metadata.schema_classes import OperationClass
+from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
 from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
+from datahub.utilities.urns.dataset_urn import DatasetUrn
 
 logger = logging.getLogger(__name__)
 
@@ -48,6 +49,7 @@ class UnityCatalogUsageExtractor:
     proxy: UnityCatalogApiProxy
     table_urn_builder: Callable[[TableReference], str]
     user_urn_builder: Callable[[str], str]
+    platform: str = "databricks"
 
     def __post_init__(self):
         self.usage_aggregator = UsageAggregator[TableReference](self.config)
@@ -173,7 +175,7 @@ class UnityCatalogUsageExtractor:
         self, query: Query, table_map: TableMap
     ) -> Optional[QueryTableInfo]:
         with self.report.usage_perf_report.sql_parsing_timer:
-            table_info = self._parse_query_via_lineage_runner(query.query_text)
+            table_info = self._parse_query_via_sqlglot(query.query_text)
         if table_info is None and query.statement_type == QueryStatementType.SELECT:
             with self.report.usage_perf_report.spark_sql_parsing_timer:
                 table_info = self._parse_query_via_spark_sql_plan(query.query_text)
@@ -191,26 +193,33 @@ class UnityCatalogUsageExtractor:
             ),
         )
 
-    def _parse_query_via_lineage_runner(self, query: str) -> Optional[StringTableInfo]:
+    def _parse_query_via_sqlglot(self, query: str) -> Optional[StringTableInfo]:
         try:
-            runner = LineageRunner(query)
+            sql_parser_in_tables = create_lineage_sql_parsed_result(
+                query=query,
+                default_db=None,
+                platform=self.platform,
+                env=self.config.env,
+                platform_instance=None,
+            )
+
             return GenericTableInfo(
                 source_tables=[
-                    self._parse_sqllineage_table(table)
-                    for table in runner.source_tables()
+                    self._parse_sqlglot_table(table)
+                    for table in sql_parser_in_tables.in_tables
                 ],
                 target_tables=[
-                    self._parse_sqllineage_table(table)
-                    for table in runner.target_tables()
+                    self._parse_sqlglot_table(table)
+                    for table in sql_parser_in_tables.out_tables
                 ],
             )
         except Exception as e:
-            logger.info(f"Could not parse query via lineage runner, {query}: {e!r}")
+            logger.info(f"Could not parse query via sqlglot, {query}: {e!r}")
             return None
 
     @staticmethod
-    def _parse_sqllineage_table(sqllineage_table: object) -> str:
-        full_table_name = str(sqllineage_table)
+    def _parse_sqlglot_table(table_urn: str) -> str:
+        full_table_name = DatasetUrn.from_string(table_urn).name
         default_schema = "<default>."
         if full_table_name.startswith(default_schema):
             return full_table_name[len(default_schema) :]
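This hunk swaps the sqllineage LineageRunner for DataHub's own sqlglot-based parser. A standalone sketch of the new path, mirroring the call made above (the query and default_db are illustrative):

    from datahub.sql_parsing.sqlglot_lineage import create_lineage_sql_parsed_result
    from datahub.utilities.urns.dataset_urn import DatasetUrn

    result = create_lineage_sql_parsed_result(
        query="INSERT INTO sales.daily SELECT * FROM sales.raw",
        default_db="hive_metastore",
        platform="databricks",
        env="PROD",
        platform_instance=None,
    )
    # in_tables/out_tables hold dataset URN strings; the plain table name is
    # recovered the same way _parse_sqlglot_table does above.
    for urn in result.in_tables + result.out_tables:
        print(DatasetUrn.from_string(urn).name)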
datahub/ingestion/transformer/add_dataset_tags.py
CHANGED
@@ -74,7 +74,7 @@ class AddDatasetTags(DatasetTagsTransformer):
         logger.debug("Generating tags")
 
         for tag_association in self.processed_tags.values():
-            tag_urn = TagUrn.create_from_string(tag_association.tag)
+            tag_urn = TagUrn.from_string(tag_association.tag)
             mcps.append(
                 MetadataChangeProposalWrapper(
                     entityUrn=tag_urn.urn(),
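This hunk is part of a release-wide migration from the deprecated Urn.create_from_string alias to Urn.from_string; the same one-line change repeats in generic_aspect_transformer.py, common.py, and duckdb_lite.py below. A minimal sketch of the rename, assuming the generated urns module:

    from datahub.metadata.urns import TagUrn

    tag_urn = TagUrn.from_string("urn:li:tag:pii")  # preferred spelling
    assert tag_urn.urn() == "urn:li:tag:pii"
    # Previously: TagUrn.create_from_string("urn:li:tag:pii"), the deprecated
    # alias, which behaved identically where it still existed.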
datahub/ingestion/transformer/generic_aspect_transformer.py
CHANGED
@@ -100,7 +100,7 @@ class GenericAspectTransformer(
         )
         if transformed_aspect:
             # for end of stream records, we modify the workunit-id
-            structured_urn = Urn.create_from_string(urn)
+            structured_urn = Urn.from_string(urn)
             simple_name = "-".join(structured_urn.get_entity_id())
             record_metadata = envelope.metadata.copy()
             record_metadata.update(
datahub/integrations/assertion/common.py
CHANGED
@@ -42,7 +42,7 @@ def get_entity_name(assertion: BaseEntityAssertion) -> Tuple[str, str, str]:
     if qualified_name is not None:
         parts = qualified_name.split(".")
     else:
-        urn_id = Urn.create_from_string(assertion.entity).get_entity_id()[1]
+        urn_id = Urn.from_string(assertion.entity).entity_ids[1]
         parts = urn_id.split(".")
     if len(parts) > 3:
         parts = parts[-3:]
datahub/lite/duckdb_lite.py
CHANGED
@@ -609,7 +609,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             aspect_map, DataPlatformInstanceClass
         )  # type: ignore
 
-        needs_platform = Urn.create_from_string(entity_urn).get_type() in [
+        needs_platform = Urn.from_string(entity_urn).get_type() in [
             "dataset",
             "container",
             "chart",
@@ -617,7 +617,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             "dataFlow",
             "dataJob",
         ]
-        entity_urn_parsed = Urn.create_from_string(entity_urn)
+        entity_urn_parsed = Urn.from_string(entity_urn)
         if entity_urn_parsed.get_type() in ["dataFlow", "dataJob"]:
             self.add_edge(
                 entity_urn,
@@ -630,15 +630,12 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
         # this is a top-level entity
         if not dpi:
             logger.debug(f"No data platform instance for {entity_urn}")
-            maybe_parent_urn = Urn.create_from_string(entity_urn).get_entity_id()[0]
+            maybe_parent_urn = Urn.from_string(entity_urn).get_entity_id()[0]
             needs_dpi = False
             if maybe_parent_urn.startswith(Urn.URN_PREFIX):
                 parent_urn = maybe_parent_urn
-                if (
-                    Urn.create_from_string(maybe_parent_urn).get_type()
-                    == "dataPlatform"
-                ):
-                    data_platform_urn = DataPlatformUrn.create_from_string(
+                if Urn.from_string(maybe_parent_urn).get_type() == "dataPlatform":
+                    data_platform_urn = DataPlatformUrn.from_string(
                         maybe_parent_urn
                     )
                     needs_dpi = True
@@ -660,7 +657,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                     logger.error(f"Failed to generate edges entity {entity_urn}", e)
                 parent_urn = str(data_platform_instance_urn)
             else:
-                data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
+                data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
                 data_platform_instance = dpi.instance or "default"
                 data_platform_instance_urn = Urn(
                     entity_type="dataPlatformInstance",
@@ -673,9 +670,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             parent_urn = "__root__"
 
         types = (
-            subtypes.typeNames
-            if subtypes
-            else [Urn.create_from_string(entity_urn).get_type()]
+            subtypes.typeNames if subtypes else [Urn.from_string(entity_urn).get_type()]
         )
         for t in types:
             type_urn = Urn(entity_type="systemNode", entity_id=[parent_urn, t])
@@ -686,7 +681,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
     def _create_edges_from_data_platform_instance(
         self, data_platform_instance_urn: Urn
    ) -> None:
-        data_platform_urn = DataPlatformUrn.create_from_string(
+        data_platform_urn = DataPlatformUrn.from_string(
             data_platform_instance_urn.get_entity_id()[0]
         )
         data_platform_instances_urn = Urn(
@@ -735,7 +730,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
         if isinstance(aspect, DatasetPropertiesClass):
             dp: DatasetPropertiesClass = aspect
             if dp.name:
-                specific_urn = DatasetUrn.create_from_string(entity_urn)
+                specific_urn = DatasetUrn.from_string(entity_urn)
                 if (
                     specific_urn.get_data_platform_urn().get_entity_id_as_string()
                     == "looker"
@@ -755,7 +750,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             self.add_edge(entity_urn, "name", cp.name, remove_existing=True)
         elif isinstance(aspect, DataPlatformInstanceClass):
             dpi: DataPlatformInstanceClass = aspect
-            data_platform_urn = DataPlatformUrn.create_from_string(dpi.platform)
+            data_platform_urn = DataPlatformUrn.from_string(dpi.platform)
             data_platform_instance = dpi.instance or "default"
             data_platform_instance_urn = Urn(
                 entity_type="dataPlatformInstance",
@@ -763,7 +758,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
             )
             self._create_edges_from_data_platform_instance(data_platform_instance_urn)
         elif isinstance(aspect, ChartInfoClass):
-            urn = Urn.create_from_string(entity_urn)
+            urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,
                 "name",
@@ -771,7 +766,7 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
                 remove_existing=True,
             )
         elif isinstance(aspect, DashboardInfoClass):
-            urn = Urn.create_from_string(entity_urn)
+            urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,
                 "name",