acryl-datahub 1.2.0.9rc1__py3-none-any.whl → 1.2.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/METADATA +2568 -2626
- {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/RECORD +120 -113
- {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +6 -3
- datahub/api/entities/dataset/dataset.py +9 -18
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/docker_check.py +2 -2
- datahub/configuration/common.py +29 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/pydantic_migration_helpers.py +0 -9
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +5 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/ingestion/autogenerated/capability_summary.json +45 -1
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/source/azure/azure_common.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -0
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
- datahub/ingestion/source/datahub/config.py +8 -9
- datahub/ingestion/source/dbt/dbt_common.py +65 -5
- datahub/ingestion/source/delta_lake/config.py +1 -1
- datahub/ingestion/source/dremio/dremio_config.py +3 -4
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +19 -2
- datahub/ingestion/source/ge_data_profiler.py +15 -2
- datahub/ingestion/source/ge_profiling_config.py +26 -22
- datahub/ingestion/source/grafana/grafana_config.py +2 -2
- datahub/ingestion/source/grafana/models.py +12 -14
- datahub/ingestion/source/hex/hex.py +6 -1
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/looker/looker_common.py +76 -75
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_source.py +493 -547
- datahub/ingestion/source/looker/lookml_config.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +46 -88
- datahub/ingestion/source/metabase.py +9 -2
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +1 -1
- datahub/ingestion/source/mode.py +13 -5
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +14 -21
- datahub/ingestion/source/preset.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +6 -3
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/s3/source.py +26 -24
- datahub/ingestion/source/salesforce.py +13 -9
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
- datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +12 -7
- datahub/ingestion/source/sql/cockroachdb.py +5 -3
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +7 -9
- datahub/ingestion/source/sql/mssql/source.py +2 -2
- datahub/ingestion/source/sql/mysql.py +2 -2
- datahub/ingestion/source/sql/oracle.py +3 -3
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -4
- datahub/ingestion/source/sql/trino.py +2 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +1 -1
- datahub/ingestion/source/sql_queries.py +6 -6
- datahub/ingestion/source/state/checkpoint.py +5 -1
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
- datahub/ingestion/source/superset.py +122 -15
- datahub/ingestion/source/tableau/tableau.py +68 -14
- datahub/ingestion/source/tableau/tableau_common.py +5 -0
- datahub/ingestion/source/tableau/tableau_constant.py +1 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +7 -3
- datahub/ingestion/source/usage/usage_common.py +3 -3
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/metadata/_internal_schema_classes.py +728 -528
- datahub/metadata/_urns/urn_defs.py +1702 -1702
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/schema.avsc +17434 -17732
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
- datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
- datahub/metadata/schemas/LogicalParent.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
- datahub/sdk/_shared.py +126 -0
- datahub/sdk/chart.py +87 -30
- datahub/sdk/dashboard.py +79 -34
- datahub/sdk/entity_client.py +11 -4
- datahub/sdk/lineage_client.py +3 -3
- datahub/sdk/search_filters.py +1 -7
- datahub/sql_parsing/split_statements.py +13 -0
- {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/top_level.txt +0 -0
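The largest change in this release is a rewrite of the Looker dashboard source (`datahub/ingestion/source/looker/looker_source.py`, reconstructed diff below): instead of assembling `ChartSnapshot`/`DashboardSnapshot` MCEs, the source now yields SDKv2 entity objects. The sketch below is a minimal illustration of that emission pattern, assuming the `datahub.sdk` `Chart`/`Dashboard` constructors accept the keyword arguments visible in the diff; the `build_dashboard` helper and its `chart_specs` input are hypothetical.

```python
# Minimal sketch of the SDKv2 entity emission pattern adopted in the diff below
# (not the source's actual method); `build_dashboard` and `chart_specs` are hypothetical.
from typing import Dict, Iterable, List, Union

from datahub.sdk.chart import Chart
from datahub.sdk.dashboard import Dashboard


def build_dashboard(
    dashboard_id: str, title: str, chart_specs: List[Dict[str, str]]
) -> Iterable[Union[Chart, Dashboard]]:
    # Charts are built first so the Dashboard can reference them directly.
    charts = [
        Chart(
            platform="looker",
            name=spec["id"],
            display_name=spec["title"],
            description=spec.get("description", ""),
        )
        for spec in chart_specs
    ]
    yield from charts
    # The Dashboard takes the Chart objects themselves, mirroring
    # `_make_dashboard_entities(looker_dashboard, charts)` in the diff.
    yield Dashboard(
        platform="looker",
        name=dashboard_id,
        display_name=title,
        charts=charts,
    )
```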
```diff
@@ -1,6 +1,7 @@
 import datetime
 import json
 import logging
+from dataclasses import dataclass
 from json import JSONDecodeError
 from typing import (
     Any,
@@ -18,7 +19,7 @@ from typing import (
 from looker_sdk.error import SDKError
 from looker_sdk.rtl.serialize import DeserializeError
 from looker_sdk.sdk.api40.models import (
-    Dashboard,
+    Dashboard as LookerAPIDashboard,
     DashboardElement,
     Folder,
     FolderBase,
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (
 
 import datahub.emitter.mce_builder as builder
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.emitter.mcp_builder import
+from datahub.emitter.mcp_builder import mcps_from_mce
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -80,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
-    AuditStamp,
-    ChangeAuditStamps,
-    DataPlatformInstance,
     Status,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
-    ChartSnapshot,
-    DashboardSnapshot,
-)
-from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (
-    BrowsePathEntryClass,
-    BrowsePathsClass,
-    BrowsePathsV2Class,
-    ChartInfoClass,
     ChartTypeClass,
-
-    DashboardInfoClass,
+    EmbedClass,
     InputFieldClass,
     InputFieldsClass,
     OwnerClass,
-    OwnershipClass,
     OwnershipTypeClass,
-    SubTypesClass,
 )
+from datahub.sdk.chart import Chart
+from datahub.sdk.container import Container
+from datahub.sdk.dashboard import Dashboard
+from datahub.sdk.dataset import Dataset
+from datahub.sdk.entity import Entity
 from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
+from datahub.utilities.sentinels import Unset, unset
 
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class DashboardProcessingResult:
+    """Result of processing a single dashboard."""
+
+    entities: List[Entity]
+    dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
+    dashboard_id: str
+    start_time: datetime.datetime
+    end_time: datetime.datetime
+
+
 @platform_name("Looker")
 @support_status(SupportStatus.CERTIFIED)
 @config_class(LookerDashboardSourceConfig)
@@ -633,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
 
         return chart_type
 
-    def
+    def _get_folder_ancestors_urn_entries(
         self, folder: LookerFolder, include_current_folder: bool = True
-    ) -> Iterable[
+    ) -> Iterable[str]:
         for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
-            assert ancestor.id
+            assert ancestor.id  # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
             urn = self._gen_folder_key(ancestor.id).as_urn()
-            yield
+            yield urn
 
         urn = self._gen_folder_key(folder.id).as_urn()
         if include_current_folder:
-            yield
-
-    def _create_platform_instance_aspect(
-        self,
-    ) -> DataPlatformInstance:
-        assert self.source_config.platform_name, (
-            "Platform name is not set in the configuration."
-        )
-        assert self.source_config.platform_instance, (
-            "Platform instance is not set in the configuration."
-        )
-
-        return DataPlatformInstance(
-            platform=builder.make_data_platform_urn(self.source_config.platform_name),
-            instance=builder.make_dataplatform_instance_urn(
-                platform=self.source_config.platform_name,
-                instance=self.source_config.platform_instance,
-            ),
-        )
+            yield urn
 
     def _make_chart_urn(self, element_id: str) -> str:
         platform_instance: Optional[str] = None
@@ -674,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             platform_instance=platform_instance,
         )
 
-    def
+    def _make_chart_entities(
         self,
         dashboard_element: LookerDashboardElement,
         dashboard: Optional[
             LookerDashboard
         ],  # dashboard will be None if this is a standalone look
-    ) -> List[
-
-            element_id=dashboard_element.get_urn_element_id()
-        )
-        self.chart_urns.add(chart_urn)
-        chart_snapshot = ChartSnapshot(
-            urn=chart_urn,
-            aspects=[Status(removed=False)],
-        )
-        browse_path_v2: Optional[BrowsePathsV2Class] = None
-
-        chart_type = self._get_chart_type(dashboard_element)
-        chart_info = ChartInfoClass(
-            type=chart_type,
-            description=dashboard_element.description or "",
-            title=dashboard_element.title or "",
-            lastModified=ChangeAuditStamps(),
-            chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
-            inputs=dashboard_element.get_view_urns(self.source_config),
-            customProperties={
-                "upstream_fields": (
-                    ",".join(
-                        sorted({field.name for field in dashboard_element.input_fields})
-                    )
-                    if dashboard_element.input_fields
-                    else ""
-                )
-            },
-        )
-        chart_snapshot.aspects.append(chart_info)
-
+    ) -> List[Chart]:
+        chart_parent_container: Union[List[str], Unset] = unset
         if (
             dashboard
             and dashboard.folder_path is not None
             and dashboard.folder is not None
         ):
-
-
-
-
-
-            dashboard_urn = self.make_dashboard_urn(dashboard)
-            browse_path_v2 = BrowsePathsV2Class(
-                path=[
-                    BrowsePathEntryClass("Folders"),
-                    *self._get_folder_browse_path_v2_entries(dashboard.folder),
-                    BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
-                ],
-            )
+            chart_parent_container = [
+                "Folders",
+                *self._get_folder_ancestors_urn_entries(dashboard.folder),
+                self.make_dashboard_urn(dashboard),
+            ]
         elif (
             dashboard is None
             and dashboard_element.folder_path is not None
             and dashboard_element.folder is not None
-        ):  #
-
-
-
-
-            browse_path_v2 = BrowsePathsV2Class(
-                path=[
-                    BrowsePathEntryClass("Folders"),
-                    *self._get_folder_browse_path_v2_entries(dashboard_element.folder),
-                ],
-            )
+        ):  # Independent look
+            chart_parent_container = [
+                "Folders",
+                *self._get_folder_ancestors_urn_entries(dashboard_element.folder),
+            ]
 
+        # Determine chart ownership
+        chart_ownership: Optional[List[OwnerClass]] = None
         if dashboard is not None:
             ownership = self.get_ownership(dashboard)
             if ownership is not None:
-
+                chart_ownership = [ownership]
         elif dashboard is None and dashboard_element is not None:
             ownership = self.get_ownership(dashboard_element)
             if ownership is not None:
-
-
-        chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
-
-        proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
-            chart_mce,
-            MetadataChangeProposalWrapper(
-                entityUrn=chart_urn,
-                aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
-            ),
-        ]
-
-        if self.source_config.include_platform_instance_in_urns:
-            proposals.append(
-                MetadataChangeProposalWrapper(
-                    entityUrn=chart_urn,
-                    aspect=self._create_platform_instance_aspect(),
-                ),
-            )
+                chart_ownership = [ownership]
 
+        chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
         # If extracting embeds is enabled, produce an MCP for embed URL.
         if (
             self.source_config.extract_embed_urls
@@ -781,111 +708,124 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             self.source_config.external_base_url
         )
         if maybe_embed_url:
-
-            create_embed_mcp(
-                chart_snapshot.urn,
-                maybe_embed_url,
-            )
-            )
+            chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))
 
-
-
-
+        chart_extra_aspects.append(
+            InputFieldsClass(
+                fields=self._input_fields_from_dashboard_element(dashboard_element)
             )
-        proposals.append(
-            MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
-        )
-
-        if browse_path_v2:
-            proposals.append(
-                MetadataChangeProposalWrapper(
-                    entityUrn=chart_urn, aspect=browse_path_v2
-                )
-            )
-
-        return proposals
-
-    def _make_dashboard_metadata_events(
-        self, looker_dashboard: LookerDashboard, chart_urns: List[str]
-    ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
-        dashboard_urn = self.make_dashboard_urn(looker_dashboard)
-        dashboard_snapshot = DashboardSnapshot(
-            urn=dashboard_urn,
-            aspects=[],
-        )
-        browse_path_v2: Optional[BrowsePathsV2Class] = None
-        dashboard_info = DashboardInfoClass(
-            description=looker_dashboard.description or "",
-            title=looker_dashboard.title,
-            charts=chart_urns,
-            lastModified=self._get_change_audit_stamps(looker_dashboard),
-            dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return [
+            Chart(
+                chart_type=self._get_chart_type(dashboard_element),
+                chart_url=dashboard_element.url(
+                    self.source_config.external_base_url or ""
+                ),
+                custom_properties={
+                    "upstream_fields": (
+                        ",".join(
+                            sorted(
+                                {field.name for field in dashboard_element.input_fields}
+                            )
+                        )
+                        if dashboard_element.input_fields
+                        else ""
+                    )
+                },
+                description=dashboard_element.description or "",
+                display_name=dashboard_element.title,  # title is (deprecated) using display_name
+                extra_aspects=chart_extra_aspects,
+                input_datasets=dashboard_element.get_view_urns(self.source_config),
+                last_modified=self._get_last_modified_time(
+                    dashboard
+                ),  # Inherited from Dashboard
+                last_modified_by=self._get_last_modified_by(
+                    dashboard
+                ),  # Inherited from Dashboard
+                created_at=self._get_created_at(dashboard),  # Inherited from Dashboard
+                created_by=self._get_created_by(dashboard),  # Inherited from Dashboard
+                deleted_on=self._get_deleted_on(dashboard),  # Inherited from Dashboard
+                deleted_by=self._get_deleted_by(dashboard),  # Inherited from Dashboard
+                name=dashboard_element.get_urn_element_id(),
+                owners=chart_ownership,
+                parent_container=chart_parent_container,
+                platform=self.source_config.platform_name,
+                platform_instance=self.source_config.platform_instance
+                if self.source_config.include_platform_instance_in_urns
+                else None,
+                subtype=BIAssetSubTypes.LOOKER_LOOK,
             )
-        dashboard_snapshot.aspects.append(browse_path)
-
-        ownership = self.get_ownership(looker_dashboard)
-        if ownership is not None:
-            dashboard_snapshot.aspects.append(ownership)
-
-        dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
-
-        dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
-
-        proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
-            dashboard_mce
         ]
 
-
-
-
-
-
-
-
+    def _make_dashboard_entities(
+        self, looker_dashboard: LookerDashboard, charts: List[Chart]
+    ) -> List[Dashboard]:
+        dashboard_ownership: Optional[List[OwnerClass]] = None
+        ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
+        if ownership is not None:
+            dashboard_ownership = [ownership]
 
-
-
-            MetadataChangeProposalWrapper(
-                entityUrn=dashboard_urn, aspect=browse_path_v2
-            )
-        )
+        # Extra Aspects not yet supported in the Dashboard entity class SDKv2
+        dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []
 
-        #
+        # Embed URL aspect
         if (
             self.source_config.extract_embed_urls
             and self.source_config.external_base_url
         ):
-
-
-
-
+            dashboard_extra_aspects.append(
+                EmbedClass(
+                    renderUrl=looker_dashboard.embed_url(
+                        self.source_config.external_base_url
+                    )
                 )
             )
 
-
-
-
-
-
-        )
+        # Input fields aspect
+        # Populate input fields from all the dashboard elements
+        all_fields: List[InputFieldClass] = []
+        for dashboard_element in looker_dashboard.dashboard_elements:
+            all_fields.extend(
+                self._input_fields_from_dashboard_element(dashboard_element)
            )
+        dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
+        # Status aspect
+        dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
+
+        dashboard_parent_container: Union[List[str], Unset] = unset
+        if (
+            looker_dashboard.folder_path is not None
+            and looker_dashboard.folder is not None
+        ):
+            dashboard_parent_container = [
+                "Folders",
+                *self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
+            ]
 
-        return
+        return [
+            Dashboard(
+                charts=charts,
+                dashboard_url=looker_dashboard.url(
+                    self.source_config.external_base_url
+                ),
+                description=looker_dashboard.description or "",
+                display_name=looker_dashboard.title,  # title is (deprecated) using display_name
+                extra_aspects=dashboard_extra_aspects,
+                last_modified=self._get_last_modified_time(looker_dashboard),
+                last_modified_by=self._get_last_modified_by(looker_dashboard),
+                created_at=self._get_created_at(looker_dashboard),
+                created_by=self._get_created_by(looker_dashboard),
+                deleted_on=self._get_deleted_on(looker_dashboard),
+                deleted_by=self._get_deleted_by(looker_dashboard),
+                name=looker_dashboard.get_urn_dashboard_id(),
+                owners=dashboard_ownership,
+                parent_container=dashboard_parent_container,
+                platform=self.source_config.platform_name,
+                platform_instance=self.source_config.platform_instance
+                if self.source_config.include_platform_instance_in_urns
+                else None,
+            )
+        ]
 
     def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
         # Note that `looker_dashboard_name_part` will like be `dashboard.1234`.
@@ -902,11 +842,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
         return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())
 
-    def
+    def _make_explore_containers(
         self,
-    ) -> Iterable[
-        Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
-    ]:
+    ) -> Iterable[Union[Container, Dataset]]:
         if not self.source_config.emit_used_explores_only:
             explores_to_fetch = list(self.list_all_explores())
         else:
@@ -924,19 +862,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         for project_name, model, _ in explores_to_fetch:
             if model not in processed_models:
                 model_key = gen_model_key(self.source_config, model)
-                yield
+                yield Container(
                     container_key=model_key,
-
-
+                    display_name=model,
+                    subtype=BIContainerSubTypes.LOOKML_MODEL,
                     extra_properties=(
                         {"project": project_name} if project_name is not None else None
                     ),
-
-                yield MetadataChangeProposalWrapper(
-                    entityUrn=model_key.as_urn(),
-                    aspect=BrowsePathsV2Class(
-                        path=[BrowsePathEntryClass("Explore")],
-                    ),
+                    parent_container=["Explore"],
                 )
 
                 processed_models.append(model)
@@ -947,9 +880,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             ((model, explore) for (_project, model, explore) in explores_to_fetch),
             max_workers=self.source_config.max_threads,
         ):
-
+            explore_dataset_entity, explore_id, start_time, end_time = future.result()
             self.reporter.explores_scanned += 1
-
+            if explore_dataset_entity:
+                yield explore_dataset_entity
             self.reporter.report_upstream_latency(start_time, end_time)
             logger.debug(
                 f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
@@ -969,66 +903,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     def fetch_one_explore(
         self, model: str, explore: str
     ) -> Tuple[
-
+        Optional[Dataset],
         str,
         datetime.datetime,
         datetime.datetime,
     ]:
         start_time = datetime.datetime.now()
-        events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
         looker_explore = self.explore_registry.get_explore(model, explore)
+        explore_dataset_entity: Optional[Dataset] = None
         if looker_explore is not None:
-
-
-
-
-
-                self.source_config.extract_embed_urls,
-            )
-            or events
+            explore_dataset_entity = looker_explore._to_metadata_events(
+                self.source_config,
+                self.reporter,
+                self.source_config.external_base_url or self.source_config.base_url,
+                self.source_config.extract_embed_urls,
             )
 
-        return
-
-
-
-
-
-            return event.proposedSnapshot.urn
-        else:
-            return event.entityUrn
+        return (
+            explore_dataset_entity,
+            f"{model}:{explore}",
+            start_time,
+            datetime.datetime.now(),
+        )
 
-    def _emit_folder_as_container(
-        self, folder: LookerFolder
-    ) -> Iterable[MetadataWorkUnit]:
+    def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
         if folder.id not in self.processed_folders:
-            yield from gen_containers(
-                container_key=self._gen_folder_key(folder.id),
-                name=folder.name,
-                sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
-                parent_container_key=(
-                    self._gen_folder_key(folder.parent_id) if folder.parent_id else None
-                ),
-            )
             if folder.parent_id is None:
-                yield
-
-
-
-
-                )
+                yield Container(
+                    container_key=self._gen_folder_key(folder.id),
+                    display_name=folder.name,
+                    subtype=BIContainerSubTypes.LOOKER_FOLDER,
+                    parent_container=["Folders"],
+                )
             else:
-                yield
-
-
-
-
-
-
-
-
-
-                )
+                yield Container(
+                    container_key=self._gen_folder_key(folder.id),
+                    display_name=folder.name,
+                    subtype=BIContainerSubTypes.LOOKER_FOLDER,
+                    parent_container=[
+                        "Folders",
+                        *self._get_folder_ancestors_urn_entries(
+                            folder, include_current_folder=False
+                        ),
+                    ],
+                )
             self.processed_folders.append(folder.id)
 
     def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
@@ -1039,91 +957,89 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             instance=self.source_config.platform_instance,
         )
 
-    def
+    def _make_dashboard_and_chart_entities(
         self, looker_dashboard: LookerDashboard
-    ) -> Iterable[Union[
+    ) -> Iterable[Union[Chart, Dashboard]]:
         # Step 1: Emit metadata for each Chart inside the Dashboard.
-        chart_events = []
+        chart_events: List[Chart] = []
         for element in looker_dashboard.dashboard_elements:
             if element.type == "vis":
                 chart_events.extend(
-                    self.
+                    self._make_chart_entities(element, looker_dashboard)
                 )
 
         yield from chart_events
 
-        # Step 2: Emit metadata events for the Dashboard itself.
-
-
-        )  # Collect the unique child chart urns for dashboard input lineage.
+        # # Step 2: Emit metadata events for the Dashboard itself.
+        # Create a set of unique chart entities for dashboard input lineage based in chart.urn
+        unique_chart_entities: List[Chart] = []
         for chart_event in chart_events:
-
-
-
-
-
-
+            # Use chart.urn to ensure uniqueness based on the chart's URN property
+            # Also, update the set of processed chart urns
+            if str(chart_event.urn) not in self.chart_urns:
+                self.chart_urns.add(str(chart_event.urn))
+                unique_chart_entities.append(chart_event)
+
+        dashboard_events = self._make_dashboard_entities(
+            looker_dashboard, unique_chart_entities
         )
         yield from dashboard_events
 
     def get_ownership(
         self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
-    ) -> Optional[
+    ) -> Optional[OwnerClass]:
         if looker_dashboard_look.owner is not None:
             owner_urn = looker_dashboard_look.owner.get_urn(
                 self.source_config.strip_user_ids_from_email
             )
             if owner_urn is not None:
-
-
-
-                        owner=owner_urn,
-                        type=OwnershipTypeClass.DATAOWNER,
-                    )
-                ]
+                return OwnerClass(
+                    owner=owner_urn,
+                    type=OwnershipTypeClass.DATAOWNER,
                )
-            return ownership
         return None
 
-    def
-        self, looker_dashboard: LookerDashboard
-    ) ->
-
-
-
-
-
-        if looker_dashboard
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            and looker_dashboard.deleted_at is not None
-        ):
-            deleter_urn = looker_dashboard.deleted_by.get_urn(
-                self.source_config.strip_user_ids_from_email
-            )
-            if deleter_urn:
-                change_audit_stamp.deleted = AuditStamp(
-                    actor=deleter_urn,
-                    time=round(looker_dashboard.deleted_at.timestamp() * 1000),
-                )
+    def _get_last_modified_time(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.last_updated_at if looker_dashboard else None
+
+    def _get_last_modified_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.last_updated_by:
+            return None
+        return looker_dashboard.last_updated_by.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
+
+    def _get_created_at(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.created_at if looker_dashboard else None
+
+    def _get_created_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.owner:
+            return None
+        return looker_dashboard.owner.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
 
-
+    def _get_deleted_on(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[datetime.datetime]:
+        return looker_dashboard.deleted_at if looker_dashboard else None
+
+    def _get_deleted_by(
+        self, looker_dashboard: Optional[LookerDashboard]
+    ) -> Optional[str]:
+        if not looker_dashboard or not looker_dashboard.deleted_by:
+            return None
+        return looker_dashboard.deleted_by.get_urn(
+            self.source_config.strip_user_ids_from_email
+        )
 
     def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
         assert folder.id
@@ -1136,7 +1052,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         ]
         return "/".join(ancestors + [folder.name])
 
-    def _get_looker_dashboard(self, dashboard:
+    def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
         self.reporter.accessed_dashboards += 1
         if dashboard.folder is None:
             logger.debug(f"{dashboard.id} has no folder")
@@ -1210,22 +1126,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
 
         return user
 
-    def process_metrics_dimensions_and_fields_for_dashboard(
-        self, dashboard: LookerDashboard
-    ) -> List[MetadataWorkUnit]:
-        chart_mcps = [
-            self._make_metrics_dimensions_chart_mcp(element)
-            for element in dashboard.dashboard_elements
-        ]
-        dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
-
-        mcps = chart_mcps
-        mcps.append(dashboard_mcp)
-
-        workunits = [mcp.as_workunit() for mcp in mcps]
-
-        return workunits
-
     def _input_fields_from_dashboard_element(
         self, dashboard_element: LookerDashboardElement
     ) -> List[InputFieldClass]:
@@ -1318,104 +1218,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             aspect=input_fields_aspect,
         )
 
-    def
+    def _should_skip_personal_folder_dashboard(
+        self, dashboard_object: LookerAPIDashboard
+    ) -> bool:
+        """Check if dashboard should be skipped due to being in personal folder."""
+        if not self.source_config.skip_personal_folders:
+            return False
+
+        if dashboard_object.folder is not None and (
+            dashboard_object.folder.is_personal
+            or dashboard_object.folder.is_personal_descendant
+        ):
+            self.reporter.info(
+                title="Dropped Dashboard",
+                message="Dropped due to being a personal folder",
+                context=f"Dashboard ID: {dashboard_object.id}",
+            )
+            assert dashboard_object.id is not None
+            self.reporter.report_dashboards_dropped(dashboard_object.id)
+            return True
+        return False
+
+    def _should_skip_dashboard_by_folder_path(
+        self, looker_dashboard: LookerDashboard
+    ) -> bool:
+        """Check if dashboard should be skipped based on folder path pattern."""
+        if (
+            looker_dashboard.folder_path is not None
+            and not self.source_config.folder_path_pattern.allowed(
+                looker_dashboard.folder_path
+            )
+        ):
+            logger.debug(
+                f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
+            )
+            self.reporter.report_dashboards_dropped(looker_dashboard.id)
+            return True
+        return False
+
+    def _fetch_dashboard_from_api(
         self, dashboard_id: str, fields: List[str]
-    ) ->
-
-        Optional[looker_usage.LookerDashboardForUsage],
-        str,
-        datetime.datetime,
-        datetime.datetime,
-    ]:
-        start_time = datetime.datetime.now()
-        assert dashboard_id is not None
-        if not self.source_config.dashboard_pattern.allowed(dashboard_id):
-            self.reporter.report_dashboards_dropped(dashboard_id)
-            return [], None, dashboard_id, start_time, datetime.datetime.now()
+    ) -> Optional[LookerAPIDashboard]:
+        """Fetch dashboard object from Looker API with error handling."""
         try:
-
+            return self.looker_api.dashboard(
                 dashboard_id=dashboard_id,
                 fields=fields,
             )
         except (SDKError, DeserializeError) as e:
-            # A looker dashboard could be deleted in between the list and the get
             self.reporter.report_warning(
                 title="Failed to fetch dashboard from the Looker API",
                 message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
                 context=f"Dashboard ID: {dashboard_id}",
                 exc=e,
            )
-            return
+            return None
 
-
-
-
-
-
-
-
-
-
-
-
-            return [], None, dashboard_id, start_time, datetime.datetime.now()
+    def _create_empty_result(
+        self, dashboard_id: str, start_time: datetime.datetime
+    ) -> DashboardProcessingResult:
+        """Create an empty result for skipped or failed dashboard processing."""
+        return DashboardProcessingResult(
+            entities=[],
+            dashboard_usage=None,
+            dashboard_id=dashboard_id,
+            start_time=start_time,
+            end_time=datetime.datetime.now(),
+        )
 
-
+    def process_dashboard(
+        self, dashboard_id: str, fields: List[str]
+    ) -> DashboardProcessingResult:
+        """
+        Process a single dashboard and return the metadata workunits.
 
-
-
-
-            and not self.source_config.folder_path_pattern.allowed(
-                looker_dashboard.folder_path
-            )
-        ):
-            logger.debug(
-                f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
-            )
-            return [], None, dashboard_id, start_time, datetime.datetime.now()
+        Args:
+            dashboard_id: The ID of the dashboard to process
+            fields: List of fields to fetch from the Looker API
 
-
-
-
-
+        Returns:
+            DashboardProcessingResult containing entities, usage data, and timing information
+        """
+        start_time = datetime.datetime.now()
 
-
-
-            (
-                MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
-                if isinstance(mce, MetadataChangeEvent)
-                else MetadataWorkUnit(
-                    id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
-                )
-            )
-            for mce in mces
-        ]
+        if dashboard_id is None:
+            raise ValueError("Dashboard ID cannot be None")
 
-        #
-
-
+        # Fetch dashboard from API
+        dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
+            dashboard_id, fields
        )
+        if dashboard_object is None:
+            return self._create_empty_result(dashboard_id, start_time)
+
+        # Check if dashboard should be skipped due to personal folder
+        if self._should_skip_personal_folder_dashboard(dashboard_object):
+            return self._create_empty_result(dashboard_id, start_time)
 
-
+        # Convert to internal representation
+        looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
 
+        # Check folder path pattern
+        if self._should_skip_dashboard_by_folder_path(looker_dashboard):
+            return self._create_empty_result(dashboard_id, start_time)
+
+        # Build entities list
+        entities: List[Entity] = []
+
+        # Add folder containers if dashboard has a folder
+        if looker_dashboard.folder:
+            entities.extend(
+                list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
+            )
+
+        # Add dashboard and chart entities
+        entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
+
+        # Report successful processing
         self.reporter.report_dashboards_scanned()
 
-        #
+        # Generate usage tracking object
         dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
             dashboard_object
         )
 
-        return (
-
-            dashboard_usage,
-            dashboard_id,
-            start_time,
-            datetime.datetime.now(),
+        return DashboardProcessingResult(
+            entities=entities,
+            dashboard_usage=dashboard_usage,
+            dashboard_id=dashboard_id,
+            start_time=start_time,
+            end_time=datetime.datetime.now(),
         )
 
-    def
+    def _get_folder_and_ancestors_containers(
         self, folder: LookerFolder
-    ) -> Iterable[
+    ) -> Iterable[Container]:
         for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
             yield from self._emit_folder_as_container(
                 self._get_looker_folder(ancestor_folder)
@@ -1486,39 +1423,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         ).workunit_processor,
     ]
 
-    def
+    def emit_independent_looks_entities(
         self, dashboard_element: LookerDashboardElement
-    ) -> Iterable[
+    ) -> Iterable[Union[Container, Chart]]:
         if dashboard_element.folder:  # independent look
-            yield from self.
+            yield from self._get_folder_and_ancestors_containers(
                 dashboard_element.folder
             )
 
-        yield from
-
-
-                dashboard=None,
-            )
-        )
-
-        yield from auto_workunit(
-            [
-                self._make_metrics_dimensions_chart_mcp(
-                    dashboard_element,
-                )
-            ]
+        yield from self._make_chart_entities(
+            dashboard_element=dashboard_element,
+            dashboard=None,
         )
 
-    def extract_independent_looks(self) -> Iterable[
-        """
-        Emit MetadataWorkUnit for looks which are not part of any Dashboard
+    def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
         """
-
-        return
+        Emit entities for Looks which are not part of any Dashboard.
 
-
+        Returns: Containers for the folders and ancestors folders and Charts for the looks
+        """
+        logger.debug("Extracting Looks not part of any Dashboard")
 
-        logger.debug("Extracting looks not part of Dashboard")
         look_fields: List[str] = [
             "id",
             "title",
@@ -1540,15 +1465,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
         all_looks: List[Look] = self.looker_api.all_looks(
             fields=look_fields, soft_deleted=self.source_config.include_deleted
         )
+
         for look in all_looks:
+            # Skip looks that are already referenced from a dashboard
+            if look.id is None:
+                logger.warning("Encountered Look with no ID, skipping.")
+                continue
+
             if look.id in self.reachable_look_registry:
-                # This look is reachable from the Dashboard
                 continue
 
             if look.query_id is None:
                 logger.info(f"query_id is None for look {look.title}({look.id})")
                 continue
 
+            # Skip looks in personal folders if configured
             if self.source_config.skip_personal_folders:
                 if look.folder is not None and (
                     look.folder.is_personal or look.folder.is_personal_descendant
@@ -1559,76 +1490,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
                     context=f"Look ID: {look.id}",
                 )
 
-                assert look.id, "Looker id is null"
                 self.reporter.report_charts_dropped(look.id)
                 continue
 
-
-
-
-
-
-
-
-
-
-
-
-
+            # Fetch the Look's query and filter to allowed fields
+            query: Optional[Query] = None
+            try:
+                look_with_query = self.looker_api.get_look(look.id, fields=["query"])
+                query_obj = look_with_query.query
+                if query_obj:
+                    query = Query(
+                        **{
+                            key: getattr(query_obj, key)
+                            for key in query_fields
+                            if hasattr(query_obj, key)
+                        }
+                    )
+            except Exception as exc:
+                logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
+                continue
 
-            dashboard_element
-
-
-
-
-
-
-
-
-
-
-                ),
+            dashboard_element = self._get_looker_dashboard_element(
+                DashboardElement(
+                    id=f"looks_{look.id}",  # to avoid conflict with non-standalone looks (element.id prefixes),
+                    # we add the "looks_" prefix to look.id.
+                    title=look.title,
+                    subtitle_text=look.description,
+                    look_id=look.id,
+                    dashboard_id=None,  # As this is an independent look
+                    look=LookWithQuery(
+                        query=query,
+                        folder=getattr(look, "folder", None),
+                        user_id=getattr(look, "user_id", None),
                     ),
                 )
             )
 
             if dashboard_element is not None:
-                logger.debug(f"Emitting
-                yield from self.
+                logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
+                yield from self.emit_independent_looks_entities(
                     dashboard_element=dashboard_element
                 )
 
-
+    def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
+        """
+        Note: Returns Entities from SDKv2 where possible else MCPs only.
 
-
-
-        dashboards = self.looker_api.all_dashboards(fields="id")
-        deleted_dashboards = (
-            self.looker_api.search_dashboards(fields="id", deleted="true")
-            if self.source_config.include_deleted
-            else []
-        )
-        if deleted_dashboards != []:
-            logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
+        Using SDKv2: Containers, Datasets, Dashboards and Charts
+        Using MCPW: Tags, DashboardUsageStats and UserResourceMapping
 
-
-
-
-
-
-
-
-
-
-
+        TODO: Convert MCPWs to use SDKv2 entities
+        """
+        with self.reporter.report_stage("list_dashboards"):
+            # Fetch all dashboards (not deleted)
+            dashboards = self.looker_api.all_dashboards(fields="id")
+
+            # Optionally fetch deleted dashboards if configured
+            if self.source_config.include_deleted:
+                deleted_dashboards = self.looker_api.search_dashboards(
+                    fields="id", deleted="true"
+                )
             else:
-
-
-
-
+                deleted_dashboards = []
+
+            if deleted_dashboards:
+                logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
+
+            # Collect all dashboard IDs (including deleted if applicable)
+            all_dashboard_ids: List[Optional[str]] = [
+                dashboard.id for dashboard in dashboards
+            ]
+            all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])
 
-
-
+            # Filter dashboard IDs based on the allowed pattern
+            filtered_dashboard_ids: List[str] = []
+            for dashboard_id in all_dashboard_ids:
+                if dashboard_id is None:
+                    continue
+                if not self.source_config.dashboard_pattern.allowed(dashboard_id):
+                    self.reporter.report_dashboards_dropped(dashboard_id)
+                else:
+                    filtered_dashboard_ids.append(dashboard_id)
+
+            # Use the filtered list for further processing
+            dashboard_ids: List[str] = filtered_dashboard_ids
+
+            # Report the total number of dashboards to be processed
+            self.reporter.report_total_dashboards(len(dashboard_ids))
+
+        # Define the fields to extract for each dashboard
+        dashboard_fields = [
             "id",
             "title",
             "dashboard_elements",
@@ -1644,41 +1595,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
             "deleted_at",
             "deleter_id",
         ]
+
+        # Add usage-related fields if usage history extraction is enabled
         if self.source_config.extract_usage_history:
-
-
-
-
-
+            dashboard_fields.extend(
+                [
+                    "favorite_count",
+                    "view_count",
+                    "last_viewed_at",
+                ]
+            )
 
+        # Store dashboards for which usage stats will be extracted
         looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
 
+        # Process dashboard and chart metadata
         with self.reporter.report_stage("dashboard_chart_metadata"):
+            dashboard_jobs = (
+                (dashboard_id, dashboard_fields)
+                for dashboard_id in dashboard_ids
+                if dashboard_id is not None
+            )
             for job in BackpressureAwareExecutor.map(
                 self.process_dashboard,
-
-                    (dashboard_id, fields)
-                    for dashboard_id in dashboard_ids
-                    if dashboard_id is not None
-                ),
+                dashboard_jobs,
                 max_workers=self.source_config.max_threads,
             ):
-                (
-
-                    dashboard_usage,
-                    dashboard_id,
-                    start_time,
-                    end_time,
-                ) = job.result()
+                result: DashboardProcessingResult = job.result()
+
                 logger.debug(
-                    f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
+                    f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
                 )
-                self.reporter.report_upstream_latency(
+                self.reporter.report_upstream_latency(
+                    result.start_time, result.end_time
+                )
+
+                yield from result.entities
 
-
-
-                    looker_dashboards_for_usage.append(dashboard_usage)
+                if result.dashboard_usage is not None:
+                    looker_dashboards_for_usage.append(result.dashboard_usage)
 
+        # Warn if owner extraction was enabled but no emails could be found
         if (
             self.source_config.extract_owners
             and self.reporter.resolved_user_ids > 0
@@ -1690,53 +1647,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
                 "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
             )
 
-        # Extract independent
-
+        # Extract independent looks first, so their explores are considered in _make_explore_containers.
+        if self.source_config.extract_independent_looks:
+            with self.reporter.report_stage("extract_independent_looks"):
+                yield from self.extract_independent_looks()
 
-
-
-
-                if isinstance(event, MetadataChangeEvent):
-                    yield MetadataWorkUnit(
-                        id=f"looker-{event.proposedSnapshot.urn}", mce=event
-                    )
-                elif isinstance(event, MetadataChangeProposalWrapper):
-                    yield event.as_workunit()
-                elif isinstance(event, MetadataWorkUnit):
-                    yield event
-                else:
-                    raise Exception(f"Unexpected type of event {event}")
-            self.reporter.report_stage_end("explore_metadata")
+        # Process explore containers and yield them.
+        with self.reporter.report_stage("explore_metadata"):
+            yield from self._make_explore_containers()
 
         if (
             self.source_config.tag_measures_and_dimensions
             and self.reporter.explores_scanned > 0
         ):
-            # Emit tag
+            # Emit tag MCPs for measures and dimensions if we produced any explores:
+            # Tags MCEs are converted to MCPs
             for tag_mce in LookerUtil.get_tag_mces():
-                yield
-                    id=f"tag-{tag_mce.proposedSnapshot.urn}",
-                    mce=tag_mce,
-                )
+                yield from auto_workunit(mcps_from_mce(tag_mce))
 
         # Extract usage history is enabled
         if self.source_config.extract_usage_history:
-            self.reporter.
-
-
-
-
-
-
+            with self.reporter.report_stage("usage_extraction"):
+                usage_mcps: List[MetadataChangeProposalWrapper] = (
+                    self.extract_usage_stat(
+                        looker_dashboards_for_usage, self.chart_urns
+                    )
+                )
+                yield from auto_workunit(usage_mcps)
 
-            #
+            # Ingest looker user resource mapping workunits.
             logger.info("Ingesting looker user resource mapping workunits")
-            self.reporter.
-
-
-
+            with self.reporter.report_stage("user_resource_extraction"):
+                yield from auto_workunit(
+                    self.user_registry.to_platform_resource(
+                        self.source_config.platform_instance
+                    )
                 )
-            )
 
     def get_report(self) -> SourceReport:
         return self.reporter
```