acryl-datahub 1.2.0.10rc2__py3-none-any.whl → 1.2.0.10rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (93)
  1. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/METADATA +2525 -2609
  2. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/RECORD +93 -93
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/assertion/assertion.py +1 -1
  5. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  6. datahub/api/entities/dataproduct/dataproduct.py +6 -3
  7. datahub/api/entities/dataset/dataset.py +9 -18
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/api/graphql/operation.py +10 -6
  10. datahub/cli/docker_check.py +2 -2
  11. datahub/configuration/common.py +29 -1
  12. datahub/configuration/connection_resolver.py +5 -2
  13. datahub/configuration/import_resolver.py +7 -4
  14. datahub/configuration/pydantic_migration_helpers.py +0 -9
  15. datahub/configuration/source_common.py +3 -2
  16. datahub/configuration/validate_field_deprecation.py +5 -2
  17. datahub/configuration/validate_field_removal.py +5 -2
  18. datahub/configuration/validate_field_rename.py +6 -5
  19. datahub/configuration/validate_multiline_string.py +5 -2
  20. datahub/ingestion/run/pipeline_config.py +2 -2
  21. datahub/ingestion/source/azure/azure_common.py +1 -1
  22. datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
  23. datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
  24. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  25. datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
  26. datahub/ingestion/source/datahub/config.py +8 -9
  27. datahub/ingestion/source/delta_lake/config.py +1 -1
  28. datahub/ingestion/source/dremio/dremio_config.py +3 -4
  29. datahub/ingestion/source/feast.py +8 -10
  30. datahub/ingestion/source/fivetran/config.py +1 -1
  31. datahub/ingestion/source/ge_profiling_config.py +26 -22
  32. datahub/ingestion/source/grafana/grafana_config.py +2 -2
  33. datahub/ingestion/source/grafana/models.py +12 -14
  34. datahub/ingestion/source/hex/hex.py +6 -1
  35. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  36. datahub/ingestion/source/kafka_connect/common.py +2 -2
  37. datahub/ingestion/source/looker/looker_common.py +55 -75
  38. datahub/ingestion/source/looker/looker_config.py +15 -4
  39. datahub/ingestion/source/looker/looker_source.py +445 -548
  40. datahub/ingestion/source/looker/lookml_config.py +1 -1
  41. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  42. datahub/ingestion/source/metadata/lineage.py +1 -1
  43. datahub/ingestion/source/mode.py +13 -5
  44. datahub/ingestion/source/nifi.py +1 -1
  45. datahub/ingestion/source/powerbi/config.py +14 -21
  46. datahub/ingestion/source/preset.py +1 -1
  47. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  48. datahub/ingestion/source/redshift/config.py +6 -3
  49. datahub/ingestion/source/salesforce.py +13 -9
  50. datahub/ingestion/source/schema/json_schema.py +14 -14
  51. datahub/ingestion/source/sigma/data_classes.py +3 -0
  52. datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
  53. datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
  54. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
  55. datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
  56. datahub/ingestion/source/sql/athena.py +2 -1
  57. datahub/ingestion/source/sql/clickhouse.py +12 -7
  58. datahub/ingestion/source/sql/cockroachdb.py +5 -3
  59. datahub/ingestion/source/sql/druid.py +2 -2
  60. datahub/ingestion/source/sql/hive.py +4 -3
  61. datahub/ingestion/source/sql/hive_metastore.py +7 -9
  62. datahub/ingestion/source/sql/mssql/source.py +2 -2
  63. datahub/ingestion/source/sql/mysql.py +2 -2
  64. datahub/ingestion/source/sql/oracle.py +3 -3
  65. datahub/ingestion/source/sql/presto.py +2 -1
  66. datahub/ingestion/source/sql/teradata.py +4 -4
  67. datahub/ingestion/source/sql/trino.py +2 -1
  68. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  69. datahub/ingestion/source/sql/vertica.py +1 -1
  70. datahub/ingestion/source/sql_queries.py +6 -6
  71. datahub/ingestion/source/state/checkpoint.py +5 -1
  72. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  73. datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
  74. datahub/ingestion/source/superset.py +29 -4
  75. datahub/ingestion/source/tableau/tableau.py +65 -11
  76. datahub/ingestion/source/tableau/tableau_common.py +5 -0
  77. datahub/ingestion/source/tableau/tableau_constant.py +1 -0
  78. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  79. datahub/ingestion/source/unity/config.py +7 -3
  80. datahub/ingestion/source/usage/usage_common.py +3 -3
  81. datahub/ingestion/source_config/pulsar.py +3 -1
  82. datahub/metadata/_internal_schema_classes.py +45 -1
  83. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  84. datahub/metadata/schema.avsc +24 -1
  85. datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
  86. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  87. datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
  88. datahub/sdk/dashboard.py +0 -2
  89. datahub/sdk/search_filters.py +1 -7
  90. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/WHEEL +0 -0
  91. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/entry_points.txt +0 -0
  92. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/licenses/LICENSE +0 -0
  93. {acryl_datahub-1.2.0.10rc2.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/top_level.txt +0 -0
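
The diff below covers the largest single change in this release, datahub/ingestion/source/looker/looker_source.py (+445/-548): the Looker source drops the MCE snapshot path (ChartSnapshot, DashboardSnapshot, gen_containers, create_embed_mcp) in favour of the SDK v2 entity classes (Chart, Dashboard, Container, Dataset). The following sketch is a minimal illustration of that construction style, assembled only from the constructor keyword arguments visible in the diff; the entity names and values are made up for illustration, and the exact signatures are defined by the datahub.sdk package and may differ.

# Hypothetical sketch, not part of the diff: illustrates the SDK v2 entity
# construction style that looker_source.py adopts in this release. The keyword
# arguments are taken from the calls visible in the diff below; exact
# signatures are defined by the datahub.sdk package and may differ.
from datahub.sdk.chart import Chart
from datahub.sdk.dashboard import Dashboard

# A chart is now built directly instead of assembling ChartSnapshot + MCE.
chart = Chart(
    name="looks_1234",              # element id used to derive the chart URN
    platform="looker",
    display_name="Weekly Signups",  # hypothetical Look title
    description="",
    subtype="Look",                 # the source passes BIAssetSubTypes.LOOKER_LOOK here
)

# A dashboard references Chart objects rather than bare chart URNs.
dashboard = Dashboard(
    name="dashboards.42",           # hypothetical dashboard id
    platform="looker",
    display_name="Growth Overview",
    charts=[chart],
)

# Entities expose a .urn property, which the source uses to de-duplicate charts.
print(chart.urn, dashboard.urn)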
@@ -1,6 +1,7 @@
  import datetime
  import json
  import logging
+ from dataclasses import dataclass
  from json import JSONDecodeError
  from typing import (
  Any,
@@ -18,7 +19,7 @@ from typing import (
  from looker_sdk.error import SDKError
  from looker_sdk.rtl.serialize import DeserializeError
  from looker_sdk.sdk.api40.models import (
- Dashboard,
+ Dashboard as LookerAPIDashboard,
  DashboardElement,
  Folder,
  FolderBase,
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (

  import datahub.emitter.mce_builder as builder
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
- from datahub.emitter.mcp_builder import create_embed_mcp, gen_containers
+ from datahub.emitter.mcp_builder import mcps_from_mce
  from datahub.ingestion.api.common import PipelineContext
  from datahub.ingestion.api.decorators import (
  SupportStatus,
@@ -80,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
  StatefulIngestionSourceBase,
  )
  from datahub.metadata.com.linkedin.pegasus2avro.common import (
- AuditStamp,
- ChangeAuditStamps,
- DataPlatformInstance,
  Status,
  )
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
- ChartSnapshot,
- DashboardSnapshot,
- )
- from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
  from datahub.metadata.schema_classes import (
- BrowsePathEntryClass,
- BrowsePathsClass,
- BrowsePathsV2Class,
- ChartInfoClass,
  ChartTypeClass,
- ContainerClass,
- DashboardInfoClass,
+ EmbedClass,
  InputFieldClass,
  InputFieldsClass,
  OwnerClass,
- OwnershipClass,
  OwnershipTypeClass,
- SubTypesClass,
  )
+ from datahub.sdk.chart import Chart
+ from datahub.sdk.container import Container
+ from datahub.sdk.dashboard import Dashboard
+ from datahub.sdk.dataset import Dataset
+ from datahub.sdk.entity import Entity
  from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
+ from datahub.utilities.sentinels import Unset, unset

  logger = logging.getLogger(__name__)


+ @dataclass
+ class DashboardProcessingResult:
+ """Result of processing a single dashboard."""
+
+ entities: List[Entity]
+ dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
+ dashboard_id: str
+ start_time: datetime.datetime
+ end_time: datetime.datetime
+
+
  @platform_name("Looker")
  @support_status(SupportStatus.CERTIFIED)
  @config_class(LookerDashboardSourceConfig)
@@ -633,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):

  return chart_type

- def _get_folder_browse_path_v2_entries(
+ def _get_folder_ancestors_urn_entries(
  self, folder: LookerFolder, include_current_folder: bool = True
- ) -> Iterable[BrowsePathEntryClass]:
+ ) -> Iterable[str]:
  for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
- assert ancestor.id
+ assert ancestor.id # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
  urn = self._gen_folder_key(ancestor.id).as_urn()
- yield BrowsePathEntryClass(id=urn, urn=urn)
+ yield urn

  urn = self._gen_folder_key(folder.id).as_urn()
  if include_current_folder:
- yield BrowsePathEntryClass(id=urn, urn=urn)
-
- def _create_platform_instance_aspect(
- self,
- ) -> DataPlatformInstance:
- assert self.source_config.platform_name, (
- "Platform name is not set in the configuration."
- )
- assert self.source_config.platform_instance, (
- "Platform instance is not set in the configuration."
- )
-
- return DataPlatformInstance(
- platform=builder.make_data_platform_urn(self.source_config.platform_name),
- instance=builder.make_dataplatform_instance_urn(
- platform=self.source_config.platform_name,
- instance=self.source_config.platform_instance,
- ),
- )
+ yield urn

  def _make_chart_urn(self, element_id: str) -> str:
  platform_instance: Optional[str] = None
@@ -674,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  platform_instance=platform_instance,
  )

- def _make_chart_metadata_events(
+ def _make_chart_entities(
  self,
  dashboard_element: LookerDashboardElement,
  dashboard: Optional[
  LookerDashboard
  ], # dashboard will be None if this is a standalone look
- ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
- chart_urn = self._make_chart_urn(
- element_id=dashboard_element.get_urn_element_id()
- )
- self.chart_urns.add(chart_urn)
- chart_snapshot = ChartSnapshot(
- urn=chart_urn,
- aspects=[Status(removed=False)],
- )
- browse_path_v2: Optional[BrowsePathsV2Class] = None
-
- chart_type = self._get_chart_type(dashboard_element)
- chart_info = ChartInfoClass(
- type=chart_type,
- description=dashboard_element.description or "",
- title=dashboard_element.title or "",
- lastModified=ChangeAuditStamps(),
- chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
- inputs=dashboard_element.get_view_urns(self.source_config),
- customProperties={
- "upstream_fields": (
- ",".join(
- sorted({field.name for field in dashboard_element.input_fields})
- )
- if dashboard_element.input_fields
- else ""
- )
- },
- )
- chart_snapshot.aspects.append(chart_info)
-
+ ) -> List[Chart]:
+ chart_parent_container: Union[List[str], Unset] = unset
  if (
  dashboard
  and dashboard.folder_path is not None
  and dashboard.folder is not None
  ):
- browse_path = BrowsePathsClass(
- paths=[f"/Folders/{dashboard.folder_path}/{dashboard.title}"]
- )
- chart_snapshot.aspects.append(browse_path)
-
- dashboard_urn = self.make_dashboard_urn(dashboard)
- browse_path_v2 = BrowsePathsV2Class(
- path=[
- BrowsePathEntryClass("Folders"),
- *self._get_folder_browse_path_v2_entries(dashboard.folder),
- BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
- ],
- )
+ chart_parent_container = [
+ "Folders",
+ *self._get_folder_ancestors_urn_entries(dashboard.folder),
+ self.make_dashboard_urn(dashboard),
+ ]
  elif (
  dashboard is None
  and dashboard_element.folder_path is not None
  and dashboard_element.folder is not None
- ): # independent look
- browse_path = BrowsePathsClass(
- paths=[f"/Folders/{dashboard_element.folder_path}"]
- )
- chart_snapshot.aspects.append(browse_path)
- browse_path_v2 = BrowsePathsV2Class(
- path=[
- BrowsePathEntryClass("Folders"),
- *self._get_folder_browse_path_v2_entries(dashboard_element.folder),
- ],
- )
+ ): # Independent look
+ chart_parent_container = [
+ "Folders",
+ *self._get_folder_ancestors_urn_entries(dashboard_element.folder),
+ ]

+ # Determine chart ownership
+ chart_ownership: Optional[List[OwnerClass]] = None
  if dashboard is not None:
  ownership = self.get_ownership(dashboard)
  if ownership is not None:
- chart_snapshot.aspects.append(ownership)
+ chart_ownership = [ownership]
  elif dashboard is None and dashboard_element is not None:
  ownership = self.get_ownership(dashboard_element)
  if ownership is not None:
- chart_snapshot.aspects.append(ownership)
-
- chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
-
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
- chart_mce,
- MetadataChangeProposalWrapper(
- entityUrn=chart_urn,
- aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
- ),
- ]
-
- if self.source_config.include_platform_instance_in_urns:
- proposals.append(
- MetadataChangeProposalWrapper(
- entityUrn=chart_urn,
- aspect=self._create_platform_instance_aspect(),
- ),
- )
+ chart_ownership = [ownership]

+ chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
  # If extracting embeds is enabled, produce an MCP for embed URL.
  if (
  self.source_config.extract_embed_urls
@@ -781,111 +708,110 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  self.source_config.external_base_url
  )
  if maybe_embed_url:
- proposals.append(
- create_embed_mcp(
- chart_snapshot.urn,
- maybe_embed_url,
- )
- )
+ chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))

- if dashboard is None and dashboard_element.folder:
- container = ContainerClass(
- container=self._gen_folder_key(dashboard_element.folder.id).as_urn(),
+ chart_extra_aspects.append(
+ InputFieldsClass(
+ fields=self._input_fields_from_dashboard_element(dashboard_element)
  )
- proposals.append(
- MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
- )
-
- if browse_path_v2:
- proposals.append(
- MetadataChangeProposalWrapper(
- entityUrn=chart_urn, aspect=browse_path_v2
- )
- )
-
- return proposals
-
- def _make_dashboard_metadata_events(
- self, looker_dashboard: LookerDashboard, chart_urns: List[str]
- ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
- dashboard_urn = self.make_dashboard_urn(looker_dashboard)
- dashboard_snapshot = DashboardSnapshot(
- urn=dashboard_urn,
- aspects=[],
- )
- browse_path_v2: Optional[BrowsePathsV2Class] = None
- dashboard_info = DashboardInfoClass(
- description=looker_dashboard.description or "",
- title=looker_dashboard.title,
- charts=chart_urns,
- lastModified=self._get_change_audit_stamps(looker_dashboard),
- dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
  )
-
- dashboard_snapshot.aspects.append(dashboard_info)
- if (
- looker_dashboard.folder_path is not None
- and looker_dashboard.folder is not None
- ):
- browse_path = BrowsePathsClass(
- paths=[f"/Folders/{looker_dashboard.folder_path}"]
- )
- browse_path_v2 = BrowsePathsV2Class(
- path=[
- BrowsePathEntryClass("Folders"),
- *self._get_folder_browse_path_v2_entries(looker_dashboard.folder),
- ],
+ return [
+ Chart(
+ chart_type=self._get_chart_type(dashboard_element),
+ chart_url=dashboard_element.url(
+ self.source_config.external_base_url or ""
+ ),
+ custom_properties={
+ "upstream_fields": (
+ ",".join(
+ sorted(
+ {field.name for field in dashboard_element.input_fields}
+ )
+ )
+ if dashboard_element.input_fields
+ else ""
+ )
+ },
+ description=dashboard_element.description or "",
+ display_name=dashboard_element.title, # title is (deprecated) using display_name
+ extra_aspects=chart_extra_aspects,
+ input_datasets=dashboard_element.get_view_urns(self.source_config),
+ last_modified=self._get_last_modified_time(dashboard),
+ name=dashboard_element.get_urn_element_id(),
+ owners=chart_ownership,
+ parent_container=chart_parent_container,
+ platform=self.source_config.platform_name,
+ platform_instance=self.source_config.platform_instance
+ if self.source_config.include_platform_instance_in_urns
+ else None,
+ subtype=BIAssetSubTypes.LOOKER_LOOK,
  )
- dashboard_snapshot.aspects.append(browse_path)
-
- ownership = self.get_ownership(looker_dashboard)
- if ownership is not None:
- dashboard_snapshot.aspects.append(ownership)
-
- dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
-
- dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
-
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
- dashboard_mce
  ]

- if looker_dashboard.folder is not None:
- container = ContainerClass(
- container=self._gen_folder_key(looker_dashboard.folder.id).as_urn(),
- )
- proposals.append(
- MetadataChangeProposalWrapper(entityUrn=dashboard_urn, aspect=container)
- )
+ def _make_dashboard_entities(
+ self, looker_dashboard: LookerDashboard, charts: List[Chart]
+ ) -> List[Dashboard]:
+ dashboard_ownership: Optional[List[OwnerClass]] = None
+ ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
+ if ownership is not None:
+ dashboard_ownership = [ownership]

- if browse_path_v2:
- proposals.append(
- MetadataChangeProposalWrapper(
- entityUrn=dashboard_urn, aspect=browse_path_v2
- )
- )
+ # Extra Aspects not yet supported in the Dashboard entity class SDKv2
+ dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []

- # If extracting embeds is enabled, produce an MCP for embed URL.
+ # Embed URL aspect
  if (
  self.source_config.extract_embed_urls
  and self.source_config.external_base_url
  ):
- proposals.append(
- create_embed_mcp(
- dashboard_snapshot.urn,
- looker_dashboard.embed_url(self.source_config.external_base_url),
+ dashboard_extra_aspects.append(
+ EmbedClass(
+ renderUrl=looker_dashboard.embed_url(
+ self.source_config.external_base_url
+ )
  )
  )

- if self.source_config.include_platform_instance_in_urns:
- proposals.append(
- MetadataChangeProposalWrapper(
- entityUrn=dashboard_urn,
- aspect=self._create_platform_instance_aspect(),
- )
+ # Input fields aspect
+ # Populate input fields from all the dashboard elements
+ all_fields: List[InputFieldClass] = []
+ for dashboard_element in looker_dashboard.dashboard_elements:
+ all_fields.extend(
+ self._input_fields_from_dashboard_element(dashboard_element)
  )
+ dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
+ # Status aspect
+ dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
+
+ dashboard_parent_container: Union[List[str], Unset] = unset
+ if (
+ looker_dashboard.folder_path is not None
+ and looker_dashboard.folder is not None
+ ):
+ dashboard_parent_container = [
+ "Folders",
+ *self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
+ ]

- return proposals
+ return [
+ Dashboard(
+ charts=charts,
+ dashboard_url=looker_dashboard.url(
+ self.source_config.external_base_url
+ ),
+ description=looker_dashboard.description or "",
+ display_name=looker_dashboard.title, # title is (deprecated) using display_name
+ extra_aspects=dashboard_extra_aspects,
+ last_modified=self._get_last_modified_time(looker_dashboard),
+ name=looker_dashboard.get_urn_dashboard_id(),
+ owners=dashboard_ownership,
+ parent_container=dashboard_parent_container,
+ platform=self.source_config.platform_name,
+ platform_instance=self.source_config.platform_instance
+ if self.source_config.include_platform_instance_in_urns
+ else None,
+ )
+ ]

  def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
  # Note that `looker_dashboard_name_part` will like be `dashboard.1234`.
@@ -902,11 +828,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
  return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())

- def _make_explore_metadata_events(
+ def _make_explore_containers(
  self,
- ) -> Iterable[
- Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
- ]:
+ ) -> Iterable[Union[Container, Dataset]]:
  if not self.source_config.emit_used_explores_only:
  explores_to_fetch = list(self.list_all_explores())
  else:
@@ -924,19 +848,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  for project_name, model, _ in explores_to_fetch:
  if model not in processed_models:
  model_key = gen_model_key(self.source_config, model)
- yield from gen_containers(
+ yield Container(
  container_key=model_key,
- name=model,
- sub_types=[BIContainerSubTypes.LOOKML_MODEL],
+ display_name=model,
+ subtype=BIContainerSubTypes.LOOKML_MODEL,
  extra_properties=(
  {"project": project_name} if project_name is not None else None
  ),
- )
- yield MetadataChangeProposalWrapper(
- entityUrn=model_key.as_urn(),
- aspect=BrowsePathsV2Class(
- path=[BrowsePathEntryClass("Explore")],
- ),
+ parent_container=["Explore"],
  )

  processed_models.append(model)
@@ -947,9 +866,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  ((model, explore) for (_project, model, explore) in explores_to_fetch),
  max_workers=self.source_config.max_threads,
  ):
- events, explore_id, start_time, end_time = future.result()
+ explore_dataset_entity, explore_id, start_time, end_time = future.result()
  self.reporter.explores_scanned += 1
- yield from events
+ if explore_dataset_entity:
+ yield explore_dataset_entity
  self.reporter.report_upstream_latency(start_time, end_time)
  logger.debug(
  f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
@@ -969,66 +889,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  def fetch_one_explore(
  self, model: str, explore: str
  ) -> Tuple[
- List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]],
+ Optional[Dataset],
  str,
  datetime.datetime,
  datetime.datetime,
  ]:
  start_time = datetime.datetime.now()
- events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
  looker_explore = self.explore_registry.get_explore(model, explore)
+ explore_dataset_entity: Optional[Dataset] = None
  if looker_explore is not None:
- events = (
- looker_explore._to_metadata_events(
- self.source_config,
- self.reporter,
- self.source_config.external_base_url or self.source_config.base_url,
- self.source_config.extract_embed_urls,
- )
- or events
+ explore_dataset_entity = looker_explore._to_metadata_events(
+ self.source_config,
+ self.reporter,
+ self.source_config.external_base_url or self.source_config.base_url,
+ self.source_config.extract_embed_urls,
  )

- return events, f"{model}:{explore}", start_time, datetime.datetime.now()
-
- def _extract_event_urn(
- self, event: Union[MetadataChangeEvent, MetadataChangeProposalWrapper]
- ) -> Optional[str]:
- if isinstance(event, MetadataChangeEvent):
- return event.proposedSnapshot.urn
- else:
- return event.entityUrn
+ return (
+ explore_dataset_entity,
+ f"{model}:{explore}",
+ start_time,
+ datetime.datetime.now(),
+ )

- def _emit_folder_as_container(
- self, folder: LookerFolder
- ) -> Iterable[MetadataWorkUnit]:
+ def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
  if folder.id not in self.processed_folders:
- yield from gen_containers(
- container_key=self._gen_folder_key(folder.id),
- name=folder.name,
- sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
- parent_container_key=(
- self._gen_folder_key(folder.parent_id) if folder.parent_id else None
- ),
- )
  if folder.parent_id is None:
- yield MetadataChangeProposalWrapper(
- entityUrn=self._gen_folder_key(folder.id).as_urn(),
- aspect=BrowsePathsV2Class(
- path=[BrowsePathEntryClass("Folders")],
- ),
- ).as_workunit()
+ yield Container(
+ container_key=self._gen_folder_key(folder.id),
+ display_name=folder.name,
+ subtype=BIContainerSubTypes.LOOKER_FOLDER,
+ parent_container=["Folders"],
+ )
  else:
- yield MetadataChangeProposalWrapper(
- entityUrn=self._gen_folder_key(folder.id).as_urn(),
- aspect=BrowsePathsV2Class(
- path=[
- BrowsePathEntryClass("Folders"),
- *self._get_folder_browse_path_v2_entries(
- folder, include_current_folder=False
- ),
- ],
- ),
- ).as_workunit()
+ yield Container(
+ container_key=self._gen_folder_key(folder.id),
+ display_name=folder.name,
+ subtype=BIContainerSubTypes.LOOKER_FOLDER,
+ parent_container=[
+ "Folders",
+ *self._get_folder_ancestors_urn_entries(
+ folder, include_current_folder=False
+ ),
+ ],
+ )
  self.processed_folders.append(folder.id)

  def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
@@ -1039,91 +943,54 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  instance=self.source_config.platform_instance,
  )

- def _make_dashboard_and_chart_mces(
+ def _make_dashboard_and_chart_entities(
  self, looker_dashboard: LookerDashboard
- ) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
+ ) -> Iterable[Union[Chart, Dashboard]]:
  # Step 1: Emit metadata for each Chart inside the Dashboard.
- chart_events = []
+ chart_events: List[Chart] = []
  for element in looker_dashboard.dashboard_elements:
  if element.type == "vis":
  chart_events.extend(
- self._make_chart_metadata_events(element, looker_dashboard)
+ self._make_chart_entities(element, looker_dashboard)
  )

  yield from chart_events

- # Step 2: Emit metadata events for the Dashboard itself.
- chart_urns: Set[str] = (
- set()
- ) # Collect the unique child chart urns for dashboard input lineage.
+ # # Step 2: Emit metadata events for the Dashboard itself.
+ # Create a set of unique chart entities for dashboard input lineage based in chart.urn
+ unique_chart_entities: List[Chart] = []
  for chart_event in chart_events:
- chart_event_urn = self._extract_event_urn(chart_event)
- if chart_event_urn:
- chart_urns.add(chart_event_urn)
-
- dashboard_events = self._make_dashboard_metadata_events(
- looker_dashboard, list(chart_urns)
+ # Use chart.urn to ensure uniqueness based on the chart's URN property
+ # Also, update the set of processed chart urns
+ if str(chart_event.urn) not in self.chart_urns:
+ self.chart_urns.add(str(chart_event.urn))
+ unique_chart_entities.append(chart_event)
+
+ dashboard_events = self._make_dashboard_entities(
+ looker_dashboard, unique_chart_entities
  )
  yield from dashboard_events

  def get_ownership(
  self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
- ) -> Optional[OwnershipClass]:
+ ) -> Optional[OwnerClass]:
  if looker_dashboard_look.owner is not None:
  owner_urn = looker_dashboard_look.owner.get_urn(
  self.source_config.strip_user_ids_from_email
  )
  if owner_urn is not None:
- ownership: OwnershipClass = OwnershipClass(
- owners=[
- OwnerClass(
- owner=owner_urn,
- type=OwnershipTypeClass.DATAOWNER,
- )
- ]
+ return OwnerClass(
+ owner=owner_urn,
+ type=OwnershipTypeClass.DATAOWNER,
  )
- return ownership
  return None

- def _get_change_audit_stamps(
- self, looker_dashboard: LookerDashboard
- ) -> ChangeAuditStamps:
- change_audit_stamp: ChangeAuditStamps = ChangeAuditStamps()
- if looker_dashboard.created_at is not None:
- change_audit_stamp.created.time = round(
- looker_dashboard.created_at.timestamp() * 1000
- )
- if looker_dashboard.owner is not None:
- owner_urn = looker_dashboard.owner.get_urn(
- self.source_config.strip_user_ids_from_email
- )
- if owner_urn:
- change_audit_stamp.created.actor = owner_urn
- if looker_dashboard.last_updated_at is not None:
- change_audit_stamp.lastModified.time = round(
- looker_dashboard.last_updated_at.timestamp() * 1000
- )
- if looker_dashboard.last_updated_by is not None:
- updated_by_urn = looker_dashboard.last_updated_by.get_urn(
- self.source_config.strip_user_ids_from_email
- )
- if updated_by_urn:
- change_audit_stamp.lastModified.actor = updated_by_urn
- if (
- looker_dashboard.is_deleted
- and looker_dashboard.deleted_by is not None
- and looker_dashboard.deleted_at is not None
- ):
- deleter_urn = looker_dashboard.deleted_by.get_urn(
- self.source_config.strip_user_ids_from_email
- )
- if deleter_urn:
- change_audit_stamp.deleted = AuditStamp(
- actor=deleter_urn,
- time=round(looker_dashboard.deleted_at.timestamp() * 1000),
- )
-
- return change_audit_stamp
+ def _get_last_modified_time(
+ self, looker_dashboard: Optional[LookerDashboard]
+ ) -> Optional[datetime.datetime]:
+ if looker_dashboard is None:
+ return None
+ return looker_dashboard.last_updated_at

  def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
  assert folder.id
@@ -1136,7 +1003,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  ]
  return "/".join(ancestors + [folder.name])

- def _get_looker_dashboard(self, dashboard: Dashboard) -> LookerDashboard:
+ def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
  self.reporter.accessed_dashboards += 1
  if dashboard.folder is None:
  logger.debug(f"{dashboard.id} has no folder")
@@ -1210,22 +1077,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):

  return user

- def process_metrics_dimensions_and_fields_for_dashboard(
- self, dashboard: LookerDashboard
- ) -> List[MetadataWorkUnit]:
- chart_mcps = [
- self._make_metrics_dimensions_chart_mcp(element)
- for element in dashboard.dashboard_elements
- ]
- dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
-
- mcps = chart_mcps
- mcps.append(dashboard_mcp)
-
- workunits = [mcp.as_workunit() for mcp in mcps]
-
- return workunits
-
  def _input_fields_from_dashboard_element(
  self, dashboard_element: LookerDashboardElement
  ) -> List[InputFieldClass]:
@@ -1318,104 +1169,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  aspect=input_fields_aspect,
  )

- def process_dashboard(
+ def _should_skip_personal_folder_dashboard(
+ self, dashboard_object: LookerAPIDashboard
+ ) -> bool:
+ """Check if dashboard should be skipped due to being in personal folder."""
+ if not self.source_config.skip_personal_folders:
+ return False
+
+ if dashboard_object.folder is not None and (
+ dashboard_object.folder.is_personal
+ or dashboard_object.folder.is_personal_descendant
+ ):
+ self.reporter.info(
+ title="Dropped Dashboard",
+ message="Dropped due to being a personal folder",
+ context=f"Dashboard ID: {dashboard_object.id}",
+ )
+ assert dashboard_object.id is not None
+ self.reporter.report_dashboards_dropped(dashboard_object.id)
+ return True
+ return False
+
+ def _should_skip_dashboard_by_folder_path(
+ self, looker_dashboard: LookerDashboard
+ ) -> bool:
+ """Check if dashboard should be skipped based on folder path pattern."""
+ if (
+ looker_dashboard.folder_path is not None
+ and not self.source_config.folder_path_pattern.allowed(
+ looker_dashboard.folder_path
+ )
+ ):
+ logger.debug(
+ f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
+ )
+ self.reporter.report_dashboards_dropped(looker_dashboard.id)
+ return True
+ return False
+
+ def _fetch_dashboard_from_api(
  self, dashboard_id: str, fields: List[str]
- ) -> Tuple[
- List[MetadataWorkUnit],
- Optional[looker_usage.LookerDashboardForUsage],
- str,
- datetime.datetime,
- datetime.datetime,
- ]:
- start_time = datetime.datetime.now()
- assert dashboard_id is not None
- if not self.source_config.dashboard_pattern.allowed(dashboard_id):
- self.reporter.report_dashboards_dropped(dashboard_id)
- return [], None, dashboard_id, start_time, datetime.datetime.now()
+ ) -> Optional[LookerAPIDashboard]:
+ """Fetch dashboard object from Looker API with error handling."""
  try:
- dashboard_object: Dashboard = self.looker_api.dashboard(
+ return self.looker_api.dashboard(
  dashboard_id=dashboard_id,
  fields=fields,
  )
  except (SDKError, DeserializeError) as e:
- # A looker dashboard could be deleted in between the list and the get
  self.reporter.report_warning(
  title="Failed to fetch dashboard from the Looker API",
  message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
  context=f"Dashboard ID: {dashboard_id}",
  exc=e,
  )
- return [], None, dashboard_id, start_time, datetime.datetime.now()
+ return None

- if self.source_config.skip_personal_folders:
- if dashboard_object.folder is not None and (
- dashboard_object.folder.is_personal
- or dashboard_object.folder.is_personal_descendant
- ):
- self.reporter.info(
- title="Dropped Dashboard",
- message="Dropped due to being a personal folder",
- context=f"Dashboard ID: {dashboard_id}",
- )
- self.reporter.report_dashboards_dropped(dashboard_id)
- return [], None, dashboard_id, start_time, datetime.datetime.now()
+ def _create_empty_result(
+ self, dashboard_id: str, start_time: datetime.datetime
+ ) -> DashboardProcessingResult:
+ """Create an empty result for skipped or failed dashboard processing."""
+ return DashboardProcessingResult(
+ entities=[],
+ dashboard_usage=None,
+ dashboard_id=dashboard_id,
+ start_time=start_time,
+ end_time=datetime.datetime.now(),
+ )

- looker_dashboard = self._get_looker_dashboard(dashboard_object)
+ def process_dashboard(
+ self, dashboard_id: str, fields: List[str]
+ ) -> DashboardProcessingResult:
+ """
+ Process a single dashboard and return the metadata workunits.

- workunits = []
- if (
- looker_dashboard.folder_path is not None
- and not self.source_config.folder_path_pattern.allowed(
- looker_dashboard.folder_path
- )
- ):
- logger.debug(
- f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
- )
- return [], None, dashboard_id, start_time, datetime.datetime.now()
+ Args:
+ dashboard_id: The ID of the dashboard to process
+ fields: List of fields to fetch from the Looker API

- if looker_dashboard.folder:
- workunits += list(
- self._get_folder_and_ancestors_workunits(looker_dashboard.folder)
- )
+ Returns:
+ DashboardProcessingResult containing entities, usage data, and timing information
+ """
+ start_time = datetime.datetime.now()

- mces = self._make_dashboard_and_chart_mces(looker_dashboard)
- workunits += [
- (
- MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
- if isinstance(mce, MetadataChangeEvent)
- else MetadataWorkUnit(
- id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
- )
- )
- for mce in mces
- ]
+ if dashboard_id is None:
+ raise ValueError("Dashboard ID cannot be None")

- # add on metrics, dimensions, fields events
- metric_dim_workunits = self.process_metrics_dimensions_and_fields_for_dashboard(
- looker_dashboard
+ # Fetch dashboard from API
+ dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
+ dashboard_id, fields
  )
+ if dashboard_object is None:
+ return self._create_empty_result(dashboard_id, start_time)

- workunits.extend(metric_dim_workunits)
+ # Check if dashboard should be skipped due to personal folder
+ if self._should_skip_personal_folder_dashboard(dashboard_object):
+ return self._create_empty_result(dashboard_id, start_time)

+ # Convert to internal representation
+ looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
+
+ # Check folder path pattern
+ if self._should_skip_dashboard_by_folder_path(looker_dashboard):
+ return self._create_empty_result(dashboard_id, start_time)
+
+ # Build entities list
+ entities: List[Entity] = []
+
+ # Add folder containers if dashboard has a folder
+ if looker_dashboard.folder:
+ entities.extend(
+ list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
+ )
+
+ # Add dashboard and chart entities
+ entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
+
+ # Report successful processing
  self.reporter.report_dashboards_scanned()

- # generate usage tracking object
+ # Generate usage tracking object
  dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
  dashboard_object
  )

- return (
- workunits,
- dashboard_usage,
- dashboard_id,
- start_time,
- datetime.datetime.now(),
+ return DashboardProcessingResult(
+ entities=entities,
+ dashboard_usage=dashboard_usage,
+ dashboard_id=dashboard_id,
+ start_time=start_time,
+ end_time=datetime.datetime.now(),
  )

- def _get_folder_and_ancestors_workunits(
+ def _get_folder_and_ancestors_containers(
  self, folder: LookerFolder
- ) -> Iterable[MetadataWorkUnit]:
+ ) -> Iterable[Container]:
  for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
  yield from self._emit_folder_as_container(
  self._get_looker_folder(ancestor_folder)
@@ -1486,39 +1374,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  ).workunit_processor,
  ]

- def emit_independent_looks_mcp(
+ def emit_independent_looks_entities(
  self, dashboard_element: LookerDashboardElement
- ) -> Iterable[MetadataWorkUnit]:
+ ) -> Iterable[Union[Container, Chart]]:
  if dashboard_element.folder: # independent look
- yield from self._get_folder_and_ancestors_workunits(
+ yield from self._get_folder_and_ancestors_containers(
  dashboard_element.folder
  )

- yield from auto_workunit(
- stream=self._make_chart_metadata_events(
- dashboard_element=dashboard_element,
- dashboard=None,
- )
- )
-
- yield from auto_workunit(
- [
- self._make_metrics_dimensions_chart_mcp(
- dashboard_element,
- )
- ]
+ yield from self._make_chart_entities(
+ dashboard_element=dashboard_element,
+ dashboard=None,
  )

- def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]:
+ def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
  """
- Emit MetadataWorkUnit for looks which are not part of any Dashboard
- """
- if self.source_config.extract_independent_looks is False:
- return
+ Emit entities for Looks which are not part of any Dashboard.

- self.reporter.report_stage_start("extract_independent_looks")
+ Returns: Containers for the folders and ancestors folders and Charts for the looks
+ """
+ logger.debug("Extracting Looks not part of any Dashboard")

- logger.debug("Extracting looks not part of Dashboard")
  look_fields: List[str] = [
  "id",
  "title",
@@ -1540,15 +1416,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  all_looks: List[Look] = self.looker_api.all_looks(
  fields=look_fields, soft_deleted=self.source_config.include_deleted
  )
+
  for look in all_looks:
+ # Skip looks that are already referenced from a dashboard
+ if look.id is None:
+ logger.warning("Encountered Look with no ID, skipping.")
+ continue
+
  if look.id in self.reachable_look_registry:
- # This look is reachable from the Dashboard
  continue

  if look.query_id is None:
  logger.info(f"query_id is None for look {look.title}({look.id})")
  continue

+ # Skip looks in personal folders if configured
  if self.source_config.skip_personal_folders:
  if look.folder is not None and (
  look.folder.is_personal or look.folder.is_personal_descendant
@@ -1559,76 +1441,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  ):
  context=f"Look ID: {look.id}",
  )
- assert look.id, "Looker id is null"
  self.reporter.report_charts_dropped(look.id)
  continue

- if look.id is not None:
- query: Optional[Query] = self.looker_api.get_look(
- look.id, fields=["query"]
- ).query
- # Only include fields that are in the query_fields list
- query = Query(
- **{
- key: getattr(query, key)
- for key in query_fields
- if hasattr(query, key)
- }
- )
+ # Fetch the Look's query and filter to allowed fields
+ query: Optional[Query] = None
+ try:
+ look_with_query = self.looker_api.get_look(look.id, fields=["query"])
+ query_obj = look_with_query.query
+ if query_obj:
+ query = Query(
+ **{
+ key: getattr(query_obj, key)
+ for key in query_fields
+ if hasattr(query_obj, key)
+ }
+ )
+ except Exception as exc:
+ logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
+ continue

- dashboard_element: Optional[LookerDashboardElement] = (
- self._get_looker_dashboard_element(
- DashboardElement(
- id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
- # we add the "looks_" prefix to look.id.
- title=look.title,
- subtitle_text=look.description,
- look_id=look.id,
- dashboard_id=None, # As this is an independent look
- look=LookWithQuery(
- query=query, folder=look.folder, user_id=look.user_id
- ),
+ dashboard_element = self._get_looker_dashboard_element(
+ DashboardElement(
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
+ # we add the "looks_" prefix to look.id.
+ title=look.title,
+ subtitle_text=look.description,
+ look_id=look.id,
+ dashboard_id=None, # As this is an independent look
+ look=LookWithQuery(
+ query=query,
+ folder=getattr(look, "folder", None),
+ user_id=getattr(look, "user_id", None),
  ),
  )
  )

  if dashboard_element is not None:
- logger.debug(f"Emitting MCPS for look {look.title}({look.id})")
- yield from self.emit_independent_looks_mcp(
+ logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
+ yield from self.emit_independent_looks_entities(
  dashboard_element=dashboard_element
  )

- self.reporter.report_stage_end("extract_independent_looks")
+ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
+ """
+ Note: Returns Entities from SDKv2 where possible else MCPs only.

- def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
- self.reporter.report_stage_start("list_dashboards")
- dashboards = self.looker_api.all_dashboards(fields="id")
- deleted_dashboards = (
- self.looker_api.search_dashboards(fields="id", deleted="true")
- if self.source_config.include_deleted
- else []
- )
- if deleted_dashboards != []:
- logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
+ Using SDKv2: Containers, Datasets, Dashboards and Charts
+ Using MCPW: Tags, DashboardUsageStats and UserResourceMapping

- dashboard_ids = [dashboard_base.id for dashboard_base in dashboards]
- dashboard_ids.extend(
- [deleted_dashboard.id for deleted_dashboard in deleted_dashboards]
- )
- selected_dashboard_ids: List[Optional[str]] = []
- for id in dashboard_ids:
- if id is None:
- continue
- if not self.source_config.dashboard_pattern.allowed(id):
- self.reporter.report_dashboards_dropped(id)
+ TODO: Convert MCPWs to use SDKv2 entities
+ """
+ with self.reporter.report_stage("list_dashboards"):
+ # Fetch all dashboards (not deleted)
+ dashboards = self.looker_api.all_dashboards(fields="id")
+
+ # Optionally fetch deleted dashboards if configured
+ if self.source_config.include_deleted:
+ deleted_dashboards = self.looker_api.search_dashboards(
+ fields="id", deleted="true"
+ )
  else:
- selected_dashboard_ids.append(id)
- dashboard_ids = selected_dashboard_ids
- self.reporter.report_stage_end("list_dashboards")
- self.reporter.report_total_dashboards(len(dashboard_ids))
+ deleted_dashboards = []
+
+ if deleted_dashboards:
+ logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
+
+ # Collect all dashboard IDs (including deleted if applicable)
+ all_dashboard_ids: List[Optional[str]] = [
+ dashboard.id for dashboard in dashboards
+ ]
+ all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])
+
+ # Filter dashboard IDs based on the allowed pattern
+ filtered_dashboard_ids: List[str] = []
+ for dashboard_id in all_dashboard_ids:
+ if dashboard_id is None:
+ continue
+ if not self.source_config.dashboard_pattern.allowed(dashboard_id):
+ self.reporter.report_dashboards_dropped(dashboard_id)
+ else:
+ filtered_dashboard_ids.append(dashboard_id)
+
+ # Use the filtered list for further processing
+ dashboard_ids: List[str] = filtered_dashboard_ids
+
+ # Report the total number of dashboards to be processed
+ self.reporter.report_total_dashboards(len(dashboard_ids))

- # List dashboard fields to extract for processing
- fields = [
+ # Define the fields to extract for each dashboard
+ dashboard_fields = [
  "id",
  "title",
  "dashboard_elements",
@@ -1644,41 +1546,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  "deleted_at",
  "deleter_id",
  ]
+
+ # Add usage-related fields if usage history extraction is enabled
  if self.source_config.extract_usage_history:
- fields += [
- "favorite_count",
- "view_count",
- "last_viewed_at",
- ]
+ dashboard_fields.extend(
+ [
+ "favorite_count",
+ "view_count",
+ "last_viewed_at",
+ ]
+ )

+ # Store dashboards for which usage stats will be extracted
  looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []

+ # Process dashboard and chart metadata
  with self.reporter.report_stage("dashboard_chart_metadata"):
+ dashboard_jobs = (
+ (dashboard_id, dashboard_fields)
+ for dashboard_id in dashboard_ids
+ if dashboard_id is not None
+ )
  for job in BackpressureAwareExecutor.map(
  self.process_dashboard,
- (
- (dashboard_id, fields)
- for dashboard_id in dashboard_ids
- if dashboard_id is not None
- ),
+ dashboard_jobs,
  max_workers=self.source_config.max_threads,
  ):
- (
- work_units,
- dashboard_usage,
- dashboard_id,
- start_time,
- end_time,
- ) = job.result()
+ result: DashboardProcessingResult = job.result()
+
  logger.debug(
- f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
+ f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
+ )
+ self.reporter.report_upstream_latency(
+ result.start_time, result.end_time
  )
- self.reporter.report_upstream_latency(start_time, end_time)

- yield from work_units
- if dashboard_usage is not None:
- looker_dashboards_for_usage.append(dashboard_usage)
+ yield from result.entities

+ if result.dashboard_usage is not None:
+ looker_dashboards_for_usage.append(result.dashboard_usage)
+
+ # Warn if owner extraction was enabled but no emails could be found
  if (
  self.source_config.extract_owners
  and self.reporter.resolved_user_ids > 0
@@ -1690,53 +1598,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
  )

- # Extract independent look here, so that explore of this look would get consider in _make_explore_metadata_events
- yield from self.extract_independent_looks()
-
- self.reporter.report_stage_start("explore_metadata")
+ # Extract independent looks first, so their explores are considered in _make_explore_containers.
+ if self.source_config.extract_independent_looks:
+ with self.reporter.report_stage("extract_independent_looks"):
+ yield from self.extract_independent_looks()

- for event in self._make_explore_metadata_events():
- if isinstance(event, MetadataChangeEvent):
- yield MetadataWorkUnit(
- id=f"looker-{event.proposedSnapshot.urn}", mce=event
- )
- elif isinstance(event, MetadataChangeProposalWrapper):
- yield event.as_workunit()
- elif isinstance(event, MetadataWorkUnit):
- yield event
- else:
- raise Exception(f"Unexpected type of event {event}")
- self.reporter.report_stage_end("explore_metadata")
+ # Process explore containers and yield them.
+ with self.reporter.report_stage("explore_metadata"):
+ yield from self._make_explore_containers()

  if (
  self.source_config.tag_measures_and_dimensions
  and self.reporter.explores_scanned > 0
  ):
- # Emit tag MCEs for measures and dimensions if we produced any explores:
+ # Emit tag MCPs for measures and dimensions if we produced any explores:
+ # Tags MCEs are converted to MCPs
  for tag_mce in LookerUtil.get_tag_mces():
- yield MetadataWorkUnit(
- id=f"tag-{tag_mce.proposedSnapshot.urn}",
- mce=tag_mce,
- )
+ yield from auto_workunit(mcps_from_mce(tag_mce))

  # Extract usage history is enabled
  if self.source_config.extract_usage_history:
- self.reporter.report_stage_start("usage_extraction")
- usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
- looker_dashboards_for_usage, self.chart_urns
- )
- for usage_mcp in usage_mcps:
- yield usage_mcp.as_workunit()
- self.reporter.report_stage_end("usage_extraction")
+ with self.reporter.report_stage("usage_extraction"):
+ usage_mcps: List[MetadataChangeProposalWrapper] = (
+ self.extract_usage_stat(
+ looker_dashboards_for_usage, self.chart_urns
+ )
+ )
+ yield from auto_workunit(usage_mcps)

- # Dump looker user resource mappings.
+ # Ingest looker user resource mapping workunits.
  logger.info("Ingesting looker user resource mapping workunits")
- self.reporter.report_stage_start("user_resource_extraction")
- yield from auto_workunit(
- self.user_registry.to_platform_resource(
- self.source_config.platform_instance
+ with self.reporter.report_stage("user_resource_extraction"):
+ yield from auto_workunit(
+ self.user_registry.to_platform_resource(
+ self.source_config.platform_instance
+ )
  )
- )

  def get_report(self) -> SourceReport:
  return self.reporter
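
For reference, process_dashboard no longer returns a five-element tuple; it returns the DashboardProcessingResult dataclass added in this diff, which get_workunits_internal consumes via BackpressureAwareExecutor.map. The following self-contained sketch mirrors that shape, with the datahub-specific field types loosened to Any so it runs on its own; the consume() helper and its sample values are hypothetical.

import datetime
from dataclasses import dataclass
from typing import Any, List, Optional


@dataclass
class DashboardProcessingResult:
    # Field names mirror the dataclass introduced in the diff; the entity and
    # usage types are loosened to Any here so the sketch has no dependencies.
    entities: List[Any]
    dashboard_usage: Optional[Any]
    dashboard_id: str
    start_time: datetime.datetime
    end_time: datetime.datetime


def consume(results: List[DashboardProcessingResult]) -> None:
    # Mirrors how get_workunits_internal handles job.result(): emit the
    # entities, keep the usage objects, and report per-dashboard latency.
    collected_usage: List[Any] = []
    for result in results:
        elapsed = (result.end_time - result.start_time).total_seconds()
        print(f"{result.dashboard_id}: {len(result.entities)} entities in {elapsed:.3f}s")
        if result.dashboard_usage is not None:
            collected_usage.append(result.dashboard_usage)


if __name__ == "__main__":
    now = datetime.datetime.now()
    consume([DashboardProcessingResult([], None, "dashboards.42", now, now)])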