acryl-datahub: acryl_datahub-1.2.0.9rc1-py3-none-any.whl → acryl_datahub-1.2.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. More details are linked from the original registry diff page.

Files changed (120)
  1. {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/METADATA +2568 -2626
  2. {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/RECORD +120 -113
  3. {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/dataproduct/dataproduct.py +6 -3
  8. datahub/api/entities/dataset/dataset.py +9 -18
  9. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  10. datahub/api/graphql/operation.py +10 -6
  11. datahub/cli/docker_check.py +2 -2
  12. datahub/configuration/common.py +29 -1
  13. datahub/configuration/connection_resolver.py +5 -2
  14. datahub/configuration/import_resolver.py +7 -4
  15. datahub/configuration/pydantic_migration_helpers.py +0 -9
  16. datahub/configuration/source_common.py +3 -2
  17. datahub/configuration/validate_field_deprecation.py +5 -2
  18. datahub/configuration/validate_field_removal.py +5 -2
  19. datahub/configuration/validate_field_rename.py +6 -5
  20. datahub/configuration/validate_multiline_string.py +5 -2
  21. datahub/ingestion/autogenerated/capability_summary.json +45 -1
  22. datahub/ingestion/run/pipeline_config.py +2 -2
  23. datahub/ingestion/source/azure/azure_common.py +1 -1
  24. datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
  25. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  26. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -0
  27. datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
  28. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  29. datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
  30. datahub/ingestion/source/datahub/config.py +8 -9
  31. datahub/ingestion/source/dbt/dbt_common.py +65 -5
  32. datahub/ingestion/source/delta_lake/config.py +1 -1
  33. datahub/ingestion/source/dremio/dremio_config.py +3 -4
  34. datahub/ingestion/source/feast.py +8 -10
  35. datahub/ingestion/source/fivetran/config.py +1 -1
  36. datahub/ingestion/source/gcs/gcs_source.py +19 -2
  37. datahub/ingestion/source/ge_data_profiler.py +15 -2
  38. datahub/ingestion/source/ge_profiling_config.py +26 -22
  39. datahub/ingestion/source/grafana/grafana_config.py +2 -2
  40. datahub/ingestion/source/grafana/models.py +12 -14
  41. datahub/ingestion/source/hex/hex.py +6 -1
  42. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  43. datahub/ingestion/source/kafka_connect/common.py +2 -2
  44. datahub/ingestion/source/looker/looker_common.py +76 -75
  45. datahub/ingestion/source/looker/looker_config.py +15 -4
  46. datahub/ingestion/source/looker/looker_source.py +493 -547
  47. datahub/ingestion/source/looker/lookml_config.py +1 -1
  48. datahub/ingestion/source/looker/lookml_source.py +46 -88
  49. datahub/ingestion/source/metabase.py +9 -2
  50. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  51. datahub/ingestion/source/metadata/lineage.py +1 -1
  52. datahub/ingestion/source/mode.py +13 -5
  53. datahub/ingestion/source/nifi.py +1 -1
  54. datahub/ingestion/source/powerbi/config.py +14 -21
  55. datahub/ingestion/source/preset.py +1 -1
  56. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  57. datahub/ingestion/source/redash.py +1 -1
  58. datahub/ingestion/source/redshift/config.py +6 -3
  59. datahub/ingestion/source/redshift/query.py +23 -19
  60. datahub/ingestion/source/s3/source.py +26 -24
  61. datahub/ingestion/source/salesforce.py +13 -9
  62. datahub/ingestion/source/schema/json_schema.py +14 -14
  63. datahub/ingestion/source/sigma/data_classes.py +3 -0
  64. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  65. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  66. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  67. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  68. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  69. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  70. datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
  71. datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
  72. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
  73. datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
  74. datahub/ingestion/source/sql/athena.py +2 -1
  75. datahub/ingestion/source/sql/clickhouse.py +12 -7
  76. datahub/ingestion/source/sql/cockroachdb.py +5 -3
  77. datahub/ingestion/source/sql/druid.py +2 -2
  78. datahub/ingestion/source/sql/hive.py +4 -3
  79. datahub/ingestion/source/sql/hive_metastore.py +7 -9
  80. datahub/ingestion/source/sql/mssql/source.py +2 -2
  81. datahub/ingestion/source/sql/mysql.py +2 -2
  82. datahub/ingestion/source/sql/oracle.py +3 -3
  83. datahub/ingestion/source/sql/presto.py +2 -1
  84. datahub/ingestion/source/sql/teradata.py +4 -4
  85. datahub/ingestion/source/sql/trino.py +2 -1
  86. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  87. datahub/ingestion/source/sql/vertica.py +1 -1
  88. datahub/ingestion/source/sql_queries.py +6 -6
  89. datahub/ingestion/source/state/checkpoint.py +5 -1
  90. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  91. datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
  92. datahub/ingestion/source/superset.py +122 -15
  93. datahub/ingestion/source/tableau/tableau.py +68 -14
  94. datahub/ingestion/source/tableau/tableau_common.py +5 -0
  95. datahub/ingestion/source/tableau/tableau_constant.py +1 -0
  96. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  97. datahub/ingestion/source/unity/config.py +7 -3
  98. datahub/ingestion/source/usage/usage_common.py +3 -3
  99. datahub/ingestion/source_config/pulsar.py +3 -1
  100. datahub/ingestion/transformer/set_browse_path.py +112 -0
  101. datahub/metadata/_internal_schema_classes.py +728 -528
  102. datahub/metadata/_urns/urn_defs.py +1702 -1702
  103. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  104. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  105. datahub/metadata/schema.avsc +17434 -17732
  106. datahub/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
  107. datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
  108. datahub/metadata/schemas/LogicalParent.avsc +2 -1
  109. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  110. datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
  111. datahub/sdk/_shared.py +126 -0
  112. datahub/sdk/chart.py +87 -30
  113. datahub/sdk/dashboard.py +79 -34
  114. datahub/sdk/entity_client.py +11 -4
  115. datahub/sdk/lineage_client.py +3 -3
  116. datahub/sdk/search_filters.py +1 -7
  117. datahub/sql_parsing/split_statements.py +13 -0
  118. {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/WHEEL +0 -0
  119. {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/licenses/LICENSE +0 -0
  120. {acryl_datahub-1.2.0.9rc1.dist-info → acryl_datahub-1.2.0.10.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  import datetime
2
2
  import json
3
3
  import logging
4
+ from dataclasses import dataclass
4
5
  from json import JSONDecodeError
5
6
  from typing import (
6
7
  Any,
@@ -18,7 +19,7 @@ from typing import (
18
19
  from looker_sdk.error import SDKError
19
20
  from looker_sdk.rtl.serialize import DeserializeError
20
21
  from looker_sdk.sdk.api40.models import (
21
- Dashboard,
22
+ Dashboard as LookerAPIDashboard,
22
23
  DashboardElement,
23
24
  Folder,
24
25
  FolderBase,
@@ -29,7 +30,7 @@ from looker_sdk.sdk.api40.models import (
29
30
 
30
31
  import datahub.emitter.mce_builder as builder
31
32
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
32
- from datahub.emitter.mcp_builder import create_embed_mcp, gen_containers
33
+ from datahub.emitter.mcp_builder import mcps_from_mce
33
34
  from datahub.ingestion.api.common import PipelineContext
34
35
  from datahub.ingestion.api.decorators import (
35
36
  SupportStatus,
@@ -80,36 +81,38 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
80
81
  StatefulIngestionSourceBase,
81
82
  )
82
83
  from datahub.metadata.com.linkedin.pegasus2avro.common import (
83
- AuditStamp,
84
- ChangeAuditStamps,
85
- DataPlatformInstance,
86
84
  Status,
87
85
  )
88
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
89
- ChartSnapshot,
90
- DashboardSnapshot,
91
- )
92
- from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
93
86
  from datahub.metadata.schema_classes import (
94
- BrowsePathEntryClass,
95
- BrowsePathsClass,
96
- BrowsePathsV2Class,
97
- ChartInfoClass,
98
87
  ChartTypeClass,
99
- ContainerClass,
100
- DashboardInfoClass,
88
+ EmbedClass,
101
89
  InputFieldClass,
102
90
  InputFieldsClass,
103
91
  OwnerClass,
104
- OwnershipClass,
105
92
  OwnershipTypeClass,
106
- SubTypesClass,
107
93
  )
94
+ from datahub.sdk.chart import Chart
95
+ from datahub.sdk.container import Container
96
+ from datahub.sdk.dashboard import Dashboard
97
+ from datahub.sdk.dataset import Dataset
98
+ from datahub.sdk.entity import Entity
108
99
  from datahub.utilities.backpressure_aware_executor import BackpressureAwareExecutor
100
+ from datahub.utilities.sentinels import Unset, unset
109
101
 
110
102
  logger = logging.getLogger(__name__)
111
103
 
112
104
 
105
+ @dataclass
106
+ class DashboardProcessingResult:
107
+ """Result of processing a single dashboard."""
108
+
109
+ entities: List[Entity]
110
+ dashboard_usage: Optional[looker_usage.LookerDashboardForUsage]
111
+ dashboard_id: str
112
+ start_time: datetime.datetime
113
+ end_time: datetime.datetime
114
+
115
+
113
116
  @platform_name("Looker")
114
117
  @support_status(SupportStatus.CERTIFIED)
115
118
  @config_class(LookerDashboardSourceConfig)
@@ -633,35 +636,17 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
633
636
 
634
637
  return chart_type
635
638
 
636
- def _get_folder_browse_path_v2_entries(
639
+ def _get_folder_ancestors_urn_entries(
637
640
  self, folder: LookerFolder, include_current_folder: bool = True
638
- ) -> Iterable[BrowsePathEntryClass]:
641
+ ) -> Iterable[str]:
639
642
  for ancestor in self.looker_api.folder_ancestors(folder_id=folder.id):
640
- assert ancestor.id
643
+ assert ancestor.id # to make the linter happy as `Folder` has id field marked optional - which is always returned by the API
641
644
  urn = self._gen_folder_key(ancestor.id).as_urn()
642
- yield BrowsePathEntryClass(id=urn, urn=urn)
645
+ yield urn
643
646
 
644
647
  urn = self._gen_folder_key(folder.id).as_urn()
645
648
  if include_current_folder:
646
- yield BrowsePathEntryClass(id=urn, urn=urn)
647
-
648
- def _create_platform_instance_aspect(
649
- self,
650
- ) -> DataPlatformInstance:
651
- assert self.source_config.platform_name, (
652
- "Platform name is not set in the configuration."
653
- )
654
- assert self.source_config.platform_instance, (
655
- "Platform instance is not set in the configuration."
656
- )
657
-
658
- return DataPlatformInstance(
659
- platform=builder.make_data_platform_urn(self.source_config.platform_name),
660
- instance=builder.make_dataplatform_instance_urn(
661
- platform=self.source_config.platform_name,
662
- instance=self.source_config.platform_instance,
663
- ),
664
- )
649
+ yield urn
665
650
 
666
651
  def _make_chart_urn(self, element_id: str) -> str:
667
652
  platform_instance: Optional[str] = None
@@ -674,104 +659,46 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
674
659
  platform_instance=platform_instance,
675
660
  )
676
661
 
677
- def _make_chart_metadata_events(
662
+ def _make_chart_entities(
678
663
  self,
679
664
  dashboard_element: LookerDashboardElement,
680
665
  dashboard: Optional[
681
666
  LookerDashboard
682
667
  ], # dashboard will be None if this is a standalone look
683
- ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
684
- chart_urn = self._make_chart_urn(
685
- element_id=dashboard_element.get_urn_element_id()
686
- )
687
- self.chart_urns.add(chart_urn)
688
- chart_snapshot = ChartSnapshot(
689
- urn=chart_urn,
690
- aspects=[Status(removed=False)],
691
- )
692
- browse_path_v2: Optional[BrowsePathsV2Class] = None
693
-
694
- chart_type = self._get_chart_type(dashboard_element)
695
- chart_info = ChartInfoClass(
696
- type=chart_type,
697
- description=dashboard_element.description or "",
698
- title=dashboard_element.title or "",
699
- lastModified=ChangeAuditStamps(),
700
- chartUrl=dashboard_element.url(self.source_config.external_base_url or ""),
701
- inputs=dashboard_element.get_view_urns(self.source_config),
702
- customProperties={
703
- "upstream_fields": (
704
- ",".join(
705
- sorted({field.name for field in dashboard_element.input_fields})
706
- )
707
- if dashboard_element.input_fields
708
- else ""
709
- )
710
- },
711
- )
712
- chart_snapshot.aspects.append(chart_info)
713
-
668
+ ) -> List[Chart]:
669
+ chart_parent_container: Union[List[str], Unset] = unset
714
670
  if (
715
671
  dashboard
716
672
  and dashboard.folder_path is not None
717
673
  and dashboard.folder is not None
718
674
  ):
719
- browse_path = BrowsePathsClass(
720
- paths=[f"/Folders/{dashboard.folder_path}/{dashboard.title}"]
721
- )
722
- chart_snapshot.aspects.append(browse_path)
723
-
724
- dashboard_urn = self.make_dashboard_urn(dashboard)
725
- browse_path_v2 = BrowsePathsV2Class(
726
- path=[
727
- BrowsePathEntryClass("Folders"),
728
- *self._get_folder_browse_path_v2_entries(dashboard.folder),
729
- BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn),
730
- ],
731
- )
675
+ chart_parent_container = [
676
+ "Folders",
677
+ *self._get_folder_ancestors_urn_entries(dashboard.folder),
678
+ self.make_dashboard_urn(dashboard),
679
+ ]
732
680
  elif (
733
681
  dashboard is None
734
682
  and dashboard_element.folder_path is not None
735
683
  and dashboard_element.folder is not None
736
- ): # independent look
737
- browse_path = BrowsePathsClass(
738
- paths=[f"/Folders/{dashboard_element.folder_path}"]
739
- )
740
- chart_snapshot.aspects.append(browse_path)
741
- browse_path_v2 = BrowsePathsV2Class(
742
- path=[
743
- BrowsePathEntryClass("Folders"),
744
- *self._get_folder_browse_path_v2_entries(dashboard_element.folder),
745
- ],
746
- )
684
+ ): # Independent look
685
+ chart_parent_container = [
686
+ "Folders",
687
+ *self._get_folder_ancestors_urn_entries(dashboard_element.folder),
688
+ ]
747
689
 
690
+ # Determine chart ownership
691
+ chart_ownership: Optional[List[OwnerClass]] = None
748
692
  if dashboard is not None:
749
693
  ownership = self.get_ownership(dashboard)
750
694
  if ownership is not None:
751
- chart_snapshot.aspects.append(ownership)
695
+ chart_ownership = [ownership]
752
696
  elif dashboard is None and dashboard_element is not None:
753
697
  ownership = self.get_ownership(dashboard_element)
754
698
  if ownership is not None:
755
- chart_snapshot.aspects.append(ownership)
756
-
757
- chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
758
-
759
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
760
- chart_mce,
761
- MetadataChangeProposalWrapper(
762
- entityUrn=chart_urn,
763
- aspect=SubTypesClass(typeNames=[BIAssetSubTypes.LOOKER_LOOK]),
764
- ),
765
- ]
766
-
767
- if self.source_config.include_platform_instance_in_urns:
768
- proposals.append(
769
- MetadataChangeProposalWrapper(
770
- entityUrn=chart_urn,
771
- aspect=self._create_platform_instance_aspect(),
772
- ),
773
- )
699
+ chart_ownership = [ownership]
774
700
 
701
+ chart_extra_aspects: List[Union[InputFieldsClass, EmbedClass]] = []
775
702
  # If extracting embeds is enabled, produce an MCP for embed URL.
776
703
  if (
777
704
  self.source_config.extract_embed_urls
@@ -781,111 +708,124 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
781
708
  self.source_config.external_base_url
782
709
  )
783
710
  if maybe_embed_url:
784
- proposals.append(
785
- create_embed_mcp(
786
- chart_snapshot.urn,
787
- maybe_embed_url,
788
- )
789
- )
711
+ chart_extra_aspects.append(EmbedClass(renderUrl=maybe_embed_url))
790
712
 
791
- if dashboard is None and dashboard_element.folder:
792
- container = ContainerClass(
793
- container=self._gen_folder_key(dashboard_element.folder.id).as_urn(),
713
+ chart_extra_aspects.append(
714
+ InputFieldsClass(
715
+ fields=self._input_fields_from_dashboard_element(dashboard_element)
794
716
  )
795
- proposals.append(
796
- MetadataChangeProposalWrapper(entityUrn=chart_urn, aspect=container)
797
- )
798
-
799
- if browse_path_v2:
800
- proposals.append(
801
- MetadataChangeProposalWrapper(
802
- entityUrn=chart_urn, aspect=browse_path_v2
803
- )
804
- )
805
-
806
- return proposals
807
-
808
- def _make_dashboard_metadata_events(
809
- self, looker_dashboard: LookerDashboard, chart_urns: List[str]
810
- ) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
811
- dashboard_urn = self.make_dashboard_urn(looker_dashboard)
812
- dashboard_snapshot = DashboardSnapshot(
813
- urn=dashboard_urn,
814
- aspects=[],
815
- )
816
- browse_path_v2: Optional[BrowsePathsV2Class] = None
817
- dashboard_info = DashboardInfoClass(
818
- description=looker_dashboard.description or "",
819
- title=looker_dashboard.title,
820
- charts=chart_urns,
821
- lastModified=self._get_change_audit_stamps(looker_dashboard),
822
- dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
823
717
  )
824
-
825
- dashboard_snapshot.aspects.append(dashboard_info)
826
- if (
827
- looker_dashboard.folder_path is not None
828
- and looker_dashboard.folder is not None
829
- ):
830
- browse_path = BrowsePathsClass(
831
- paths=[f"/Folders/{looker_dashboard.folder_path}"]
832
- )
833
- browse_path_v2 = BrowsePathsV2Class(
834
- path=[
835
- BrowsePathEntryClass("Folders"),
836
- *self._get_folder_browse_path_v2_entries(looker_dashboard.folder),
837
- ],
718
+ return [
719
+ Chart(
720
+ chart_type=self._get_chart_type(dashboard_element),
721
+ chart_url=dashboard_element.url(
722
+ self.source_config.external_base_url or ""
723
+ ),
724
+ custom_properties={
725
+ "upstream_fields": (
726
+ ",".join(
727
+ sorted(
728
+ {field.name for field in dashboard_element.input_fields}
729
+ )
730
+ )
731
+ if dashboard_element.input_fields
732
+ else ""
733
+ )
734
+ },
735
+ description=dashboard_element.description or "",
736
+ display_name=dashboard_element.title, # title is (deprecated) using display_name
737
+ extra_aspects=chart_extra_aspects,
738
+ input_datasets=dashboard_element.get_view_urns(self.source_config),
739
+ last_modified=self._get_last_modified_time(
740
+ dashboard
741
+ ), # Inherited from Dashboard
742
+ last_modified_by=self._get_last_modified_by(
743
+ dashboard
744
+ ), # Inherited from Dashboard
745
+ created_at=self._get_created_at(dashboard), # Inherited from Dashboard
746
+ created_by=self._get_created_by(dashboard), # Inherited from Dashboard
747
+ deleted_on=self._get_deleted_on(dashboard), # Inherited from Dashboard
748
+ deleted_by=self._get_deleted_by(dashboard), # Inherited from Dashboard
749
+ name=dashboard_element.get_urn_element_id(),
750
+ owners=chart_ownership,
751
+ parent_container=chart_parent_container,
752
+ platform=self.source_config.platform_name,
753
+ platform_instance=self.source_config.platform_instance
754
+ if self.source_config.include_platform_instance_in_urns
755
+ else None,
756
+ subtype=BIAssetSubTypes.LOOKER_LOOK,
838
757
  )
839
- dashboard_snapshot.aspects.append(browse_path)
840
-
841
- ownership = self.get_ownership(looker_dashboard)
842
- if ownership is not None:
843
- dashboard_snapshot.aspects.append(ownership)
844
-
845
- dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))
846
-
847
- dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)
848
-
849
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
850
- dashboard_mce
851
758
  ]
852
759
 
853
- if looker_dashboard.folder is not None:
854
- container = ContainerClass(
855
- container=self._gen_folder_key(looker_dashboard.folder.id).as_urn(),
856
- )
857
- proposals.append(
858
- MetadataChangeProposalWrapper(entityUrn=dashboard_urn, aspect=container)
859
- )
760
+ def _make_dashboard_entities(
761
+ self, looker_dashboard: LookerDashboard, charts: List[Chart]
762
+ ) -> List[Dashboard]:
763
+ dashboard_ownership: Optional[List[OwnerClass]] = None
764
+ ownership: Optional[OwnerClass] = self.get_ownership(looker_dashboard)
765
+ if ownership is not None:
766
+ dashboard_ownership = [ownership]
860
767
 
861
- if browse_path_v2:
862
- proposals.append(
863
- MetadataChangeProposalWrapper(
864
- entityUrn=dashboard_urn, aspect=browse_path_v2
865
- )
866
- )
768
+ # Extra Aspects not yet supported in the Dashboard entity class SDKv2
769
+ dashboard_extra_aspects: List[Union[EmbedClass, InputFieldsClass, Status]] = []
867
770
 
868
- # If extracting embeds is enabled, produce an MCP for embed URL.
771
+ # Embed URL aspect
869
772
  if (
870
773
  self.source_config.extract_embed_urls
871
774
  and self.source_config.external_base_url
872
775
  ):
873
- proposals.append(
874
- create_embed_mcp(
875
- dashboard_snapshot.urn,
876
- looker_dashboard.embed_url(self.source_config.external_base_url),
776
+ dashboard_extra_aspects.append(
777
+ EmbedClass(
778
+ renderUrl=looker_dashboard.embed_url(
779
+ self.source_config.external_base_url
780
+ )
877
781
  )
878
782
  )
879
783
 
880
- if self.source_config.include_platform_instance_in_urns:
881
- proposals.append(
882
- MetadataChangeProposalWrapper(
883
- entityUrn=dashboard_urn,
884
- aspect=self._create_platform_instance_aspect(),
885
- )
784
+ # Input fields aspect
785
+ # Populate input fields from all the dashboard elements
786
+ all_fields: List[InputFieldClass] = []
787
+ for dashboard_element in looker_dashboard.dashboard_elements:
788
+ all_fields.extend(
789
+ self._input_fields_from_dashboard_element(dashboard_element)
886
790
  )
791
+ dashboard_extra_aspects.append(InputFieldsClass(fields=all_fields))
792
+ # Status aspect
793
+ dashboard_extra_aspects.append(Status(removed=looker_dashboard.is_deleted))
794
+
795
+ dashboard_parent_container: Union[List[str], Unset] = unset
796
+ if (
797
+ looker_dashboard.folder_path is not None
798
+ and looker_dashboard.folder is not None
799
+ ):
800
+ dashboard_parent_container = [
801
+ "Folders",
802
+ *self._get_folder_ancestors_urn_entries(looker_dashboard.folder),
803
+ ]
887
804
 
888
- return proposals
805
+ return [
806
+ Dashboard(
807
+ charts=charts,
808
+ dashboard_url=looker_dashboard.url(
809
+ self.source_config.external_base_url
810
+ ),
811
+ description=looker_dashboard.description or "",
812
+ display_name=looker_dashboard.title, # title is (deprecated) using display_name
813
+ extra_aspects=dashboard_extra_aspects,
814
+ last_modified=self._get_last_modified_time(looker_dashboard),
815
+ last_modified_by=self._get_last_modified_by(looker_dashboard),
816
+ created_at=self._get_created_at(looker_dashboard),
817
+ created_by=self._get_created_by(looker_dashboard),
818
+ deleted_on=self._get_deleted_on(looker_dashboard),
819
+ deleted_by=self._get_deleted_by(looker_dashboard),
820
+ name=looker_dashboard.get_urn_dashboard_id(),
821
+ owners=dashboard_ownership,
822
+ parent_container=dashboard_parent_container,
823
+ platform=self.source_config.platform_name,
824
+ platform_instance=self.source_config.platform_instance
825
+ if self.source_config.include_platform_instance_in_urns
826
+ else None,
827
+ )
828
+ ]
889
829
 
890
830
  def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str:
891
831
  # Note that `looker_dashboard_name_part` will like be `dashboard.1234`.
@@ -902,11 +842,9 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
902
842
  def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str:
903
843
  return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id())
904
844
 
905
- def _make_explore_metadata_events(
845
+ def _make_explore_containers(
906
846
  self,
907
- ) -> Iterable[
908
- Union[MetadataChangeEvent, MetadataChangeProposalWrapper, MetadataWorkUnit]
909
- ]:
847
+ ) -> Iterable[Union[Container, Dataset]]:
910
848
  if not self.source_config.emit_used_explores_only:
911
849
  explores_to_fetch = list(self.list_all_explores())
912
850
  else:
@@ -924,19 +862,14 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
924
862
  for project_name, model, _ in explores_to_fetch:
925
863
  if model not in processed_models:
926
864
  model_key = gen_model_key(self.source_config, model)
927
- yield from gen_containers(
865
+ yield Container(
928
866
  container_key=model_key,
929
- name=model,
930
- sub_types=[BIContainerSubTypes.LOOKML_MODEL],
867
+ display_name=model,
868
+ subtype=BIContainerSubTypes.LOOKML_MODEL,
931
869
  extra_properties=(
932
870
  {"project": project_name} if project_name is not None else None
933
871
  ),
934
- )
935
- yield MetadataChangeProposalWrapper(
936
- entityUrn=model_key.as_urn(),
937
- aspect=BrowsePathsV2Class(
938
- path=[BrowsePathEntryClass("Explore")],
939
- ),
872
+ parent_container=["Explore"],
940
873
  )
941
874
 
942
875
  processed_models.append(model)
@@ -947,9 +880,10 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
947
880
  ((model, explore) for (_project, model, explore) in explores_to_fetch),
948
881
  max_workers=self.source_config.max_threads,
949
882
  ):
950
- events, explore_id, start_time, end_time = future.result()
883
+ explore_dataset_entity, explore_id, start_time, end_time = future.result()
951
884
  self.reporter.explores_scanned += 1
952
- yield from events
885
+ if explore_dataset_entity:
886
+ yield explore_dataset_entity
953
887
  self.reporter.report_upstream_latency(start_time, end_time)
954
888
  logger.debug(
955
889
  f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}"
@@ -969,66 +903,50 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
969
903
  def fetch_one_explore(
970
904
  self, model: str, explore: str
971
905
  ) -> Tuple[
972
- List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]],
906
+ Optional[Dataset],
973
907
  str,
974
908
  datetime.datetime,
975
909
  datetime.datetime,
976
910
  ]:
977
911
  start_time = datetime.datetime.now()
978
- events: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = []
979
912
  looker_explore = self.explore_registry.get_explore(model, explore)
913
+ explore_dataset_entity: Optional[Dataset] = None
980
914
  if looker_explore is not None:
981
- events = (
982
- looker_explore._to_metadata_events(
983
- self.source_config,
984
- self.reporter,
985
- self.source_config.external_base_url or self.source_config.base_url,
986
- self.source_config.extract_embed_urls,
987
- )
988
- or events
915
+ explore_dataset_entity = looker_explore._to_metadata_events(
916
+ self.source_config,
917
+ self.reporter,
918
+ self.source_config.external_base_url or self.source_config.base_url,
919
+ self.source_config.extract_embed_urls,
989
920
  )
990
921
 
991
- return events, f"{model}:{explore}", start_time, datetime.datetime.now()
992
-
993
- def _extract_event_urn(
994
- self, event: Union[MetadataChangeEvent, MetadataChangeProposalWrapper]
995
- ) -> Optional[str]:
996
- if isinstance(event, MetadataChangeEvent):
997
- return event.proposedSnapshot.urn
998
- else:
999
- return event.entityUrn
922
+ return (
923
+ explore_dataset_entity,
924
+ f"{model}:{explore}",
925
+ start_time,
926
+ datetime.datetime.now(),
927
+ )
1000
928
 
1001
- def _emit_folder_as_container(
1002
- self, folder: LookerFolder
1003
- ) -> Iterable[MetadataWorkUnit]:
929
+ def _emit_folder_as_container(self, folder: LookerFolder) -> Iterable[Container]:
1004
930
  if folder.id not in self.processed_folders:
1005
- yield from gen_containers(
1006
- container_key=self._gen_folder_key(folder.id),
1007
- name=folder.name,
1008
- sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
1009
- parent_container_key=(
1010
- self._gen_folder_key(folder.parent_id) if folder.parent_id else None
1011
- ),
1012
- )
1013
931
  if folder.parent_id is None:
1014
- yield MetadataChangeProposalWrapper(
1015
- entityUrn=self._gen_folder_key(folder.id).as_urn(),
1016
- aspect=BrowsePathsV2Class(
1017
- path=[BrowsePathEntryClass("Folders")],
1018
- ),
1019
- ).as_workunit()
932
+ yield Container(
933
+ container_key=self._gen_folder_key(folder.id),
934
+ display_name=folder.name,
935
+ subtype=BIContainerSubTypes.LOOKER_FOLDER,
936
+ parent_container=["Folders"],
937
+ )
1020
938
  else:
1021
- yield MetadataChangeProposalWrapper(
1022
- entityUrn=self._gen_folder_key(folder.id).as_urn(),
1023
- aspect=BrowsePathsV2Class(
1024
- path=[
1025
- BrowsePathEntryClass("Folders"),
1026
- *self._get_folder_browse_path_v2_entries(
1027
- folder, include_current_folder=False
1028
- ),
1029
- ],
1030
- ),
1031
- ).as_workunit()
939
+ yield Container(
940
+ container_key=self._gen_folder_key(folder.id),
941
+ display_name=folder.name,
942
+ subtype=BIContainerSubTypes.LOOKER_FOLDER,
943
+ parent_container=[
944
+ "Folders",
945
+ *self._get_folder_ancestors_urn_entries(
946
+ folder, include_current_folder=False
947
+ ),
948
+ ],
949
+ )
1032
950
  self.processed_folders.append(folder.id)
1033
951
 
1034
952
  def _gen_folder_key(self, folder_id: str) -> LookerFolderKey:
@@ -1039,91 +957,89 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1039
957
  instance=self.source_config.platform_instance,
1040
958
  )
1041
959
 
1042
- def _make_dashboard_and_chart_mces(
960
+ def _make_dashboard_and_chart_entities(
1043
961
  self, looker_dashboard: LookerDashboard
1044
- ) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
962
+ ) -> Iterable[Union[Chart, Dashboard]]:
1045
963
  # Step 1: Emit metadata for each Chart inside the Dashboard.
1046
- chart_events = []
964
+ chart_events: List[Chart] = []
1047
965
  for element in looker_dashboard.dashboard_elements:
1048
966
  if element.type == "vis":
1049
967
  chart_events.extend(
1050
- self._make_chart_metadata_events(element, looker_dashboard)
968
+ self._make_chart_entities(element, looker_dashboard)
1051
969
  )
1052
970
 
1053
971
  yield from chart_events
1054
972
 
1055
- # Step 2: Emit metadata events for the Dashboard itself.
1056
- chart_urns: Set[str] = (
1057
- set()
1058
- ) # Collect the unique child chart urns for dashboard input lineage.
973
+ # # Step 2: Emit metadata events for the Dashboard itself.
974
+ # Create a set of unique chart entities for dashboard input lineage based in chart.urn
975
+ unique_chart_entities: List[Chart] = []
1059
976
  for chart_event in chart_events:
1060
- chart_event_urn = self._extract_event_urn(chart_event)
1061
- if chart_event_urn:
1062
- chart_urns.add(chart_event_urn)
1063
-
1064
- dashboard_events = self._make_dashboard_metadata_events(
1065
- looker_dashboard, list(chart_urns)
977
+ # Use chart.urn to ensure uniqueness based on the chart's URN property
978
+ # Also, update the set of processed chart urns
979
+ if str(chart_event.urn) not in self.chart_urns:
980
+ self.chart_urns.add(str(chart_event.urn))
981
+ unique_chart_entities.append(chart_event)
982
+
983
+ dashboard_events = self._make_dashboard_entities(
984
+ looker_dashboard, unique_chart_entities
1066
985
  )
1067
986
  yield from dashboard_events
1068
987
 
1069
988
  def get_ownership(
1070
989
  self, looker_dashboard_look: Union[LookerDashboard, LookerDashboardElement]
1071
- ) -> Optional[OwnershipClass]:
990
+ ) -> Optional[OwnerClass]:
1072
991
  if looker_dashboard_look.owner is not None:
1073
992
  owner_urn = looker_dashboard_look.owner.get_urn(
1074
993
  self.source_config.strip_user_ids_from_email
1075
994
  )
1076
995
  if owner_urn is not None:
1077
- ownership: OwnershipClass = OwnershipClass(
1078
- owners=[
1079
- OwnerClass(
1080
- owner=owner_urn,
1081
- type=OwnershipTypeClass.DATAOWNER,
1082
- )
1083
- ]
996
+ return OwnerClass(
997
+ owner=owner_urn,
998
+ type=OwnershipTypeClass.DATAOWNER,
1084
999
  )
1085
- return ownership
1086
1000
  return None
1087
1001
 
1088
- def _get_change_audit_stamps(
1089
- self, looker_dashboard: LookerDashboard
1090
- ) -> ChangeAuditStamps:
1091
- change_audit_stamp: ChangeAuditStamps = ChangeAuditStamps()
1092
- if looker_dashboard.created_at is not None:
1093
- change_audit_stamp.created.time = round(
1094
- looker_dashboard.created_at.timestamp() * 1000
1095
- )
1096
- if looker_dashboard.owner is not None:
1097
- owner_urn = looker_dashboard.owner.get_urn(
1098
- self.source_config.strip_user_ids_from_email
1099
- )
1100
- if owner_urn:
1101
- change_audit_stamp.created.actor = owner_urn
1102
- if looker_dashboard.last_updated_at is not None:
1103
- change_audit_stamp.lastModified.time = round(
1104
- looker_dashboard.last_updated_at.timestamp() * 1000
1105
- )
1106
- if looker_dashboard.last_updated_by is not None:
1107
- updated_by_urn = looker_dashboard.last_updated_by.get_urn(
1108
- self.source_config.strip_user_ids_from_email
1109
- )
1110
- if updated_by_urn:
1111
- change_audit_stamp.lastModified.actor = updated_by_urn
1112
- if (
1113
- looker_dashboard.is_deleted
1114
- and looker_dashboard.deleted_by is not None
1115
- and looker_dashboard.deleted_at is not None
1116
- ):
1117
- deleter_urn = looker_dashboard.deleted_by.get_urn(
1118
- self.source_config.strip_user_ids_from_email
1119
- )
1120
- if deleter_urn:
1121
- change_audit_stamp.deleted = AuditStamp(
1122
- actor=deleter_urn,
1123
- time=round(looker_dashboard.deleted_at.timestamp() * 1000),
1124
- )
1002
+ def _get_last_modified_time(
1003
+ self, looker_dashboard: Optional[LookerDashboard]
1004
+ ) -> Optional[datetime.datetime]:
1005
+ return looker_dashboard.last_updated_at if looker_dashboard else None
1006
+
1007
+ def _get_last_modified_by(
1008
+ self, looker_dashboard: Optional[LookerDashboard]
1009
+ ) -> Optional[str]:
1010
+ if not looker_dashboard or not looker_dashboard.last_updated_by:
1011
+ return None
1012
+ return looker_dashboard.last_updated_by.get_urn(
1013
+ self.source_config.strip_user_ids_from_email
1014
+ )
1015
+
1016
+ def _get_created_at(
1017
+ self, looker_dashboard: Optional[LookerDashboard]
1018
+ ) -> Optional[datetime.datetime]:
1019
+ return looker_dashboard.created_at if looker_dashboard else None
1020
+
1021
+ def _get_created_by(
1022
+ self, looker_dashboard: Optional[LookerDashboard]
1023
+ ) -> Optional[str]:
1024
+ if not looker_dashboard or not looker_dashboard.owner:
1025
+ return None
1026
+ return looker_dashboard.owner.get_urn(
1027
+ self.source_config.strip_user_ids_from_email
1028
+ )
1125
1029
 
1126
- return change_audit_stamp
1030
+ def _get_deleted_on(
1031
+ self, looker_dashboard: Optional[LookerDashboard]
1032
+ ) -> Optional[datetime.datetime]:
1033
+ return looker_dashboard.deleted_at if looker_dashboard else None
1034
+
1035
+ def _get_deleted_by(
1036
+ self, looker_dashboard: Optional[LookerDashboard]
1037
+ ) -> Optional[str]:
1038
+ if not looker_dashboard or not looker_dashboard.deleted_by:
1039
+ return None
1040
+ return looker_dashboard.deleted_by.get_urn(
1041
+ self.source_config.strip_user_ids_from_email
1042
+ )
1127
1043
 
1128
1044
  def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder:
1129
1045
  assert folder.id
@@ -1136,7 +1052,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1136
1052
  ]
1137
1053
  return "/".join(ancestors + [folder.name])
1138
1054
 
1139
- def _get_looker_dashboard(self, dashboard: Dashboard) -> LookerDashboard:
1055
+ def _get_looker_dashboard(self, dashboard: LookerAPIDashboard) -> LookerDashboard:
1140
1056
  self.reporter.accessed_dashboards += 1
1141
1057
  if dashboard.folder is None:
1142
1058
  logger.debug(f"{dashboard.id} has no folder")
@@ -1210,22 +1126,6 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1210
1126
 
1211
1127
  return user
1212
1128
 
1213
- def process_metrics_dimensions_and_fields_for_dashboard(
1214
- self, dashboard: LookerDashboard
1215
- ) -> List[MetadataWorkUnit]:
1216
- chart_mcps = [
1217
- self._make_metrics_dimensions_chart_mcp(element)
1218
- for element in dashboard.dashboard_elements
1219
- ]
1220
- dashboard_mcp = self._make_metrics_dimensions_dashboard_mcp(dashboard)
1221
-
1222
- mcps = chart_mcps
1223
- mcps.append(dashboard_mcp)
1224
-
1225
- workunits = [mcp.as_workunit() for mcp in mcps]
1226
-
1227
- return workunits
1228
-
1229
1129
  def _input_fields_from_dashboard_element(
1230
1130
  self, dashboard_element: LookerDashboardElement
1231
1131
  ) -> List[InputFieldClass]:
@@ -1318,104 +1218,141 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1318
1218
  aspect=input_fields_aspect,
1319
1219
  )
1320
1220
 
1321
- def process_dashboard(
1221
+ def _should_skip_personal_folder_dashboard(
1222
+ self, dashboard_object: LookerAPIDashboard
1223
+ ) -> bool:
1224
+ """Check if dashboard should be skipped due to being in personal folder."""
1225
+ if not self.source_config.skip_personal_folders:
1226
+ return False
1227
+
1228
+ if dashboard_object.folder is not None and (
1229
+ dashboard_object.folder.is_personal
1230
+ or dashboard_object.folder.is_personal_descendant
1231
+ ):
1232
+ self.reporter.info(
1233
+ title="Dropped Dashboard",
1234
+ message="Dropped due to being a personal folder",
1235
+ context=f"Dashboard ID: {dashboard_object.id}",
1236
+ )
1237
+ assert dashboard_object.id is not None
1238
+ self.reporter.report_dashboards_dropped(dashboard_object.id)
1239
+ return True
1240
+ return False
1241
+
1242
+ def _should_skip_dashboard_by_folder_path(
1243
+ self, looker_dashboard: LookerDashboard
1244
+ ) -> bool:
1245
+ """Check if dashboard should be skipped based on folder path pattern."""
1246
+ if (
1247
+ looker_dashboard.folder_path is not None
1248
+ and not self.source_config.folder_path_pattern.allowed(
1249
+ looker_dashboard.folder_path
1250
+ )
1251
+ ):
1252
+ logger.debug(
1253
+ f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
1254
+ )
1255
+ self.reporter.report_dashboards_dropped(looker_dashboard.id)
1256
+ return True
1257
+ return False
1258
+
1259
+ def _fetch_dashboard_from_api(
1322
1260
  self, dashboard_id: str, fields: List[str]
1323
- ) -> Tuple[
1324
- List[MetadataWorkUnit],
1325
- Optional[looker_usage.LookerDashboardForUsage],
1326
- str,
1327
- datetime.datetime,
1328
- datetime.datetime,
1329
- ]:
1330
- start_time = datetime.datetime.now()
1331
- assert dashboard_id is not None
1332
- if not self.source_config.dashboard_pattern.allowed(dashboard_id):
1333
- self.reporter.report_dashboards_dropped(dashboard_id)
1334
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1261
+ ) -> Optional[LookerAPIDashboard]:
1262
+ """Fetch dashboard object from Looker API with error handling."""
1335
1263
  try:
1336
- dashboard_object: Dashboard = self.looker_api.dashboard(
1264
+ return self.looker_api.dashboard(
1337
1265
  dashboard_id=dashboard_id,
1338
1266
  fields=fields,
1339
1267
  )
1340
1268
  except (SDKError, DeserializeError) as e:
1341
- # A looker dashboard could be deleted in between the list and the get
1342
1269
  self.reporter.report_warning(
1343
1270
  title="Failed to fetch dashboard from the Looker API",
1344
1271
  message="Error occurred while attempting to loading dashboard from Looker API. Skipping.",
1345
1272
  context=f"Dashboard ID: {dashboard_id}",
1346
1273
  exc=e,
1347
1274
  )
1348
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1275
+ return None
1349
1276
 
1350
- if self.source_config.skip_personal_folders:
1351
- if dashboard_object.folder is not None and (
1352
- dashboard_object.folder.is_personal
1353
- or dashboard_object.folder.is_personal_descendant
1354
- ):
1355
- self.reporter.info(
1356
- title="Dropped Dashboard",
1357
- message="Dropped due to being a personal folder",
1358
- context=f"Dashboard ID: {dashboard_id}",
1359
- )
1360
- self.reporter.report_dashboards_dropped(dashboard_id)
1361
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1277
+ def _create_empty_result(
1278
+ self, dashboard_id: str, start_time: datetime.datetime
1279
+ ) -> DashboardProcessingResult:
1280
+ """Create an empty result for skipped or failed dashboard processing."""
1281
+ return DashboardProcessingResult(
1282
+ entities=[],
1283
+ dashboard_usage=None,
1284
+ dashboard_id=dashboard_id,
1285
+ start_time=start_time,
1286
+ end_time=datetime.datetime.now(),
1287
+ )
1362
1288
 
1363
- looker_dashboard = self._get_looker_dashboard(dashboard_object)
1289
+ def process_dashboard(
1290
+ self, dashboard_id: str, fields: List[str]
1291
+ ) -> DashboardProcessingResult:
1292
+ """
1293
+ Process a single dashboard and return the metadata workunits.
1364
1294
 
1365
- workunits = []
1366
- if (
1367
- looker_dashboard.folder_path is not None
1368
- and not self.source_config.folder_path_pattern.allowed(
1369
- looker_dashboard.folder_path
1370
- )
1371
- ):
1372
- logger.debug(
1373
- f"Folder path {looker_dashboard.folder_path} is denied in folder_path_pattern"
1374
- )
1375
- return [], None, dashboard_id, start_time, datetime.datetime.now()
1295
+ Args:
1296
+ dashboard_id: The ID of the dashboard to process
1297
+ fields: List of fields to fetch from the Looker API
1376
1298
 
1377
- if looker_dashboard.folder:
1378
- workunits += list(
1379
- self._get_folder_and_ancestors_workunits(looker_dashboard.folder)
1380
- )
1299
+ Returns:
1300
+ DashboardProcessingResult containing entities, usage data, and timing information
1301
+ """
1302
+ start_time = datetime.datetime.now()
1381
1303
 
1382
- mces = self._make_dashboard_and_chart_mces(looker_dashboard)
1383
- workunits += [
1384
- (
1385
- MetadataWorkUnit(id=f"looker-{mce.proposedSnapshot.urn}", mce=mce)
1386
- if isinstance(mce, MetadataChangeEvent)
1387
- else MetadataWorkUnit(
1388
- id=f"looker-{mce.aspectName}-{mce.entityUrn}", mcp=mce
1389
- )
1390
- )
1391
- for mce in mces
1392
- ]
1304
+ if dashboard_id is None:
1305
+ raise ValueError("Dashboard ID cannot be None")
1393
1306
 
1394
- # add on metrics, dimensions, fields events
1395
- metric_dim_workunits = self.process_metrics_dimensions_and_fields_for_dashboard(
1396
- looker_dashboard
1307
+ # Fetch dashboard from API
1308
+ dashboard_object: Optional[LookerAPIDashboard] = self._fetch_dashboard_from_api(
1309
+ dashboard_id, fields
1397
1310
  )
1311
+ if dashboard_object is None:
1312
+ return self._create_empty_result(dashboard_id, start_time)
1313
+
1314
+ # Check if dashboard should be skipped due to personal folder
1315
+ if self._should_skip_personal_folder_dashboard(dashboard_object):
1316
+ return self._create_empty_result(dashboard_id, start_time)
1398
1317
 
1399
- workunits.extend(metric_dim_workunits)
1318
+ # Convert to internal representation
1319
+ looker_dashboard: LookerDashboard = self._get_looker_dashboard(dashboard_object)
1400
1320
 
1321
+ # Check folder path pattern
1322
+ if self._should_skip_dashboard_by_folder_path(looker_dashboard):
1323
+ return self._create_empty_result(dashboard_id, start_time)
1324
+
1325
+ # Build entities list
1326
+ entities: List[Entity] = []
1327
+
1328
+ # Add folder containers if dashboard has a folder
1329
+ if looker_dashboard.folder:
1330
+ entities.extend(
1331
+ list(self._get_folder_and_ancestors_containers(looker_dashboard.folder))
1332
+ )
1333
+
1334
+ # Add dashboard and chart entities
1335
+ entities.extend(list(self._make_dashboard_and_chart_entities(looker_dashboard)))
1336
+
1337
+ # Report successful processing
1401
1338
  self.reporter.report_dashboards_scanned()
1402
1339
 
1403
- # generate usage tracking object
1340
+ # Generate usage tracking object
1404
1341
  dashboard_usage = looker_usage.LookerDashboardForUsage.from_dashboard(
1405
1342
  dashboard_object
1406
1343
  )
1407
1344
 
1408
- return (
1409
- workunits,
1410
- dashboard_usage,
1411
- dashboard_id,
1412
- start_time,
1413
- datetime.datetime.now(),
1345
+ return DashboardProcessingResult(
1346
+ entities=entities,
1347
+ dashboard_usage=dashboard_usage,
1348
+ dashboard_id=dashboard_id,
1349
+ start_time=start_time,
1350
+ end_time=datetime.datetime.now(),
1414
1351
  )
1415
1352
 
1416
- def _get_folder_and_ancestors_workunits(
1353
+ def _get_folder_and_ancestors_containers(
1417
1354
  self, folder: LookerFolder
1418
- ) -> Iterable[MetadataWorkUnit]:
1355
+ ) -> Iterable[Container]:
1419
1356
  for ancestor_folder in self.looker_api.folder_ancestors(folder.id):
1420
1357
  yield from self._emit_folder_as_container(
1421
1358
  self._get_looker_folder(ancestor_folder)
@@ -1486,39 +1423,27 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1486
1423
  ).workunit_processor,
1487
1424
  ]
1488
1425
 
1489
- def emit_independent_looks_mcp(
1426
+ def emit_independent_looks_entities(
1490
1427
  self, dashboard_element: LookerDashboardElement
1491
- ) -> Iterable[MetadataWorkUnit]:
1428
+ ) -> Iterable[Union[Container, Chart]]:
1492
1429
  if dashboard_element.folder: # independent look
1493
- yield from self._get_folder_and_ancestors_workunits(
1430
+ yield from self._get_folder_and_ancestors_containers(
1494
1431
  dashboard_element.folder
1495
1432
  )
1496
1433
 
1497
- yield from auto_workunit(
1498
- stream=self._make_chart_metadata_events(
1499
- dashboard_element=dashboard_element,
1500
- dashboard=None,
1501
- )
1502
- )
1503
-
1504
- yield from auto_workunit(
1505
- [
1506
- self._make_metrics_dimensions_chart_mcp(
1507
- dashboard_element,
1508
- )
1509
- ]
1434
+ yield from self._make_chart_entities(
1435
+ dashboard_element=dashboard_element,
1436
+ dashboard=None,
1510
1437
  )
1511
1438
 
1512
- def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]:
1513
- """
1514
- Emit MetadataWorkUnit for looks which are not part of any Dashboard
1439
+ def extract_independent_looks(self) -> Iterable[Union[Container, Chart]]:
1515
1440
  """
1516
- if self.source_config.extract_independent_looks is False:
1517
- return
1441
+ Emit entities for Looks which are not part of any Dashboard.
1518
1442
 
1519
- self.reporter.report_stage_start("extract_independent_looks")
1443
+ Returns: Containers for the folders and ancestors folders and Charts for the looks
1444
+ """
1445
+ logger.debug("Extracting Looks not part of any Dashboard")
1520
1446
 
1521
- logger.debug("Extracting looks not part of Dashboard")
1522
1447
  look_fields: List[str] = [
1523
1448
  "id",
1524
1449
  "title",
@@ -1540,15 +1465,21 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1540
1465
  all_looks: List[Look] = self.looker_api.all_looks(
1541
1466
  fields=look_fields, soft_deleted=self.source_config.include_deleted
1542
1467
  )
1468
+
1543
1469
  for look in all_looks:
1470
+ # Skip looks that are already referenced from a dashboard
1471
+ if look.id is None:
1472
+ logger.warning("Encountered Look with no ID, skipping.")
1473
+ continue
1474
+
1544
1475
  if look.id in self.reachable_look_registry:
1545
- # This look is reachable from the Dashboard
1546
1476
  continue
1547
1477
 
1548
1478
  if look.query_id is None:
1549
1479
  logger.info(f"query_id is None for look {look.title}({look.id})")
1550
1480
  continue
1551
1481
 
1482
+ # Skip looks in personal folders if configured
1552
1483
  if self.source_config.skip_personal_folders:
1553
1484
  if look.folder is not None and (
1554
1485
  look.folder.is_personal or look.folder.is_personal_descendant
@@ -1559,76 +1490,96 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1559
1490
  context=f"Look ID: {look.id}",
1560
1491
  )
1561
1492
 
1562
- assert look.id, "Looker id is null"
1563
1493
  self.reporter.report_charts_dropped(look.id)
1564
1494
  continue
1565
1495
 
1566
- if look.id is not None:
1567
- query: Optional[Query] = self.looker_api.get_look(
1568
- look.id, fields=["query"]
1569
- ).query
1570
- # Only include fields that are in the query_fields list
1571
- query = Query(
1572
- **{
1573
- key: getattr(query, key)
1574
- for key in query_fields
1575
- if hasattr(query, key)
1576
- }
1577
- )
1496
+ # Fetch the Look's query and filter to allowed fields
1497
+ query: Optional[Query] = None
1498
+ try:
1499
+ look_with_query = self.looker_api.get_look(look.id, fields=["query"])
1500
+ query_obj = look_with_query.query
1501
+ if query_obj:
1502
+ query = Query(
1503
+ **{
1504
+ key: getattr(query_obj, key)
1505
+ for key in query_fields
1506
+ if hasattr(query_obj, key)
1507
+ }
1508
+ )
1509
+ except Exception as exc:
1510
+ logger.warning(f"Failed to fetch query for Look {look.id}: {exc}")
1511
+ continue
1578
1512
 
1579
- dashboard_element: Optional[LookerDashboardElement] = (
1580
- self._get_looker_dashboard_element(
1581
- DashboardElement(
1582
- id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
1583
- # we add the "looks_" prefix to look.id.
1584
- title=look.title,
1585
- subtitle_text=look.description,
1586
- look_id=look.id,
1587
- dashboard_id=None, # As this is an independent look
1588
- look=LookWithQuery(
1589
- query=query, folder=look.folder, user_id=look.user_id
1590
- ),
1513
+ dashboard_element = self._get_looker_dashboard_element(
1514
+ DashboardElement(
1515
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes),
1516
+ # we add the "looks_" prefix to look.id.
1517
+ title=look.title,
1518
+ subtitle_text=look.description,
1519
+ look_id=look.id,
1520
+ dashboard_id=None, # As this is an independent look
1521
+ look=LookWithQuery(
1522
+ query=query,
1523
+ folder=getattr(look, "folder", None),
1524
+ user_id=getattr(look, "user_id", None),
1591
1525
  ),
1592
1526
  )
1593
1527
  )
1594
1528
 
1595
1529
  if dashboard_element is not None:
1596
- logger.debug(f"Emitting MCPS for look {look.title}({look.id})")
1597
- yield from self.emit_independent_looks_mcp(
1530
+ logger.debug(f"Emitting MCPs for look {look.title}({look.id})")
1531
+ yield from self.emit_independent_looks_entities(
1598
1532
  dashboard_element=dashboard_element
1599
1533
  )
1600
1534
 
1601
- self.reporter.report_stage_end("extract_independent_looks")
1535
+ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
1536
+ """
1537
+ Note: Returns Entities from SDKv2 where possible else MCPs only.
1602
1538
 
1603
- def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
1604
- self.reporter.report_stage_start("list_dashboards")
1605
- dashboards = self.looker_api.all_dashboards(fields="id")
1606
- deleted_dashboards = (
1607
- self.looker_api.search_dashboards(fields="id", deleted="true")
1608
- if self.source_config.include_deleted
1609
- else []
1610
- )
1611
- if deleted_dashboards != []:
1612
- logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
1539
+ Using SDKv2: Containers, Datasets, Dashboards and Charts
1540
+ Using MCPW: Tags, DashboardUsageStats and UserResourceMapping
1613
1541
 
1614
- dashboard_ids = [dashboard_base.id for dashboard_base in dashboards]
1615
- dashboard_ids.extend(
1616
- [deleted_dashboard.id for deleted_dashboard in deleted_dashboards]
1617
- )
1618
- selected_dashboard_ids: List[Optional[str]] = []
1619
- for id in dashboard_ids:
1620
- if id is None:
1621
- continue
1622
- if not self.source_config.dashboard_pattern.allowed(id):
1623
- self.reporter.report_dashboards_dropped(id)
1542
+ TODO: Convert MCPWs to use SDKv2 entities
1543
+ """
1544
+ with self.reporter.report_stage("list_dashboards"):
1545
+ # Fetch all dashboards (not deleted)
1546
+ dashboards = self.looker_api.all_dashboards(fields="id")
1547
+
1548
+ # Optionally fetch deleted dashboards if configured
1549
+ if self.source_config.include_deleted:
1550
+ deleted_dashboards = self.looker_api.search_dashboards(
1551
+ fields="id", deleted="true"
1552
+ )
1624
1553
  else:
1625
- selected_dashboard_ids.append(id)
1626
- dashboard_ids = selected_dashboard_ids
1627
- self.reporter.report_stage_end("list_dashboards")
1628
- self.reporter.report_total_dashboards(len(dashboard_ids))
1554
+ deleted_dashboards = []
1555
+
1556
+ if deleted_dashboards:
1557
+ logger.debug(f"Deleted Dashboards = {deleted_dashboards}")
1558
+
1559
+ # Collect all dashboard IDs (including deleted if applicable)
1560
+ all_dashboard_ids: List[Optional[str]] = [
1561
+ dashboard.id for dashboard in dashboards
1562
+ ]
1563
+ all_dashboard_ids.extend([dashboard.id for dashboard in deleted_dashboards])
1629
1564
 
1630
- # List dashboard fields to extract for processing
1631
- fields = [
1565
+ # Filter dashboard IDs based on the allowed pattern
1566
+ filtered_dashboard_ids: List[str] = []
1567
+ for dashboard_id in all_dashboard_ids:
1568
+ if dashboard_id is None:
1569
+ continue
1570
+ if not self.source_config.dashboard_pattern.allowed(dashboard_id):
1571
+ self.reporter.report_dashboards_dropped(dashboard_id)
1572
+ else:
1573
+ filtered_dashboard_ids.append(dashboard_id)
1574
+
1575
+ # Use the filtered list for further processing
1576
+ dashboard_ids: List[str] = filtered_dashboard_ids
1577
+
1578
+ # Report the total number of dashboards to be processed
1579
+ self.reporter.report_total_dashboards(len(dashboard_ids))
1580
+
1581
+ # Define the fields to extract for each dashboard
1582
+ dashboard_fields = [
1632
1583
  "id",
1633
1584
  "title",
1634
1585
  "dashboard_elements",
@@ -1644,41 +1595,47 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1644
1595
  "deleted_at",
1645
1596
  "deleter_id",
1646
1597
  ]
1598
+
1599
+ # Add usage-related fields if usage history extraction is enabled
1647
1600
  if self.source_config.extract_usage_history:
1648
- fields += [
1649
- "favorite_count",
1650
- "view_count",
1651
- "last_viewed_at",
1652
- ]
1601
+ dashboard_fields.extend(
1602
+ [
1603
+ "favorite_count",
1604
+ "view_count",
1605
+ "last_viewed_at",
1606
+ ]
1607
+ )
1653
1608
 
1609
+ # Store dashboards for which usage stats will be extracted
1654
1610
  looker_dashboards_for_usage: List[looker_usage.LookerDashboardForUsage] = []
1655
1611
 
1612
+ # Process dashboard and chart metadata
1656
1613
  with self.reporter.report_stage("dashboard_chart_metadata"):
1614
+ dashboard_jobs = (
1615
+ (dashboard_id, dashboard_fields)
1616
+ for dashboard_id in dashboard_ids
1617
+ if dashboard_id is not None
1618
+ )
1657
1619
  for job in BackpressureAwareExecutor.map(
1658
1620
  self.process_dashboard,
1659
- (
1660
- (dashboard_id, fields)
1661
- for dashboard_id in dashboard_ids
1662
- if dashboard_id is not None
1663
- ),
1621
+ dashboard_jobs,
1664
1622
  max_workers=self.source_config.max_threads,
1665
1623
  ):
1666
- (
1667
- work_units,
1668
- dashboard_usage,
1669
- dashboard_id,
1670
- start_time,
1671
- end_time,
1672
- ) = job.result()
1624
+ result: DashboardProcessingResult = job.result()
1625
+
1673
1626
  logger.debug(
1674
- f"Running time of process_dashboard for {dashboard_id} = {(end_time - start_time).total_seconds()}"
1627
+ f"Running time of process_dashboard for {result.dashboard_id} = {(result.end_time - result.start_time).total_seconds()}"
1675
1628
  )
1676
- self.reporter.report_upstream_latency(start_time, end_time)
1629
+ self.reporter.report_upstream_latency(
1630
+ result.start_time, result.end_time
1631
+ )
1632
+
1633
+ yield from result.entities
1677
1634
 
1678
- yield from work_units
1679
- if dashboard_usage is not None:
1680
- looker_dashboards_for_usage.append(dashboard_usage)
1635
+ if result.dashboard_usage is not None:
1636
+ looker_dashboards_for_usage.append(result.dashboard_usage)
1681
1637
 
1638
+ # Warn if owner extraction was enabled but no emails could be found
1682
1639
  if (
1683
1640
  self.source_config.extract_owners
1684
1641
  and self.reporter.resolved_user_ids > 0
@@ -1690,53 +1647,42 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
1690
1647
  "Failed to extract owners emails for any dashboards. Please enable the see_users permission for your Looker API key",
1691
1648
  )
1692
1649
 
1693
- # Extract independent look here, so that explore of this look would get consider in _make_explore_metadata_events
1694
- yield from self.extract_independent_looks()
1650
+ # Extract independent looks first, so their explores are considered in _make_explore_containers.
1651
+ if self.source_config.extract_independent_looks:
1652
+ with self.reporter.report_stage("extract_independent_looks"):
1653
+ yield from self.extract_independent_looks()
1695
1654
 
1696
- self.reporter.report_stage_start("explore_metadata")
1697
-
1698
- for event in self._make_explore_metadata_events():
1699
- if isinstance(event, MetadataChangeEvent):
1700
- yield MetadataWorkUnit(
1701
- id=f"looker-{event.proposedSnapshot.urn}", mce=event
1702
- )
1703
- elif isinstance(event, MetadataChangeProposalWrapper):
1704
- yield event.as_workunit()
1705
- elif isinstance(event, MetadataWorkUnit):
1706
- yield event
1707
- else:
1708
- raise Exception(f"Unexpected type of event {event}")
1709
- self.reporter.report_stage_end("explore_metadata")
1655
+ # Process explore containers and yield them.
1656
+ with self.reporter.report_stage("explore_metadata"):
1657
+ yield from self._make_explore_containers()
1710
1658
 
1711
1659
  if (
1712
1660
  self.source_config.tag_measures_and_dimensions
1713
1661
  and self.reporter.explores_scanned > 0
1714
1662
  ):
1715
- # Emit tag MCEs for measures and dimensions if we produced any explores:
1663
+ # Emit tag MCPs for measures and dimensions if we produced any explores:
1664
+ # Tags MCEs are converted to MCPs
1716
1665
  for tag_mce in LookerUtil.get_tag_mces():
1717
- yield MetadataWorkUnit(
1718
- id=f"tag-{tag_mce.proposedSnapshot.urn}",
1719
- mce=tag_mce,
1720
- )
1666
+ yield from auto_workunit(mcps_from_mce(tag_mce))
1721
1667
 
1722
1668
  # Extract usage history is enabled
1723
1669
  if self.source_config.extract_usage_history:
1724
- self.reporter.report_stage_start("usage_extraction")
1725
- usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
1726
- looker_dashboards_for_usage, self.chart_urns
1727
- )
1728
- for usage_mcp in usage_mcps:
1729
- yield usage_mcp.as_workunit()
1730
- self.reporter.report_stage_end("usage_extraction")
1670
+ with self.reporter.report_stage("usage_extraction"):
1671
+ usage_mcps: List[MetadataChangeProposalWrapper] = (
1672
+ self.extract_usage_stat(
1673
+ looker_dashboards_for_usage, self.chart_urns
1674
+ )
1675
+ )
1676
+ yield from auto_workunit(usage_mcps)
1731
1677
 
1732
- # Dump looker user resource mappings.
1678
+ # Ingest looker user resource mapping workunits.
1733
1679
  logger.info("Ingesting looker user resource mapping workunits")
1734
- self.reporter.report_stage_start("user_resource_extraction")
1735
- yield from auto_workunit(
1736
- self.user_registry.to_platform_resource(
1737
- self.source_config.platform_instance
1680
+ with self.reporter.report_stage("user_resource_extraction"):
1681
+ yield from auto_workunit(
1682
+ self.user_registry.to_platform_resource(
1683
+ self.source_config.platform_instance
1684
+ )
1738
1685
  )
1739
- )
1740
1686
 
1741
1687
  def get_report(self) -> SourceReport:
1742
1688
  return self.reporter