acryl-datahub 1.2.0.2rc2__py3-none-any.whl → 1.2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/METADATA +2511 -2509
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/RECORD +51 -43
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +13 -1
- datahub/emitter/rest_emitter.py +18 -5
- datahub/ingestion/autogenerated/capability_summary.json +97 -6
- datahub/ingestion/graph/client.py +19 -3
- datahub/ingestion/sink/datahub_rest.py +2 -0
- datahub/ingestion/source/aws/glue.py +8 -0
- datahub/ingestion/source/cassandra/cassandra.py +5 -7
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/datahub/datahub_source.py +3 -0
- datahub/ingestion/source/dbt/dbt_common.py +10 -0
- datahub/ingestion/source/delta_lake/source.py +1 -0
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +120 -0
- datahub/ingestion/source/grafana/report.py +91 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/hex.py +8 -0
- datahub/ingestion/source/hex/query_fetcher.py +1 -1
- datahub/ingestion/source/looker/looker_source.py +9 -0
- datahub/ingestion/source/looker/lookml_source.py +8 -0
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/redshift/redshift.py +8 -1
- datahub/ingestion/source/s3/source.py +9 -1
- datahub/ingestion/source/sql/athena.py +8 -2
- datahub/ingestion/source/sql/athena_properties_extractor.py +2 -2
- datahub/ingestion/source/sql/clickhouse.py +9 -0
- datahub/ingestion/source/sql/vertica.py +3 -0
- datahub/ingestion/source/sql_queries.py +88 -46
- datahub/ingestion/source/unity/proxy.py +112 -22
- datahub/ingestion/source/unity/source.py +7 -10
- datahub/metadata/_internal_schema_classes.py +18 -3
- datahub/metadata/schema.avsc +19 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
- datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +9 -0
- datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
- datahub/sdk/dataset.py +44 -0
- datahub/sdk/search_filters.py +34 -14
- datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
- datahub/telemetry/telemetry.py +4 -1
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1,131 +1,569 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from typing import Iterable, List, Optional
|
|
2
3
|
|
|
3
4
|
import requests
|
|
4
|
-
from pydantic import Field, SecretStr
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
from datahub.emitter.mce_builder import (
|
|
7
|
+
make_chart_urn,
|
|
8
|
+
make_container_urn,
|
|
9
|
+
make_dashboard_urn,
|
|
10
|
+
make_data_platform_urn,
|
|
11
|
+
make_dataplatform_instance_urn,
|
|
12
|
+
make_dataset_urn_with_platform_instance,
|
|
13
|
+
make_schema_field_urn,
|
|
14
|
+
make_tag_urn,
|
|
15
|
+
)
|
|
8
16
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
|
+
from datahub.emitter.mcp_builder import add_dataset_to_container, gen_containers
|
|
9
18
|
from datahub.ingestion.api.common import PipelineContext
|
|
10
19
|
from datahub.ingestion.api.decorators import (
|
|
20
|
+
SourceCapability,
|
|
11
21
|
SupportStatus,
|
|
22
|
+
capability,
|
|
12
23
|
config_class,
|
|
13
24
|
platform_name,
|
|
14
25
|
support_status,
|
|
15
26
|
)
|
|
16
27
|
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
|
|
17
|
-
from datahub.ingestion.api.source_helpers import auto_workunit
|
|
18
28
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
29
|
+
from datahub.ingestion.source.common.subtypes import BIContainerSubTypes
|
|
30
|
+
from datahub.ingestion.source.grafana.entity_mcp_builder import (
|
|
31
|
+
build_chart_mcps,
|
|
32
|
+
build_dashboard_mcps,
|
|
33
|
+
)
|
|
34
|
+
from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel
|
|
35
|
+
from datahub.ingestion.source.grafana.grafana_api import GrafanaAPIClient
|
|
36
|
+
from datahub.ingestion.source.grafana.grafana_config import (
|
|
37
|
+
GrafanaSourceConfig,
|
|
38
|
+
)
|
|
39
|
+
from datahub.ingestion.source.grafana.lineage import LineageExtractor
|
|
40
|
+
from datahub.ingestion.source.grafana.models import (
|
|
41
|
+
Dashboard,
|
|
42
|
+
DashboardContainerKey,
|
|
43
|
+
Folder,
|
|
44
|
+
FolderKey,
|
|
45
|
+
Panel,
|
|
46
|
+
)
|
|
47
|
+
from datahub.ingestion.source.grafana.report import (
|
|
48
|
+
GrafanaSourceReport,
|
|
49
|
+
)
|
|
19
50
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
20
51
|
StaleEntityRemovalHandler,
|
|
21
|
-
StaleEntityRemovalSourceReport,
|
|
22
|
-
StatefulIngestionConfigBase,
|
|
23
52
|
)
|
|
24
53
|
from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
25
|
-
StatefulIngestionReport,
|
|
26
54
|
StatefulIngestionSourceBase,
|
|
27
55
|
)
|
|
56
|
+
from datahub.ingestion.source_report.ingestion_stage import (
|
|
57
|
+
LINEAGE_EXTRACTION,
|
|
58
|
+
)
|
|
28
59
|
from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
|
|
29
|
-
from datahub.metadata.schema_classes import
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
60
|
+
from datahub.metadata.schema_classes import (
|
|
61
|
+
DashboardInfoClass,
|
|
62
|
+
DataPlatformInstanceClass,
|
|
63
|
+
DatasetPropertiesClass,
|
|
64
|
+
DatasetSnapshotClass,
|
|
65
|
+
GlobalTagsClass,
|
|
66
|
+
InputFieldClass,
|
|
67
|
+
InputFieldsClass,
|
|
68
|
+
MetadataChangeEventClass,
|
|
69
|
+
OtherSchemaClass,
|
|
70
|
+
SchemaFieldClass,
|
|
71
|
+
SchemaMetadataClass,
|
|
72
|
+
StatusClass,
|
|
73
|
+
TagAssociationClass,
|
|
74
|
+
)
|
|
40
75
|
|
|
76
|
+
# Grafana-specific ingestion stages
|
|
77
|
+
GRAFANA_BASIC_EXTRACTION = "Grafana Basic Dashboard Extraction"
|
|
78
|
+
GRAFANA_FOLDER_EXTRACTION = "Grafana Folder Extraction"
|
|
79
|
+
GRAFANA_DASHBOARD_EXTRACTION = "Grafana Dashboard Extraction"
|
|
80
|
+
GRAFANA_PANEL_EXTRACTION = "Grafana Panel Extraction"
|
|
41
81
|
|
|
42
|
-
|
|
43
|
-
pass
|
|
82
|
+
logger = logging.getLogger(__name__)
|
|
44
83
|
|
|
45
84
|
|
|
46
85
|
@platform_name("Grafana")
|
|
47
86
|
@config_class(GrafanaSourceConfig)
|
|
48
|
-
@support_status(SupportStatus.
|
|
87
|
+
@support_status(SupportStatus.CERTIFIED)
|
|
88
|
+
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
89
|
+
@capability(SourceCapability.DELETION_DETECTION, "Enabled by default")
|
|
90
|
+
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
|
|
91
|
+
@capability(SourceCapability.LINEAGE_FINE, "Enabled by default")
|
|
92
|
+
@capability(SourceCapability.OWNERSHIP, "Enabled by default")
|
|
93
|
+
@capability(SourceCapability.TAGS, "Enabled by default")
|
|
49
94
|
class GrafanaSource(StatefulIngestionSourceBase):
|
|
50
95
|
"""
|
|
51
|
-
This
|
|
52
|
-
|
|
96
|
+
This plugin extracts metadata from Grafana and ingests it into DataHub. It connects to Grafana's API
|
|
97
|
+
to extract metadata about dashboards, charts, and data sources. The following types of metadata are extracted:
|
|
98
|
+
|
|
99
|
+
- Container Entities:
|
|
100
|
+
- Folders: Top-level organizational units in Grafana
|
|
101
|
+
- Dashboards: Collections of panels and charts
|
|
102
|
+
- The full container hierarchy is preserved (Folders -> Dashboards -> Charts/Datasets)
|
|
103
|
+
|
|
104
|
+
- Charts and Visualizations:
|
|
105
|
+
- All panel types (graphs, tables, stat panels, etc.)
|
|
106
|
+
- Chart configuration and properties
|
|
107
|
+
- Links to the original Grafana UI
|
|
108
|
+
- Custom properties including panel types and data source information
|
|
109
|
+
- Input fields and schema information when available
|
|
110
|
+
|
|
111
|
+
- Data Sources and Datasets:
|
|
112
|
+
- Physical datasets representing Grafana's data sources
|
|
113
|
+
- Dataset schema information extracted from queries and panel configurations
|
|
114
|
+
- Support for various data source types (SQL, Prometheus, etc.)
|
|
115
|
+
- Custom properties including data source type and configuration
|
|
116
|
+
|
|
117
|
+
- Lineage Information:
|
|
118
|
+
- Dataset-level lineage showing relationships between:
|
|
119
|
+
- Source data systems and Grafana datasets
|
|
120
|
+
- Grafana datasets and charts
|
|
121
|
+
- Column-level lineage for SQL-based data sources
|
|
122
|
+
- Support for external source systems through configurable platform mappings
|
|
123
|
+
|
|
124
|
+
- Tags and Ownership:
|
|
125
|
+
- Dashboard and chart tags
|
|
126
|
+
- Ownership information derived from:
|
|
127
|
+
- Dashboard creators
|
|
128
|
+
- Technical owners based on dashboard UIDs
|
|
129
|
+
- Custom ownership assignments
|
|
130
|
+
|
|
131
|
+
The source supports the following capabilities:
|
|
132
|
+
- Platform instance support for multi-Grafana deployments
|
|
133
|
+
- Stateful ingestion with support for soft-deletes
|
|
134
|
+
- Fine-grained lineage at both dataset and column levels
|
|
135
|
+
- Automated tag extraction
|
|
136
|
+
- Support for both HTTP and HTTPS connections with optional SSL verification
|
|
53
137
|
"""
|
|
54
138
|
|
|
139
|
+
config: GrafanaSourceConfig
|
|
140
|
+
report: GrafanaSourceReport
|
|
141
|
+
|
|
55
142
|
def __init__(self, config: GrafanaSourceConfig, ctx: PipelineContext):
|
|
56
143
|
super().__init__(config, ctx)
|
|
57
|
-
self.
|
|
58
|
-
self.
|
|
59
|
-
self.platform =
|
|
144
|
+
self.config = config
|
|
145
|
+
self.ctx = ctx
|
|
146
|
+
self.platform = config.platform
|
|
147
|
+
self.platform_instance = self.config.platform_instance
|
|
148
|
+
self.env = self.config.env
|
|
149
|
+
self.report = GrafanaSourceReport()
|
|
150
|
+
|
|
151
|
+
self.api_client = GrafanaAPIClient(
|
|
152
|
+
base_url=self.config.url,
|
|
153
|
+
token=self.config.service_account_token,
|
|
154
|
+
verify_ssl=self.config.verify_ssl,
|
|
155
|
+
page_size=self.config.page_size,
|
|
156
|
+
report=self.report,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# Initialize lineage extractor with graph
|
|
160
|
+
self.lineage_extractor = None
|
|
161
|
+
if self.config.include_lineage:
|
|
162
|
+
self.lineage_extractor = LineageExtractor(
|
|
163
|
+
platform=self.config.platform,
|
|
164
|
+
platform_instance=self.config.platform_instance,
|
|
165
|
+
env=self.config.env,
|
|
166
|
+
connection_to_platform_map=self.config.connection_to_platform_map,
|
|
167
|
+
graph=self.ctx.graph,
|
|
168
|
+
report=self.report,
|
|
169
|
+
include_column_lineage=self.config.include_column_lineage,
|
|
170
|
+
)
|
|
60
171
|
|
|
61
172
|
@classmethod
|
|
62
|
-
def create(cls, config_dict, ctx):
|
|
173
|
+
def create(cls, config_dict: dict, ctx: PipelineContext) -> "GrafanaSource":
|
|
63
174
|
config = GrafanaSourceConfig.parse_obj(config_dict)
|
|
64
175
|
return cls(config, ctx)
|
|
65
176
|
|
|
66
177
|
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
67
|
-
|
|
68
|
-
|
|
178
|
+
processors = super().get_workunit_processors()
|
|
179
|
+
processors.append(
|
|
69
180
|
StaleEntityRemovalHandler.create(
|
|
70
|
-
self, self.
|
|
71
|
-
).workunit_processor
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def get_report(self) -> StatefulIngestionReport:
|
|
75
|
-
return self.report
|
|
181
|
+
self, self.config, self.ctx
|
|
182
|
+
).workunit_processor
|
|
183
|
+
)
|
|
184
|
+
return processors
|
|
76
185
|
|
|
77
186
|
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
f"{self.source_config.url}/api/search", headers=headers
|
|
85
|
-
)
|
|
86
|
-
response.raise_for_status()
|
|
87
|
-
except requests.exceptions.RequestException as e:
|
|
88
|
-
self.report.report_failure(f"Failed to fetch dashboards: {str(e)}")
|
|
187
|
+
"""Main extraction logic"""
|
|
188
|
+
|
|
189
|
+
# Check if we should use basic mode
|
|
190
|
+
if self.config.basic_mode:
|
|
191
|
+
logger.info("Running in basic mode - extracting dashboard metadata only")
|
|
192
|
+
yield from self._get_workunits_basic_mode()
|
|
89
193
|
return
|
|
90
|
-
res_json = response.json()
|
|
91
|
-
for item in res_json:
|
|
92
|
-
uid = item["uid"]
|
|
93
|
-
title = item["title"]
|
|
94
|
-
url_path = item["url"]
|
|
95
|
-
full_url = f"{self.source_config.url}{url_path}"
|
|
96
|
-
dashboard_urn = builder.make_dashboard_urn(
|
|
97
|
-
platform=self.platform,
|
|
98
|
-
name=uid,
|
|
99
|
-
platform_instance=self.source_config.platform_instance,
|
|
100
|
-
)
|
|
101
194
|
|
|
102
|
-
|
|
103
|
-
|
|
195
|
+
# Enhanced mode - extract full hierarchy and details
|
|
196
|
+
yield from self._get_workunits_enhanced_mode()
|
|
197
|
+
|
|
198
|
+
def _get_workunits_basic_mode(self) -> Iterable[MetadataWorkUnit]:
|
|
199
|
+
"""Basic extraction mode - only dashboard metadata (backwards compatible)"""
|
|
200
|
+
with self.report.new_stage(GRAFANA_BASIC_EXTRACTION):
|
|
201
|
+
headers = {
|
|
202
|
+
"Authorization": f"Bearer {self.config.service_account_token.get_secret_value()}",
|
|
203
|
+
"Content-Type": "application/json",
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
try:
|
|
207
|
+
response = requests.get(
|
|
208
|
+
f"{self.config.url}/api/search",
|
|
209
|
+
headers=headers,
|
|
210
|
+
verify=self.config.verify_ssl,
|
|
211
|
+
)
|
|
212
|
+
response.raise_for_status()
|
|
213
|
+
except requests.exceptions.RequestException as e:
|
|
214
|
+
self.report.report_failure(
|
|
215
|
+
title="Dashboard Search Error",
|
|
216
|
+
message="Failed to fetch dashboards in basic mode",
|
|
217
|
+
context=str(e),
|
|
218
|
+
exc=e,
|
|
219
|
+
)
|
|
220
|
+
return
|
|
221
|
+
|
|
222
|
+
dashboards = response.json()
|
|
223
|
+
|
|
224
|
+
for item in dashboards:
|
|
225
|
+
if not self.config.dashboard_pattern.allowed(item.get("title", "")):
|
|
226
|
+
continue
|
|
227
|
+
|
|
228
|
+
uid = item["uid"]
|
|
229
|
+
title = item["title"]
|
|
230
|
+
url_path = item["url"]
|
|
231
|
+
full_url = f"{self.config.url}{url_path}"
|
|
232
|
+
|
|
233
|
+
dashboard_urn = make_dashboard_urn(
|
|
234
|
+
platform=self.platform,
|
|
235
|
+
name=uid,
|
|
236
|
+
platform_instance=self.platform_instance,
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
# Create basic dashboard info
|
|
240
|
+
dashboard_info = DashboardInfoClass(
|
|
241
|
+
description="",
|
|
242
|
+
title=title,
|
|
243
|
+
charts=[],
|
|
244
|
+
lastModified=ChangeAuditStamps(),
|
|
245
|
+
externalUrl=full_url,
|
|
246
|
+
customProperties={
|
|
247
|
+
key: str(value)
|
|
248
|
+
for key, value in {
|
|
249
|
+
"displayName": title,
|
|
250
|
+
"id": item["id"],
|
|
251
|
+
"uid": uid,
|
|
252
|
+
"title": title,
|
|
253
|
+
"uri": item["uri"],
|
|
254
|
+
"type": item["type"],
|
|
255
|
+
"folderId": item.get("folderId"),
|
|
256
|
+
"folderUid": item.get("folderUid"),
|
|
257
|
+
"folderTitle": item.get("folderTitle"),
|
|
258
|
+
}.items()
|
|
259
|
+
if value is not None
|
|
260
|
+
},
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
# Yield dashboard workunit
|
|
264
|
+
yield MetadataChangeProposalWrapper(
|
|
104
265
|
entityUrn=dashboard_urn,
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
266
|
+
aspect=dashboard_info,
|
|
267
|
+
).as_workunit()
|
|
268
|
+
|
|
269
|
+
yield MetadataChangeProposalWrapper(
|
|
270
|
+
entityUrn=dashboard_urn,
|
|
271
|
+
aspect=StatusClass(removed=False),
|
|
272
|
+
).as_workunit()
|
|
273
|
+
|
|
274
|
+
self.report.report_dashboard_scanned()
|
|
275
|
+
|
|
276
|
+
def _get_workunits_enhanced_mode(self) -> Iterable[MetadataWorkUnit]:
|
|
277
|
+
"""Enhanced extraction mode - full hierarchy, panels, and lineage"""
|
|
278
|
+
# Process folders first
|
|
279
|
+
with self.report.new_stage(GRAFANA_FOLDER_EXTRACTION):
|
|
280
|
+
for folder in self.api_client.get_folders():
|
|
281
|
+
if self.config.folder_pattern.allowed(folder.title):
|
|
282
|
+
self.report.report_folder_scanned()
|
|
283
|
+
yield from self._process_folder(folder)
|
|
284
|
+
|
|
285
|
+
# Process dashboards
|
|
286
|
+
with self.report.new_stage(GRAFANA_DASHBOARD_EXTRACTION):
|
|
287
|
+
for dashboard in self.api_client.get_dashboards():
|
|
288
|
+
if self.config.dashboard_pattern.allowed(dashboard.title):
|
|
289
|
+
self.report.report_dashboard_scanned()
|
|
290
|
+
yield from self._process_dashboard(dashboard)
|
|
291
|
+
|
|
292
|
+
def _process_folder(self, folder: Folder) -> Iterable[MetadataWorkUnit]:
|
|
293
|
+
"""Process Grafana folder metadata"""
|
|
294
|
+
folder_key = FolderKey(
|
|
295
|
+
platform=self.config.platform,
|
|
296
|
+
instance=self.config.platform_instance,
|
|
297
|
+
folder_id=folder.id,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
yield from gen_containers(
|
|
301
|
+
container_key=folder_key,
|
|
302
|
+
name=folder.title,
|
|
303
|
+
sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
|
|
304
|
+
description=folder.description,
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
def _process_dashboard(self, dashboard: Dashboard) -> Iterable[MetadataWorkUnit]:
|
|
308
|
+
"""Process dashboard and its panels"""
|
|
309
|
+
chart_urns = []
|
|
310
|
+
|
|
311
|
+
# First create the dashboard container
|
|
312
|
+
dashboard_container_key = DashboardContainerKey(
|
|
313
|
+
platform=self.config.platform,
|
|
314
|
+
instance=self.config.platform_instance,
|
|
315
|
+
dashboard_id=dashboard.uid,
|
|
316
|
+
folder_id=dashboard.folder_id,
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
# Generate dashboard container first
|
|
320
|
+
yield from gen_containers(
|
|
321
|
+
container_key=dashboard_container_key,
|
|
322
|
+
name=dashboard.title,
|
|
323
|
+
sub_types=[BIContainerSubTypes.GRAFANA_DASHBOARD],
|
|
324
|
+
description=dashboard.description,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# If dashboard is in a folder, add it to folder container
|
|
328
|
+
if dashboard.folder_id:
|
|
329
|
+
folder_key = FolderKey(
|
|
330
|
+
platform=self.config.platform,
|
|
331
|
+
instance=self.config.platform_instance,
|
|
332
|
+
folder_id=dashboard.folder_id,
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
yield from add_dataset_to_container(
|
|
336
|
+
container_key=folder_key,
|
|
337
|
+
dataset_urn=make_container_urn(dashboard_container_key),
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
# Process all panels first
|
|
341
|
+
with self.report.new_stage(GRAFANA_PANEL_EXTRACTION):
|
|
342
|
+
for panel in dashboard.panels:
|
|
343
|
+
self.report.report_chart_scanned()
|
|
344
|
+
|
|
345
|
+
# First emit the dataset for each panel's datasource
|
|
346
|
+
yield from self._process_panel_dataset(
|
|
347
|
+
panel, dashboard.uid, self.config.ingest_tags
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
# Create chart MCE
|
|
351
|
+
dataset_urn, chart_urn, chart_mcps = build_chart_mcps(
|
|
352
|
+
panel=panel,
|
|
353
|
+
dashboard=dashboard,
|
|
354
|
+
platform=self.config.platform,
|
|
355
|
+
platform_instance=self.config.platform_instance,
|
|
356
|
+
env=self.config.env,
|
|
357
|
+
base_url=self.config.url,
|
|
358
|
+
ingest_tags=self.config.ingest_tags,
|
|
130
359
|
)
|
|
360
|
+
chart_urns.append(chart_urn)
|
|
361
|
+
|
|
362
|
+
for mcp in chart_mcps:
|
|
363
|
+
yield mcp.as_workunit()
|
|
364
|
+
|
|
365
|
+
# Add chart to dashboard container
|
|
366
|
+
chart_urn = make_chart_urn(
|
|
367
|
+
self.platform,
|
|
368
|
+
f"{dashboard.uid}.{panel.id}",
|
|
369
|
+
self.platform_instance,
|
|
370
|
+
)
|
|
371
|
+
if dataset_urn:
|
|
372
|
+
input_fields = extract_fields_from_panel(
|
|
373
|
+
panel,
|
|
374
|
+
self.config.connection_to_platform_map,
|
|
375
|
+
self.ctx.graph,
|
|
376
|
+
self.report,
|
|
377
|
+
)
|
|
378
|
+
if input_fields:
|
|
379
|
+
yield from self._add_input_fields_to_chart(
|
|
380
|
+
chart_urn=chart_urn,
|
|
381
|
+
dataset_urn=dataset_urn,
|
|
382
|
+
input_fields=input_fields,
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
yield from add_dataset_to_container(
|
|
386
|
+
container_key=dashboard_container_key,
|
|
387
|
+
dataset_urn=chart_urn,
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
# Process lineage extraction
|
|
391
|
+
if self.config.include_lineage and self.lineage_extractor:
|
|
392
|
+
with self.report.new_stage(LINEAGE_EXTRACTION):
|
|
393
|
+
for panel in dashboard.panels:
|
|
394
|
+
# Process lineage
|
|
395
|
+
try:
|
|
396
|
+
lineage = self.lineage_extractor.extract_panel_lineage(panel)
|
|
397
|
+
if lineage:
|
|
398
|
+
yield lineage.as_workunit()
|
|
399
|
+
self.report.report_lineage_extracted()
|
|
400
|
+
else:
|
|
401
|
+
self.report.report_no_lineage()
|
|
402
|
+
except Exception as e:
|
|
403
|
+
logger.warning(
|
|
404
|
+
f"Failed to extract lineage for panel {panel.id}: {e}"
|
|
405
|
+
)
|
|
406
|
+
self.report.report_lineage_extraction_failure()
|
|
407
|
+
|
|
408
|
+
# Create dashboard MCPs
|
|
409
|
+
dashboard_urn, dashboard_mcps = build_dashboard_mcps(
|
|
410
|
+
dashboard=dashboard,
|
|
411
|
+
platform=self.config.platform,
|
|
412
|
+
platform_instance=self.config.platform_instance,
|
|
413
|
+
chart_urns=chart_urns,
|
|
414
|
+
base_url=self.config.url,
|
|
415
|
+
ingest_owners=self.config.ingest_owners,
|
|
416
|
+
ingest_tags=self.config.ingest_tags,
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
# Add each dashboard MCP as a work unit
|
|
420
|
+
for mcp in dashboard_mcps:
|
|
421
|
+
yield mcp.as_workunit()
|
|
422
|
+
|
|
423
|
+
# Add dashboard entity to its container
|
|
424
|
+
yield from add_dataset_to_container(
|
|
425
|
+
container_key=dashboard_container_key,
|
|
426
|
+
dataset_urn=dashboard_urn,
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
def _add_dashboard_to_folder(
|
|
430
|
+
self, dashboard: Dashboard
|
|
431
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
432
|
+
"""Add dashboard to folder container"""
|
|
433
|
+
folder_key = FolderKey(
|
|
434
|
+
platform=self.config.platform,
|
|
435
|
+
instance=self.config.platform_instance,
|
|
436
|
+
folder_id=str(dashboard.folder_id),
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
dashboard_key = DashboardContainerKey(
|
|
440
|
+
platform=self.config.platform,
|
|
441
|
+
instance=self.config.platform_instance,
|
|
442
|
+
dashboard_id=dashboard.uid,
|
|
443
|
+
folder_id=dashboard.folder_id,
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
yield from add_dataset_to_container(
|
|
447
|
+
container_key=folder_key,
|
|
448
|
+
dataset_urn=dashboard_key.as_urn(),
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
def _add_input_fields_to_chart(
|
|
452
|
+
self, chart_urn: str, dataset_urn: str, input_fields: List[SchemaFieldClass]
|
|
453
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
454
|
+
"""Add input fields aspect to chart"""
|
|
455
|
+
if not input_fields:
|
|
456
|
+
return
|
|
457
|
+
|
|
458
|
+
yield MetadataChangeProposalWrapper(
|
|
459
|
+
entityUrn=chart_urn,
|
|
460
|
+
aspect=InputFieldsClass(
|
|
461
|
+
fields=[
|
|
462
|
+
InputFieldClass(
|
|
463
|
+
schemaField=field,
|
|
464
|
+
schemaFieldUrn=make_schema_field_urn(
|
|
465
|
+
dataset_urn, field.fieldPath
|
|
466
|
+
),
|
|
467
|
+
)
|
|
468
|
+
for field in input_fields
|
|
469
|
+
]
|
|
470
|
+
),
|
|
471
|
+
).as_workunit()
|
|
472
|
+
|
|
473
|
+
def _process_panel_dataset(
|
|
474
|
+
self, panel: Panel, dashboard_uid: str, ingest_tags: bool
|
|
475
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
476
|
+
"""Process dataset metadata for a panel"""
|
|
477
|
+
if not panel.datasource_ref:
|
|
478
|
+
self.report.report_datasource_warning()
|
|
479
|
+
return
|
|
480
|
+
|
|
481
|
+
ds_type = panel.datasource_ref.type or "unknown"
|
|
482
|
+
ds_uid = panel.datasource_ref.uid or "unknown"
|
|
483
|
+
|
|
484
|
+
# Track datasource warnings for unknown types
|
|
485
|
+
if ds_type == "unknown" or ds_uid == "unknown":
|
|
486
|
+
self.report.report_datasource_warning()
|
|
487
|
+
|
|
488
|
+
# Build dataset name
|
|
489
|
+
dataset_name = f"{ds_type}.{ds_uid}.{panel.id}"
|
|
490
|
+
|
|
491
|
+
# Create dataset URN
|
|
492
|
+
dataset_urn = make_dataset_urn_with_platform_instance(
|
|
493
|
+
platform=self.platform,
|
|
494
|
+
name=dataset_name,
|
|
495
|
+
platform_instance=self.platform_instance,
|
|
496
|
+
env=self.env,
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
# Create dataset snapshot
|
|
500
|
+
dataset_snapshot = DatasetSnapshotClass(
|
|
501
|
+
urn=dataset_urn,
|
|
502
|
+
aspects=[
|
|
503
|
+
DataPlatformInstanceClass(
|
|
504
|
+
platform=make_data_platform_urn(self.platform),
|
|
505
|
+
instance=make_dataplatform_instance_urn(
|
|
506
|
+
platform=self.platform,
|
|
507
|
+
instance=self.platform_instance,
|
|
508
|
+
)
|
|
509
|
+
if self.platform_instance
|
|
510
|
+
else None,
|
|
511
|
+
),
|
|
512
|
+
DatasetPropertiesClass(
|
|
513
|
+
name=f"{ds_uid} ({panel.title or panel.id})",
|
|
514
|
+
description="",
|
|
515
|
+
customProperties={
|
|
516
|
+
"type": ds_type,
|
|
517
|
+
"uid": ds_uid,
|
|
518
|
+
"full_path": dataset_name,
|
|
519
|
+
},
|
|
520
|
+
),
|
|
521
|
+
StatusClass(removed=False),
|
|
522
|
+
],
|
|
523
|
+
)
|
|
524
|
+
|
|
525
|
+
# Add schema metadata if available
|
|
526
|
+
schema_fields = extract_fields_from_panel(
|
|
527
|
+
panel, self.config.connection_to_platform_map, self.ctx.graph, self.report
|
|
528
|
+
)
|
|
529
|
+
if schema_fields:
|
|
530
|
+
schema_metadata = SchemaMetadataClass(
|
|
531
|
+
schemaName=f"{ds_type}.{ds_uid}.{panel.id}",
|
|
532
|
+
platform=make_data_platform_urn(self.platform),
|
|
533
|
+
version=0,
|
|
534
|
+
fields=schema_fields,
|
|
535
|
+
hash="",
|
|
536
|
+
platformSchema=OtherSchemaClass(rawSchema=""),
|
|
131
537
|
)
|
|
538
|
+
dataset_snapshot.aspects.append(schema_metadata)
|
|
539
|
+
|
|
540
|
+
if dashboard_uid and self.config.ingest_tags:
|
|
541
|
+
dashboard = self.api_client.get_dashboard(dashboard_uid)
|
|
542
|
+
if dashboard and dashboard.tags:
|
|
543
|
+
tags = []
|
|
544
|
+
for tag in dashboard.tags:
|
|
545
|
+
tags.append(TagAssociationClass(tag=make_tag_urn(tag)))
|
|
546
|
+
|
|
547
|
+
if tags:
|
|
548
|
+
dataset_snapshot.aspects.append(GlobalTagsClass(tags=tags))
|
|
549
|
+
|
|
550
|
+
self.report.report_dataset_scanned()
|
|
551
|
+
yield MetadataWorkUnit(
|
|
552
|
+
id=f"grafana-dataset-{ds_uid}-{panel.id}",
|
|
553
|
+
mce=MetadataChangeEventClass(proposedSnapshot=dataset_snapshot),
|
|
554
|
+
)
|
|
555
|
+
|
|
556
|
+
# Add dataset to dashboard container
|
|
557
|
+
if dashboard_uid:
|
|
558
|
+
dashboard_key = DashboardContainerKey(
|
|
559
|
+
platform=self.platform,
|
|
560
|
+
instance=self.platform_instance,
|
|
561
|
+
dashboard_id=dashboard_uid,
|
|
562
|
+
)
|
|
563
|
+
yield from add_dataset_to_container(
|
|
564
|
+
container_key=dashboard_key,
|
|
565
|
+
dataset_urn=dataset_urn,
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
def get_report(self) -> GrafanaSourceReport:
|
|
569
|
+
return self.report
|