acryl-datahub 1.2.0.2rc2__py3-none-any.whl → 1.2.0.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (45)
  1. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/METADATA +2620 -2618
  2. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/RECORD +45 -37
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  6. datahub/ingestion/source/aws/glue.py +8 -0
  7. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  8. datahub/ingestion/source/common/subtypes.py +2 -0
  9. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  10. datahub/ingestion/source/delta_lake/source.py +1 -0
  11. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  12. datahub/ingestion/source/grafana/field_utils.py +307 -0
  13. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  14. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  15. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  16. datahub/ingestion/source/grafana/lineage.py +202 -0
  17. datahub/ingestion/source/grafana/models.py +120 -0
  18. datahub/ingestion/source/grafana/report.py +91 -0
  19. datahub/ingestion/source/grafana/types.py +16 -0
  20. datahub/ingestion/source/hex/hex.py +8 -0
  21. datahub/ingestion/source/looker/looker_source.py +9 -0
  22. datahub/ingestion/source/looker/lookml_source.py +8 -0
  23. datahub/ingestion/source/mongodb.py +11 -1
  24. datahub/ingestion/source/redshift/redshift.py +8 -1
  25. datahub/ingestion/source/s3/source.py +9 -1
  26. datahub/ingestion/source/sql/athena.py +8 -2
  27. datahub/ingestion/source/sql/clickhouse.py +9 -0
  28. datahub/ingestion/source/sql/vertica.py +3 -0
  29. datahub/ingestion/source/sql_queries.py +88 -46
  30. datahub/ingestion/source/unity/proxy.py +112 -22
  31. datahub/ingestion/source/unity/source.py +7 -10
  32. datahub/metadata/_internal_schema_classes.py +18 -3
  33. datahub/metadata/schema.avsc +19 -1
  34. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
  35. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  36. datahub/metadata/schemas/MetadataChangeEvent.avsc +9 -0
  37. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  38. datahub/sdk/dataset.py +44 -0
  39. datahub/sdk/search_filters.py +34 -14
  40. datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
  41. datahub/telemetry/telemetry.py +4 -1
  42. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/WHEEL +0 -0
  43. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/entry_points.txt +0 -0
  44. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/licenses/LICENSE +0 -0
  45. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/top_level.txt +0 -0
@@ -1,131 +1,569 @@
1
+ import logging
1
2
  from typing import Iterable, List, Optional
2
3
 
3
4
  import requests
4
- from pydantic import Field, SecretStr
5
5
 
6
- import datahub.emitter.mce_builder as builder
7
- from datahub.configuration.source_common import PlatformInstanceConfigMixin
6
+ from datahub.emitter.mce_builder import (
7
+ make_chart_urn,
8
+ make_container_urn,
9
+ make_dashboard_urn,
10
+ make_data_platform_urn,
11
+ make_dataplatform_instance_urn,
12
+ make_dataset_urn_with_platform_instance,
13
+ make_schema_field_urn,
14
+ make_tag_urn,
15
+ )
8
16
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
17
+ from datahub.emitter.mcp_builder import add_dataset_to_container, gen_containers
9
18
  from datahub.ingestion.api.common import PipelineContext
10
19
  from datahub.ingestion.api.decorators import (
20
+ SourceCapability,
11
21
  SupportStatus,
22
+ capability,
12
23
  config_class,
13
24
  platform_name,
14
25
  support_status,
15
26
  )
16
27
  from datahub.ingestion.api.source import MetadataWorkUnitProcessor
17
- from datahub.ingestion.api.source_helpers import auto_workunit
18
28
  from datahub.ingestion.api.workunit import MetadataWorkUnit
29
+ from datahub.ingestion.source.common.subtypes import BIContainerSubTypes
30
+ from datahub.ingestion.source.grafana.entity_mcp_builder import (
31
+ build_chart_mcps,
32
+ build_dashboard_mcps,
33
+ )
34
+ from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel
35
+ from datahub.ingestion.source.grafana.grafana_api import GrafanaAPIClient
36
+ from datahub.ingestion.source.grafana.grafana_config import (
37
+ GrafanaSourceConfig,
38
+ )
39
+ from datahub.ingestion.source.grafana.lineage import LineageExtractor
40
+ from datahub.ingestion.source.grafana.models import (
41
+ Dashboard,
42
+ DashboardContainerKey,
43
+ Folder,
44
+ FolderKey,
45
+ Panel,
46
+ )
47
+ from datahub.ingestion.source.grafana.report import (
48
+ GrafanaSourceReport,
49
+ )
19
50
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
20
51
  StaleEntityRemovalHandler,
21
- StaleEntityRemovalSourceReport,
22
- StatefulIngestionConfigBase,
23
52
  )
24
53
  from datahub.ingestion.source.state.stateful_ingestion_base import (
25
- StatefulIngestionReport,
26
54
  StatefulIngestionSourceBase,
27
55
  )
56
+ from datahub.ingestion.source_report.ingestion_stage import (
57
+ LINEAGE_EXTRACTION,
58
+ )
28
59
  from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
29
- from datahub.metadata.schema_classes import DashboardInfoClass, StatusClass
30
-
31
-
32
- class GrafanaSourceConfig(StatefulIngestionConfigBase, PlatformInstanceConfigMixin):
33
- url: str = Field(
34
- default="",
35
- description="Grafana URL in the format http://your-grafana-instance with no trailing slash",
36
- )
37
- service_account_token: SecretStr = Field(
38
- description="Service account token for Grafana"
39
- )
60
+ from datahub.metadata.schema_classes import (
61
+ DashboardInfoClass,
62
+ DataPlatformInstanceClass,
63
+ DatasetPropertiesClass,
64
+ DatasetSnapshotClass,
65
+ GlobalTagsClass,
66
+ InputFieldClass,
67
+ InputFieldsClass,
68
+ MetadataChangeEventClass,
69
+ OtherSchemaClass,
70
+ SchemaFieldClass,
71
+ SchemaMetadataClass,
72
+ StatusClass,
73
+ TagAssociationClass,
74
+ )
40
75
 
76
# Grafana-specific ingestion stage names, used as labels for
# `self.report.new_stage(...)` timing/progress sections in the source below.
GRAFANA_BASIC_EXTRACTION = "Grafana Basic Dashboard Extraction"
GRAFANA_FOLDER_EXTRACTION = "Grafana Folder Extraction"
GRAFANA_DASHBOARD_EXTRACTION = "Grafana Dashboard Extraction"
GRAFANA_PANEL_EXTRACTION = "Grafana Panel Extraction"

# Module-level logger, per stdlib logging convention.
logger = logging.getLogger(__name__)
44
83
 
45
84
 
46
85
@platform_name("Grafana")
@config_class(GrafanaSourceConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.DELETION_DETECTION, "Enabled by default")
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
@capability(SourceCapability.LINEAGE_FINE, "Enabled by default")
@capability(SourceCapability.OWNERSHIP, "Enabled by default")
@capability(SourceCapability.TAGS, "Enabled by default")
class GrafanaSource(StatefulIngestionSourceBase):
    """
    This plugin extracts metadata from Grafana and ingests it into DataHub. It connects to Grafana's API
    to extract metadata about dashboards, charts, and data sources. The following types of metadata are extracted:

    - Container Entities:
        - Folders: Top-level organizational units in Grafana
        - Dashboards: Collections of panels and charts
        - The full container hierarchy is preserved (Folders -> Dashboards -> Charts/Datasets)

    - Charts and Visualizations:
        - All panel types (graphs, tables, stat panels, etc.)
        - Chart configuration and properties
        - Links to the original Grafana UI
        - Custom properties including panel types and data source information
        - Input fields and schema information when available

    - Data Sources and Datasets:
        - Physical datasets representing Grafana's data sources
        - Dataset schema information extracted from queries and panel configurations
        - Support for various data source types (SQL, Prometheus, etc.)
        - Custom properties including data source type and configuration

    - Lineage Information:
        - Dataset-level lineage showing relationships between:
            - Source data systems and Grafana datasets
            - Grafana datasets and charts
        - Column-level lineage for SQL-based data sources
        - Support for external source systems through configurable platform mappings

    - Tags and Ownership:
        - Dashboard and chart tags
        - Ownership information derived from:
            - Dashboard creators
            - Technical owners based on dashboard UIDs
            - Custom ownership assignments

    The source supports the following capabilities:
    - Platform instance support for multi-Grafana deployments
    - Stateful ingestion with support for soft-deletes
    - Fine-grained lineage at both dataset and column levels
    - Automated tag extraction
    - Support for both HTTP and HTTPS connections with optional SSL verification
    """

    # Declared here so type checkers see the narrowed (Grafana-specific)
    # config/report types rather than the base-class ones.
    config: GrafanaSourceConfig
    report: GrafanaSourceReport

    def __init__(self, config: GrafanaSourceConfig, ctx: PipelineContext):
        """Set up API client and (optionally) the lineage extractor from config."""
        super().__init__(config, ctx)
        self.config = config
        self.ctx = ctx
        self.platform = config.platform
        self.platform_instance = self.config.platform_instance
        self.env = self.config.env
        self.report = GrafanaSourceReport()

        # Thin HTTP client over the Grafana REST API; shares the report so
        # API-level failures are surfaced in the ingestion report.
        self.api_client = GrafanaAPIClient(
            base_url=self.config.url,
            token=self.config.service_account_token,
            verify_ssl=self.config.verify_ssl,
            page_size=self.config.page_size,
            report=self.report,
        )

        # Initialize lineage extractor with graph
        self.lineage_extractor = None
        if self.config.include_lineage:
            self.lineage_extractor = LineageExtractor(
                platform=self.config.platform,
                platform_instance=self.config.platform_instance,
                env=self.config.env,
                connection_to_platform_map=self.config.connection_to_platform_map,
                graph=self.ctx.graph,
                report=self.report,
                include_column_lineage=self.config.include_column_lineage,
            )

    @classmethod
    def create(cls, config_dict: dict, ctx: PipelineContext) -> "GrafanaSource":
        """Standard factory used by the ingestion framework: parse raw config dict."""
        config = GrafanaSourceConfig.parse_obj(config_dict)
        return cls(config, ctx)

    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
        """Extend the base processors with stale-entity removal (soft-delete support)."""
        processors = super().get_workunit_processors()
        processors.append(
            StaleEntityRemovalHandler.create(
                self, self.config, self.ctx
            ).workunit_processor
        )
        return processors

    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
        """Main extraction logic"""

        # Check if we should use basic mode
        if self.config.basic_mode:
            logger.info("Running in basic mode - extracting dashboard metadata only")
            yield from self._get_workunits_basic_mode()
            return

        # Enhanced mode - extract full hierarchy and details
        yield from self._get_workunits_enhanced_mode()

    def _get_workunits_basic_mode(self) -> Iterable[MetadataWorkUnit]:
        """Basic extraction mode - only dashboard metadata (backwards compatible)"""
        with self.report.new_stage(GRAFANA_BASIC_EXTRACTION):
            # Basic mode bypasses GrafanaAPIClient and calls /api/search directly.
            headers = {
                "Authorization": f"Bearer {self.config.service_account_token.get_secret_value()}",
                "Content-Type": "application/json",
            }

            try:
                response = requests.get(
                    f"{self.config.url}/api/search",
                    headers=headers,
                    verify=self.config.verify_ssl,
                )
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                # Report and bail out; basic mode has no partial-retry logic.
                self.report.report_failure(
                    title="Dashboard Search Error",
                    message="Failed to fetch dashboards in basic mode",
                    context=str(e),
                    exc=e,
                )
                return

            dashboards = response.json()

            for item in dashboards:
                # Filter by dashboard title against the configured allow/deny pattern.
                if not self.config.dashboard_pattern.allowed(item.get("title", "")):
                    continue

                uid = item["uid"]
                title = item["title"]
                url_path = item["url"]
                full_url = f"{self.config.url}{url_path}"

                dashboard_urn = make_dashboard_urn(
                    platform=self.platform,
                    name=uid,
                    platform_instance=self.platform_instance,
                )

                # Create basic dashboard info
                dashboard_info = DashboardInfoClass(
                    description="",
                    title=title,
                    charts=[],
                    lastModified=ChangeAuditStamps(),
                    externalUrl=full_url,
                    customProperties={
                        # Stringify values and drop None entries (optional folder fields).
                        key: str(value)
                        for key, value in {
                            "displayName": title,
                            "id": item["id"],
                            "uid": uid,
                            "title": title,
                            "uri": item["uri"],
                            "type": item["type"],
                            "folderId": item.get("folderId"),
                            "folderUid": item.get("folderUid"),
                            "folderTitle": item.get("folderTitle"),
                        }.items()
                        if value is not None
                    },
                )

                # Yield dashboard workunit
                yield MetadataChangeProposalWrapper(
                    entityUrn=dashboard_urn,
                    aspect=dashboard_info,
                ).as_workunit()

                yield MetadataChangeProposalWrapper(
                    entityUrn=dashboard_urn,
                    aspect=StatusClass(removed=False),
                ).as_workunit()

                self.report.report_dashboard_scanned()

    def _get_workunits_enhanced_mode(self) -> Iterable[MetadataWorkUnit]:
        """Enhanced extraction mode - full hierarchy, panels, and lineage"""
        # Process folders first so folder containers exist before dashboards
        # reference them.
        with self.report.new_stage(GRAFANA_FOLDER_EXTRACTION):
            for folder in self.api_client.get_folders():
                if self.config.folder_pattern.allowed(folder.title):
                    self.report.report_folder_scanned()
                    yield from self._process_folder(folder)

        # Process dashboards
        with self.report.new_stage(GRAFANA_DASHBOARD_EXTRACTION):
            for dashboard in self.api_client.get_dashboards():
                if self.config.dashboard_pattern.allowed(dashboard.title):
                    self.report.report_dashboard_scanned()
                    yield from self._process_dashboard(dashboard)

    def _process_folder(self, folder: Folder) -> Iterable[MetadataWorkUnit]:
        """Process Grafana folder metadata"""
        folder_key = FolderKey(
            platform=self.config.platform,
            instance=self.config.platform_instance,
            folder_id=folder.id,
        )

        # NOTE(review): the Looker folder subtype is reused here for Grafana
        # folders — confirm there is no dedicated Grafana folder subtype.
        yield from gen_containers(
            container_key=folder_key,
            name=folder.title,
            sub_types=[BIContainerSubTypes.LOOKER_FOLDER],
            description=folder.description,
        )

    def _process_dashboard(self, dashboard: Dashboard) -> Iterable[MetadataWorkUnit]:
        """Process dashboard and its panels"""
        chart_urns = []

        # First create the dashboard container
        dashboard_container_key = DashboardContainerKey(
            platform=self.config.platform,
            instance=self.config.platform_instance,
            dashboard_id=dashboard.uid,
            folder_id=dashboard.folder_id,
        )

        # Generate dashboard container first
        yield from gen_containers(
            container_key=dashboard_container_key,
            name=dashboard.title,
            sub_types=[BIContainerSubTypes.GRAFANA_DASHBOARD],
            description=dashboard.description,
        )

        # If dashboard is in a folder, add it to folder container
        if dashboard.folder_id:
            folder_key = FolderKey(
                platform=self.config.platform,
                instance=self.config.platform_instance,
                folder_id=dashboard.folder_id,
            )

            yield from add_dataset_to_container(
                container_key=folder_key,
                dataset_urn=make_container_urn(dashboard_container_key),
            )

        # Process all panels first
        with self.report.new_stage(GRAFANA_PANEL_EXTRACTION):
            for panel in dashboard.panels:
                self.report.report_chart_scanned()

                # First emit the dataset for each panel's datasource
                yield from self._process_panel_dataset(
                    panel, dashboard.uid, self.config.ingest_tags
                )

                # Create chart MCE
                dataset_urn, chart_urn, chart_mcps = build_chart_mcps(
                    panel=panel,
                    dashboard=dashboard,
                    platform=self.config.platform,
                    platform_instance=self.config.platform_instance,
                    env=self.config.env,
                    base_url=self.config.url,
                    ingest_tags=self.config.ingest_tags,
                )
                chart_urns.append(chart_urn)

                for mcp in chart_mcps:
                    yield mcp.as_workunit()

                # Add chart to dashboard container
                # NOTE(review): chart_urn is rebuilt here, shadowing the one
                # returned by build_chart_mcps — presumably equivalent; verify.
                chart_urn = make_chart_urn(
                    self.platform,
                    f"{dashboard.uid}.{panel.id}",
                    self.platform_instance,
                )
                if dataset_urn:
                    input_fields = extract_fields_from_panel(
                        panel,
                        self.config.connection_to_platform_map,
                        self.ctx.graph,
                        self.report,
                    )
                    if input_fields:
                        yield from self._add_input_fields_to_chart(
                            chart_urn=chart_urn,
                            dataset_urn=dataset_urn,
                            input_fields=input_fields,
                        )

                yield from add_dataset_to_container(
                    container_key=dashboard_container_key,
                    dataset_urn=chart_urn,
                )

        # Process lineage extraction
        if self.config.include_lineage and self.lineage_extractor:
            with self.report.new_stage(LINEAGE_EXTRACTION):
                for panel in dashboard.panels:
                    # Process lineage; a failure on one panel must not abort
                    # the rest of the dashboard.
                    try:
                        lineage = self.lineage_extractor.extract_panel_lineage(panel)
                        if lineage:
                            yield lineage.as_workunit()
                            self.report.report_lineage_extracted()
                        else:
                            self.report.report_no_lineage()
                    except Exception as e:
                        logger.warning(
                            f"Failed to extract lineage for panel {panel.id}: {e}"
                        )
                        self.report.report_lineage_extraction_failure()

        # Create dashboard MCPs
        dashboard_urn, dashboard_mcps = build_dashboard_mcps(
            dashboard=dashboard,
            platform=self.config.platform,
            platform_instance=self.config.platform_instance,
            chart_urns=chart_urns,
            base_url=self.config.url,
            ingest_owners=self.config.ingest_owners,
            ingest_tags=self.config.ingest_tags,
        )

        # Add each dashboard MCP as a work unit
        for mcp in dashboard_mcps:
            yield mcp.as_workunit()

        # Add dashboard entity to its container
        yield from add_dataset_to_container(
            container_key=dashboard_container_key,
            dataset_urn=dashboard_urn,
        )

    def _add_dashboard_to_folder(
        self, dashboard: Dashboard
    ) -> Iterable[MetadataWorkUnit]:
        """Add dashboard to folder container"""
        # NOTE(review): no caller of this helper is visible in this module —
        # confirm it is still needed (folder linking also happens inline in
        # _process_dashboard).
        folder_key = FolderKey(
            platform=self.config.platform,
            instance=self.config.platform_instance,
            folder_id=str(dashboard.folder_id),
        )

        dashboard_key = DashboardContainerKey(
            platform=self.config.platform,
            instance=self.config.platform_instance,
            dashboard_id=dashboard.uid,
            folder_id=dashboard.folder_id,
        )

        yield from add_dataset_to_container(
            container_key=folder_key,
            dataset_urn=dashboard_key.as_urn(),
        )

    def _add_input_fields_to_chart(
        self, chart_urn: str, dataset_urn: str, input_fields: List[SchemaFieldClass]
    ) -> Iterable[MetadataWorkUnit]:
        """Add input fields aspect to chart"""
        if not input_fields:
            return

        yield MetadataChangeProposalWrapper(
            entityUrn=chart_urn,
            aspect=InputFieldsClass(
                fields=[
                    InputFieldClass(
                        schemaField=field,
                        schemaFieldUrn=make_schema_field_urn(
                            dataset_urn, field.fieldPath
                        ),
                    )
                    for field in input_fields
                ]
            ),
        ).as_workunit()

    def _process_panel_dataset(
        self, panel: Panel, dashboard_uid: str, ingest_tags: bool
    ) -> Iterable[MetadataWorkUnit]:
        """Process dataset metadata for a panel"""
        # NOTE(review): the ingest_tags parameter is never read — the body
        # uses self.config.ingest_tags below; consider reconciling.
        if not panel.datasource_ref:
            self.report.report_datasource_warning()
            return

        ds_type = panel.datasource_ref.type or "unknown"
        ds_uid = panel.datasource_ref.uid or "unknown"

        # Track datasource warnings for unknown types
        if ds_type == "unknown" or ds_uid == "unknown":
            self.report.report_datasource_warning()

        # Build dataset name
        dataset_name = f"{ds_type}.{ds_uid}.{panel.id}"

        # Create dataset URN
        dataset_urn = make_dataset_urn_with_platform_instance(
            platform=self.platform,
            name=dataset_name,
            platform_instance=self.platform_instance,
            env=self.env,
        )

        # Create dataset snapshot
        dataset_snapshot = DatasetSnapshotClass(
            urn=dataset_urn,
            aspects=[
                DataPlatformInstanceClass(
                    platform=make_data_platform_urn(self.platform),
                    instance=make_dataplatform_instance_urn(
                        platform=self.platform,
                        instance=self.platform_instance,
                    )
                    if self.platform_instance
                    else None,
                ),
                DatasetPropertiesClass(
                    name=f"{ds_uid} ({panel.title or panel.id})",
                    description="",
                    customProperties={
                        "type": ds_type,
                        "uid": ds_uid,
                        "full_path": dataset_name,
                    },
                ),
                StatusClass(removed=False),
            ],
        )

        # Add schema metadata if available
        schema_fields = extract_fields_from_panel(
            panel, self.config.connection_to_platform_map, self.ctx.graph, self.report
        )
        if schema_fields:
            schema_metadata = SchemaMetadataClass(
                schemaName=f"{ds_type}.{ds_uid}.{panel.id}",
                platform=make_data_platform_urn(self.platform),
                version=0,
                fields=schema_fields,
                hash="",
                platformSchema=OtherSchemaClass(rawSchema=""),
            )
            dataset_snapshot.aspects.append(schema_metadata)

        # Propagate dashboard tags to the panel's dataset when enabled.
        if dashboard_uid and self.config.ingest_tags:
            dashboard = self.api_client.get_dashboard(dashboard_uid)
            if dashboard and dashboard.tags:
                tags = []
                for tag in dashboard.tags:
                    tags.append(TagAssociationClass(tag=make_tag_urn(tag)))

                if tags:
                    dataset_snapshot.aspects.append(GlobalTagsClass(tags=tags))

        self.report.report_dataset_scanned()
        yield MetadataWorkUnit(
            id=f"grafana-dataset-{ds_uid}-{panel.id}",
            mce=MetadataChangeEventClass(proposedSnapshot=dataset_snapshot),
        )

        # Add dataset to dashboard container
        if dashboard_uid:
            dashboard_key = DashboardContainerKey(
                platform=self.platform,
                instance=self.platform_instance,
                dashboard_id=dashboard_uid,
            )
            yield from add_dataset_to_container(
                container_key=dashboard_key,
                dataset_urn=dataset_urn,
            )

    def get_report(self) -> GrafanaSourceReport:
        """Return the accumulated ingestion report."""
        return self.report