acryl-datahub 1.2.0.2rc2__py3-none-any.whl → 1.2.0.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (45):
  1. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/METADATA +2620 -2618
  2. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/RECORD +45 -37
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  6. datahub/ingestion/source/aws/glue.py +8 -0
  7. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  8. datahub/ingestion/source/common/subtypes.py +2 -0
  9. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  10. datahub/ingestion/source/delta_lake/source.py +1 -0
  11. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  12. datahub/ingestion/source/grafana/field_utils.py +307 -0
  13. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  14. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  15. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  16. datahub/ingestion/source/grafana/lineage.py +202 -0
  17. datahub/ingestion/source/grafana/models.py +120 -0
  18. datahub/ingestion/source/grafana/report.py +91 -0
  19. datahub/ingestion/source/grafana/types.py +16 -0
  20. datahub/ingestion/source/hex/hex.py +8 -0
  21. datahub/ingestion/source/looker/looker_source.py +9 -0
  22. datahub/ingestion/source/looker/lookml_source.py +8 -0
  23. datahub/ingestion/source/mongodb.py +11 -1
  24. datahub/ingestion/source/redshift/redshift.py +8 -1
  25. datahub/ingestion/source/s3/source.py +9 -1
  26. datahub/ingestion/source/sql/athena.py +8 -2
  27. datahub/ingestion/source/sql/clickhouse.py +9 -0
  28. datahub/ingestion/source/sql/vertica.py +3 -0
  29. datahub/ingestion/source/sql_queries.py +88 -46
  30. datahub/ingestion/source/unity/proxy.py +112 -22
  31. datahub/ingestion/source/unity/source.py +7 -10
  32. datahub/metadata/_internal_schema_classes.py +18 -3
  33. datahub/metadata/schema.avsc +19 -1
  34. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +10 -1
  35. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  36. datahub/metadata/schemas/MetadataChangeEvent.avsc +9 -0
  37. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  38. datahub/sdk/dataset.py +44 -0
  39. datahub/sdk/search_filters.py +34 -14
  40. datahub/sql_parsing/sql_parsing_aggregator.py +5 -0
  41. datahub/telemetry/telemetry.py +4 -1
  42. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/WHEEL +0 -0
  43. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/entry_points.txt +0 -0
  44. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/licenses/LICENSE +0 -0
  45. {acryl_datahub-1.2.0.2rc2.dist-info → acryl_datahub-1.2.0.3rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,142 @@
1
+ """API client for Grafana metadata extraction"""
2
+
3
+ import logging
4
+ from typing import Dict, List, Optional, Union
5
+
6
+ import requests
7
+ import urllib3.exceptions
8
+ from pydantic import SecretStr
9
+
10
+ from datahub.ingestion.source.grafana.models import Dashboard, Folder
11
+ from datahub.ingestion.source.grafana.report import GrafanaSourceReport
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class GrafanaAPIClient:
17
+ """Client for making requests to Grafana API"""
18
+
19
+ def __init__(
20
+ self,
21
+ base_url: str,
22
+ token: SecretStr,
23
+ verify_ssl: bool,
24
+ page_size: int,
25
+ report: GrafanaSourceReport,
26
+ ) -> None:
27
+ self.base_url = base_url
28
+ self.verify_ssl = verify_ssl
29
+ self.page_size = page_size
30
+ self.report = report
31
+ self.session = self._create_session(token)
32
+
33
+ def _create_session(self, token: SecretStr) -> requests.Session:
34
+ session = requests.Session()
35
+ session.headers.update(
36
+ {
37
+ "Authorization": f"Bearer {token.get_secret_value()}",
38
+ "Accept": "application/json",
39
+ "Content-Type": "application/json",
40
+ }
41
+ )
42
+ session.verify = self.verify_ssl
43
+
44
+ # If SSL verification is disabled, suppress the warnings
45
+ if not self.verify_ssl:
46
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
47
+ self.report.warning(
48
+ title="SSL Configuration Warning",
49
+ message="SSL Verification is recommended.",
50
+ )
51
+
52
+ return session
53
+
54
+ def get_folders(self) -> List[Folder]:
55
+ """Fetch all folders from Grafana with pagination."""
56
+ folders: List[Folder] = []
57
+ page = 1
58
+ per_page = self.page_size
59
+
60
+ while True:
61
+ try:
62
+ response = self.session.get(
63
+ f"{self.base_url}/api/folders",
64
+ params={"page": page, "limit": per_page},
65
+ )
66
+ response.raise_for_status()
67
+
68
+ batch = response.json()
69
+ if not batch:
70
+ break
71
+
72
+ folders.extend(Folder.parse_obj(folder) for folder in batch)
73
+ page += 1
74
+ except requests.exceptions.RequestException as e:
75
+ self.report.report_failure(
76
+ title="Folder Fetch Error",
77
+ message="Failed to fetch folders on page",
78
+ context=str(page),
79
+ exc=e,
80
+ )
81
+ self.report.report_permission_warning() # Likely a permission issue
82
+ break
83
+
84
+ return folders
85
+
86
+ def get_dashboard(self, uid: str) -> Optional[Dashboard]:
87
+ """Fetch a specific dashboard by UID"""
88
+ try:
89
+ response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}")
90
+ response.raise_for_status()
91
+ return Dashboard.parse_obj(response.json())
92
+ except requests.exceptions.RequestException as e:
93
+ self.report.warning(
94
+ title="Dashboard Fetch Error",
95
+ message="Failed to fetch dashboard",
96
+ context=uid,
97
+ exc=e,
98
+ )
99
+ if e.response and e.response.status_code in (401, 403):
100
+ self.report.report_permission_warning()
101
+ return None
102
+
103
+ def get_dashboards(self) -> List[Dashboard]:
104
+ """Fetch all dashboards from search endpoint with pagination."""
105
+ dashboards: List[Dashboard] = []
106
+ page = 1
107
+ per_page = self.page_size
108
+
109
+ while True:
110
+ try:
111
+ params: Dict[str, Union[str, int]] = {
112
+ "type": "dash-db",
113
+ "page": page,
114
+ "limit": per_page,
115
+ }
116
+ response = self.session.get(
117
+ f"{self.base_url}/api/search",
118
+ params=params,
119
+ )
120
+ response.raise_for_status()
121
+
122
+ batch = response.json()
123
+ if not batch:
124
+ break
125
+
126
+ for result in batch:
127
+ dashboard = self.get_dashboard(result["uid"])
128
+ if dashboard:
129
+ dashboards.append(dashboard)
130
+ page += 1
131
+ except requests.exceptions.RequestException as e:
132
+ self.report.report_failure(
133
+ title="Dashboard Search Error",
134
+ message="Failed to fetch dashboards on page",
135
+ context=str(page),
136
+ exc=e,
137
+ )
138
+ if e.response and e.response.status_code in (401, 403):
139
+ self.report.report_permission_warning()
140
+ break
141
+
142
+ return dashboards
@@ -0,0 +1,104 @@
1
+ from typing import Dict, Optional
2
+
3
+ from pydantic import Field, SecretStr, validator
4
+
5
+ from datahub.configuration.common import AllowDenyPattern
6
+ from datahub.configuration.source_common import (
7
+ DatasetLineageProviderConfigBase,
8
+ EnvConfigMixin,
9
+ PlatformInstanceConfigMixin,
10
+ )
11
+ from datahub.ingestion.source.state.stateful_ingestion_base import (
12
+ StatefulIngestionConfigBase,
13
+ )
14
+ from datahub.utilities import config_clean
15
+
16
+
17
+ class PlatformConnectionConfig(
18
+ EnvConfigMixin,
19
+ PlatformInstanceConfigMixin,
20
+ ):
21
+ """Platform connection configuration for mapping Grafana datasources to their actual platforms."""
22
+
23
+ platform: str = Field(
24
+ description="The platform name (e.g., 'postgres', 'mysql', 'snowflake')"
25
+ )
26
+ database: Optional[str] = Field(default=None, description="Default database name")
27
+ database_schema: Optional[str] = Field(
28
+ default=None, description="Default schema name"
29
+ )
30
+
31
+
32
+ class GrafanaSourceConfig(
33
+ DatasetLineageProviderConfigBase,
34
+ StatefulIngestionConfigBase,
35
+ PlatformInstanceConfigMixin,
36
+ EnvConfigMixin,
37
+ ):
38
+ """Configuration for Grafana source"""
39
+
40
+ platform: str = Field(default="grafana", hidden_from_docs=True)
41
+ url: str = Field(
42
+ description="Grafana URL in the format http://your-grafana-instance with no trailing slash"
43
+ )
44
+ service_account_token: SecretStr = Field(
45
+ description="Service account token for Grafana"
46
+ )
47
+ verify_ssl: bool = Field(
48
+ default=True,
49
+ description="Whether to verify SSL certificates when connecting to Grafana",
50
+ )
51
+
52
+ # API pagination configuration
53
+ page_size: int = Field(
54
+ default=100,
55
+ description="Number of items to fetch per API call when paginating through folders and dashboards",
56
+ )
57
+
58
+ # Extraction mode configuration
59
+ basic_mode: bool = Field(
60
+ default=False,
61
+ description="Enable basic extraction mode for users with limited permissions. "
62
+ "In basic mode, only dashboard metadata is extracted without detailed panel information, "
63
+ "lineage, or folder hierarchy. This requires only basic dashboard read permissions.",
64
+ )
65
+
66
+ # Content filtering
67
+ dashboard_pattern: AllowDenyPattern = Field(
68
+ default=AllowDenyPattern.allow_all(),
69
+ description="Regex pattern to filter dashboards for ingestion",
70
+ )
71
+ folder_pattern: AllowDenyPattern = Field(
72
+ default=AllowDenyPattern.allow_all(),
73
+ description="Regex pattern to filter folders for ingestion",
74
+ )
75
+
76
+ # Feature toggles
77
+ ingest_tags: bool = Field(
78
+ default=True, description="Whether to ingest dashboard and chart tags"
79
+ )
80
+ ingest_owners: bool = Field(
81
+ default=True, description="Whether to ingest dashboard ownership information"
82
+ )
83
+
84
+ include_lineage: bool = Field(
85
+ default=True,
86
+ description="Whether to extract lineage between charts and data sources. "
87
+ "When enabled, the source will parse SQL queries and datasource configurations "
88
+ "to build lineage relationships.",
89
+ )
90
+ include_column_lineage: bool = Field(
91
+ default=True,
92
+ description="Whether to extract column-level lineage from SQL queries. "
93
+ "Only applicable when include_lineage is enabled.",
94
+ )
95
+
96
+ # Platform connection mappings
97
+ connection_to_platform_map: Dict[str, PlatformConnectionConfig] = Field(
98
+ default_factory=dict,
99
+ description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction",
100
+ )
101
+
102
+ @validator("url", allow_reuse=True)
103
+ def remove_trailing_slash(cls, v):
104
+ return config_clean.remove_trailing_slashes(v)