acryl-datahub 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as potentially problematic.

Files changed (34)
  1. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/METADATA +2457 -2458
  2. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/RECORD +34 -32
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_check.py +1 -1
  5. datahub/emitter/mce_builder.py +6 -0
  6. datahub/ingestion/autogenerated/capability_summary.json +12 -12
  7. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +2 -0
  8. datahub/ingestion/source/common/subtypes.py +2 -0
  9. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  10. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  11. datahub/ingestion/source/fivetran/config.py +33 -0
  12. datahub/ingestion/source/fivetran/fivetran.py +184 -13
  13. datahub/ingestion/source/fivetran/fivetran_log_api.py +20 -5
  14. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  15. datahub/ingestion/source/fivetran/response_models.py +97 -0
  16. datahub/ingestion/source/hex/hex.py +1 -1
  17. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  18. datahub/ingestion/source/metabase.py +23 -4
  19. datahub/ingestion/source/mlflow.py +1 -1
  20. datahub/ingestion/source/s3/source.py +1 -1
  21. datahub/ingestion/source/salesforce.py +1 -1
  22. datahub/ingestion/source/slack/slack.py +1 -1
  23. datahub/ingestion/source/snowflake/snowflake_queries.py +3 -0
  24. datahub/ingestion/source/snowflake/snowflake_summary.py +1 -1
  25. datahub/ingestion/source/sql_queries.py +1 -1
  26. datahub/ingestion/source/unity/source.py +1 -1
  27. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  28. datahub/metadata/schema.avsc +4 -2
  29. datahub/metadata/schemas/DataHubFileInfo.avsc +4 -2
  30. datahub/sdk/mlmodel.py +19 -0
  31. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/WHEEL +0 -0
  32. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/entry_points.txt +0 -0
  33. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/licenses/LICENSE +0 -0
  34. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/fivetran/fivetran.py

@@ -1,5 +1,6 @@
 import logging
 from typing import Dict, Iterable, List, Optional, Union
+from urllib.parse import urlparse
 
 import datahub.emitter.mce_builder as builder
 from datahub.api.entities.datajob import DataJob as DataJobV1
@@ -22,6 +23,7 @@ from datahub.ingestion.api.source import (
     StructuredLogCategory,
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.fivetran.config import (
     KNOWN_DATA_PLATFORM_MAPPING,
     Constant,
@@ -35,29 +37,39 @@ from datahub.ingestion.source.fivetran.fivetran_query import (
     MAX_JOBS_PER_CONNECTOR,
     MAX_TABLE_LINEAGE_PER_CONNECTOR,
 )
+from datahub.ingestion.source.fivetran.fivetran_rest_api import FivetranAPIClient
+from datahub.ingestion.source.fivetran.response_models import FivetranConnectionDetails
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
 )
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
+from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     FineGrainedLineage,
     FineGrainedLineageDownstreamType,
     FineGrainedLineageUpstreamType,
+    UpstreamLineage,
+)
+from datahub.metadata.schema_classes import (
+    DatasetLineageTypeClass,
+    UpstreamClass,
 )
 from datahub.metadata.urns import CorpUserUrn, DataFlowUrn, DatasetUrn
 from datahub.sdk.dataflow import DataFlow
 from datahub.sdk.datajob import DataJob
+from datahub.sdk.dataset import Dataset
 from datahub.sdk.entity import Entity
 
 # Logger instance
 logger = logging.getLogger(__name__)
+CORPUSER_DATAHUB = "urn:li:corpuser:datahub"
 
 
 @platform_name("Fivetran")
 @config_class(FivetranSourceConfig)
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(
     SourceCapability.LINEAGE_FINE,
@@ -76,8 +88,12 @@ class FivetranSource(StatefulIngestionSourceBase):
         super().__init__(config, ctx)
         self.config = config
         self.report = FivetranSourceReport()
-
         self.audit_log = FivetranLogAPI(self.config.fivetran_log_config)
+        self.api_client: Optional[FivetranAPIClient] = None
+        self._connection_details_cache: Dict[str, FivetranConnectionDetails] = {}
+
+        if self.config.api_config:
+            self.api_client = FivetranAPIClient(self.config.api_config)
 
     def _extend_lineage(self, connector: Connector, datajob: DataJob) -> Dict[str, str]:
         input_dataset_urn_list: List[Union[str, DatasetUrn]] = []
@@ -131,17 +147,43 @@ class FivetranSource(StatefulIngestionSourceBase):
                 if source_details.include_schema_in_urn
                 else lineage.source_table.split(".", 1)[1]
             )
-            input_dataset_urn = DatasetUrn.create_from_ids(
-                platform_id=source_details.platform,
-                table_name=(
-                    f"{source_details.database.lower()}.{source_table}"
-                    if source_details.database
-                    else source_table
-                ),
-                env=source_details.env,
-                platform_instance=source_details.platform_instance,
-            )
-            input_dataset_urn_list.append(input_dataset_urn)
+            input_dataset_urn: Optional[DatasetUrn] = None
+            # Special Handling for Google Sheets Connectors
+            if connector.connector_type == Constant.GOOGLE_SHEETS_CONNECTOR_TYPE:
+                # Get Google Sheet dataset details from Fivetran API
+                # This is cached in the api_client
+                gsheets_conn_details: Optional[FivetranConnectionDetails] = (
+                    self._get_connection_details_by_id(connector.connector_id)
+                )
+
+                if gsheets_conn_details:
+                    input_dataset_urn = DatasetUrn.create_from_ids(
+                        platform_id=Constant.GOOGLE_SHEETS_CONNECTOR_TYPE,
+                        table_name=self._get_gsheet_named_range_dataset_id(
+                            gsheets_conn_details
+                        ),
+                        env=source_details.env,
+                    )
+                else:
+                    self.report.warning(
+                        title="Failed to extract lineage for Google Sheets Connector",
+                        message="Unable to extract lineage for Google Sheets Connector, as the connector details are not available from Fivetran API.",
+                        context=f"{connector.connector_name} (connector_id: {connector.connector_id})",
+                    )
+            else:
+                input_dataset_urn = DatasetUrn.create_from_ids(
+                    platform_id=source_details.platform,
+                    table_name=(
+                        f"{source_details.database.lower()}.{source_table}"
+                        if source_details.database
+                        else source_table
+                    ),
+                    env=source_details.env,
+                    platform_instance=source_details.platform_instance,
+                )
+
+            if input_dataset_urn:
+                input_dataset_urn_list.append(input_dataset_urn)
 
             destination_table = (
                 lineage.destination_table
@@ -262,6 +304,67 @@ class FivetranSource(StatefulIngestionSourceBase):
             clone_outlets=True,
         )
 
+    def _get_connection_details_by_id(
+        self, connection_id: str
+    ) -> Optional[FivetranConnectionDetails]:
+        if self.api_client is None:
+            self.report.warning(
+                title="Fivetran API client is not initialized",
+                message="Google Sheets Connector details cannot be extracted, as Fivetran API client is not initialized.",
+                context=f"connector_id: {connection_id}",
+            )
+            return None
+
+        if connection_id in self._connection_details_cache:
+            return self._connection_details_cache[connection_id]
+
+        try:
+            self.report.report_fivetran_rest_api_call_count()
+            conn_details = self.api_client.get_connection_details_by_id(connection_id)
+            # Update Cache
+            if conn_details:
+                self._connection_details_cache[connection_id] = conn_details
+
+            return conn_details
+        except Exception as e:
+            self.report.warning(
+                title="Failed to get connection details for Google Sheets Connector",
+                message=f"Exception occurred while getting connection details from Fivetran API. {e}",
+                context=f"connector_id: {connection_id}",
+            )
+            return None
+
+    def _get_gsheet_sheet_id_from_url(
+        self, gsheets_conn_details: FivetranConnectionDetails
+    ) -> str:
+        # Extracting the sheet_id (1A82PdLAE7NXLLb5JcLPKeIpKUMytXQba5Z-Ei-mbXLo) from the sheet_id url
+        # "https://docs.google.com/spreadsheets/d/1A82PdLAE7NXLLb5JcLPKeIpKUMytXQba5Z-Ei-mbXLo/edit?gid=0#gid=0",
+        try:
+            parsed = urlparse(gsheets_conn_details.config.sheet_id)
+            # Example: https://docs.google.com/spreadsheets/d/<spreadsheetId>/edit
+            parts = parsed.path.split("/")
+            return parts[3] if len(parts) > 2 else ""
+        except Exception as e:
+            logger.warning(
+                f"Failed to extract sheet_id from the sheet_id url: {gsheets_conn_details.config.sheet_id}, {e}"
+            )
+
+        return ""
+
+    def _get_gsheet_named_range_dataset_id(
+        self, gsheets_conn_details: FivetranConnectionDetails
+    ) -> str:
+        sheet_id = self._get_gsheet_sheet_id_from_url(gsheets_conn_details)
+        named_range_id = (
+            f"{sheet_id}.{gsheets_conn_details.config.named_range}"
+            if sheet_id
+            else gsheets_conn_details.config.named_range
+        )
+        logger.debug(
+            f"Using gsheet_named_range_dataset_id: {named_range_id} for connector: {gsheets_conn_details.id}"
+        )
+        return named_range_id
+
     def _get_dpi_workunits(
         self, job: Job, dpi: DataProcessInstance
     ) -> Iterable[MetadataWorkUnit]:
@@ -295,6 +398,74 @@ class FivetranSource(StatefulIngestionSourceBase):
         self, connector: Connector
     ) -> Iterable[Union[MetadataWorkUnit, Entity]]:
         self.report.report_connectors_scanned()
+
+        """
+        -------------------------------------------------------
+        Special Handling for Google Sheets Connectors
+        -------------------------------------------------------
+        Google Sheets source is not supported by Datahub yet.
+        As a workaround, we are emitting a dataset entity for the Google Sheet
+        and adding it to the lineage. This workaround needs to be removed once
+        Datahub supports Google Sheets source natively.
+        -------------------------------------------------------
+        """
+        if connector.connector_type == Constant.GOOGLE_SHEETS_CONNECTOR_TYPE:
+            # Get Google Sheet dataset details from Fivetran API
+            gsheets_conn_details: Optional[FivetranConnectionDetails] = (
+                self._get_connection_details_by_id(connector.connector_id)
+            )
+
+            if gsheets_conn_details:
+                gsheets_dataset = Dataset(
+                    name=self._get_gsheet_sheet_id_from_url(gsheets_conn_details),
+                    platform=Constant.GOOGLE_SHEETS_CONNECTOR_TYPE,
+                    env=self.config.env,
+                    display_name=self._get_gsheet_sheet_id_from_url(
+                        gsheets_conn_details
+                    ),
+                    external_url=gsheets_conn_details.config.sheet_id,
+                    created=gsheets_conn_details.created_at,
+                    last_modified=gsheets_conn_details.source_sync_details.last_synced,
+                    subtype=DatasetSubTypes.GOOGLE_SHEETS,
+                    custom_properties={
+                        "ingested_by": "fivetran source",
+                        "connector_id": gsheets_conn_details.id,
+                    },
+                )
+                gsheets_named_range_dataset = Dataset(
+                    name=self._get_gsheet_named_range_dataset_id(gsheets_conn_details),
+                    platform=Constant.GOOGLE_SHEETS_CONNECTOR_TYPE,
+                    env=self.config.env,
+                    display_name=gsheets_conn_details.config.named_range,
+                    external_url=gsheets_conn_details.config.sheet_id,
+                    created=gsheets_conn_details.created_at,
+                    last_modified=gsheets_conn_details.source_sync_details.last_synced,
+                    subtype=DatasetSubTypes.GOOGLE_SHEETS_NAMED_RANGE,
+                    custom_properties={
+                        "ingested_by": "fivetran source",
+                        "connector_id": gsheets_conn_details.id,
+                    },
+                    upstreams=UpstreamLineage(
+                        upstreams=[
+                            UpstreamClass(
+                                dataset=str(gsheets_dataset.urn),
+                                type=DatasetLineageTypeClass.VIEW,
+                                auditStamp=AuditStamp(
+                                    time=int(
+                                        gsheets_conn_details.created_at.timestamp()
+                                        * 1000
+                                    ),
+                                    actor=CORPUSER_DATAHUB,
+                                ),
+                            )
+                        ],
+                        fineGrainedLineages=None,
+                    ),
+                )
+
+                yield gsheets_dataset
+                yield gsheets_named_range_dataset
+
         # Create dataflow entity with same name as connector name
         dataflow = self._generate_dataflow_from_connector(connector)
         yield dataflow
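
The new Google Sheets handling keys everything off the spreadsheet ID embedded in the connection's sheet_id URL. As a rough standalone sketch (not part of the diff), the parsing performed by _get_gsheet_sheet_id_from_url boils down to splitting the URL path; the sample URL below is the one quoted in the code comments, and the length guard here is written for the index actually read:

    from urllib.parse import urlparse

    # Sample sheet_id URL copied from the diff's comments.
    sheet_url = "https://docs.google.com/spreadsheets/d/1A82PdLAE7NXLLb5JcLPKeIpKUMytXQba5Z-Ei-mbXLo/edit?gid=0#gid=0"

    # The path looks like "/spreadsheets/d/<spreadsheetId>/edit", so the ID is the
    # fourth path segment (index 3 after splitting on "/").
    parts = urlparse(sheet_url).path.split("/")
    sheet_id = parts[3] if len(parts) > 3 else ""
    print(sheet_id)  # 1A82PdLAE7NXLLb5JcLPKeIpKUMytXQba5Z-Ei-mbXLo

The named-range dataset ID is then built as "<sheet_id>.<named_range>" by _get_gsheet_named_range_dataset_id, falling back to the bare named range when the sheet ID cannot be parsed.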

datahub/ingestion/source/fivetran/fivetran_log_api.py

@@ -9,6 +9,7 @@ from sqlalchemy import create_engine
 
 from datahub.configuration.common import AllowDenyPattern, ConfigurationError
 from datahub.ingestion.source.fivetran.config import (
+    DISABLE_COL_LINEAGE_FOR_CONNECTOR_TYPES,
     Constant,
     FivetranLogConfig,
     FivetranSourceReport,
@@ -112,7 +113,11 @@ class FivetranLogAPI:
         """
         Returns dict of column lineage metadata with key as (<SOURCE_TABLE_ID>, <DESTINATION_TABLE_ID>)
         """
-        all_column_lineage = defaultdict(list)
+        all_column_lineage: Dict[Tuple[str, str], List] = defaultdict(list)
+
+        if not connector_ids:
+            return dict(all_column_lineage)
+
         column_lineage_result = self._query(
             self.fivetran_log_query.get_column_lineage_query(
                 connector_ids=connector_ids
@@ -130,7 +135,11 @@ class FivetranLogAPI:
         """
         Returns dict of table lineage metadata with key as 'CONNECTOR_ID'
         """
-        connectors_table_lineage_metadata = defaultdict(list)
+        connectors_table_lineage_metadata: Dict[str, List] = defaultdict(list)
+
+        if not connector_ids:
+            return dict(connectors_table_lineage_metadata)
+
         table_lineage_result = self._query(
             self.fivetran_log_query.get_table_lineage_query(connector_ids=connector_ids)
         )
@@ -246,9 +255,15 @@ class FivetranLogAPI:
         return self._get_users().get(user_id)
 
     def _fill_connectors_lineage(self, connectors: List[Connector]) -> None:
-        connector_ids = [connector.connector_id for connector in connectors]
-        table_lineage_metadata = self._get_table_lineage_metadata(connector_ids)
-        column_lineage_metadata = self._get_column_lineage_metadata(connector_ids)
+        # Create 2 filtered connector_ids lists - one for table lineage and one for column lineage
+        tll_connector_ids: List[str] = []
+        cll_connector_ids: List[str] = []
+        for connector in connectors:
+            tll_connector_ids.append(connector.connector_id)
+            if connector.connector_type not in DISABLE_COL_LINEAGE_FOR_CONNECTOR_TYPES:
+                cll_connector_ids.append(connector.connector_id)
+        table_lineage_metadata = self._get_table_lineage_metadata(tll_connector_ids)
+        column_lineage_metadata = self._get_column_lineage_metadata(cll_connector_ids)
         for connector in connectors:
             connector.lineage = self._extract_connector_lineage(
                 table_lineage_result=table_lineage_metadata.get(connector.connector_id),

datahub/ingestion/source/fivetran/fivetran_rest_api.py

@@ -0,0 +1,65 @@
+import logging
+
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util import Retry
+
+from datahub.ingestion.source.fivetran.config import (
+    FivetranAPIConfig,
+)
+from datahub.ingestion.source.fivetran.response_models import FivetranConnectionDetails
+
+logger = logging.getLogger(__name__)
+
+# Retry configuration constants
+RETRY_MAX_TIMES = 3
+RETRY_STATUS_CODES = [429, 500, 502, 503, 504]
+RETRY_BACKOFF_FACTOR = 1
+RETRY_ALLOWED_METHODS = ["GET"]
+
+
+class FivetranAPIClient:
+    """Client for interacting with the Fivetran REST API."""
+
+    def __init__(self, config: FivetranAPIConfig) -> None:
+        self.config = config
+        self._session = self._create_session()
+
+    def _create_session(self) -> requests.Session:
+        """
+        Create a session with retry logic and basic authentication
+        """
+        requests_session = requests.Session()
+
+        # Configure retry strategy for transient failures
+        retry_strategy = Retry(
+            total=RETRY_MAX_TIMES,
+            backoff_factor=RETRY_BACKOFF_FACTOR,
+            status_forcelist=RETRY_STATUS_CODES,
+            allowed_methods=RETRY_ALLOWED_METHODS,
+            raise_on_status=True,
+        )
+
+        adapter = HTTPAdapter(max_retries=retry_strategy)
+        requests_session.mount("http://", adapter)
+        requests_session.mount("https://", adapter)
+
+        # Set up basic authentication
+        requests_session.auth = (self.config.api_key, self.config.api_secret)
+        requests_session.headers.update(
+            {
+                "Content-Type": "application/json",
+                "Accept": "application/json",
+            }
+        )
+        return requests_session
+
+    def get_connection_details_by_id(
+        self, connection_id: str
+    ) -> FivetranConnectionDetails:
+        """Get details for a specific connection."""
+        connection_details = self._session.get(
+            f"{self.config.base_url}/v1/connections/{connection_id}",
+            timeout=self.config.request_timeout_sec,
+        )
+        return FivetranConnectionDetails(**connection_details.json().get("data", {}))
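
FivetranAPIClient is only constructed when api_config is set on the source config. A minimal usage sketch follows, assuming FivetranAPIConfig exposes the api_key, api_secret, base_url, and request_timeout_sec fields the client reads; the config.py changes are not shown in this diff, so treat the constructor arguments and values as illustrative:

    from datahub.ingestion.source.fivetran.config import FivetranAPIConfig
    from datahub.ingestion.source.fivetran.fivetran_rest_api import FivetranAPIClient

    # Hypothetical credentials; the field names mirror the attributes the client
    # accesses (api_key, api_secret, base_url, request_timeout_sec).
    api_config = FivetranAPIConfig(api_key="my-key", api_secret="my-secret")
    client = FivetranAPIClient(api_config)

    # Single GET against /v1/connections/<id>, parsed into FivetranConnectionDetails.
    details = client.get_connection_details_by_id("dialectical_remindful")
    print(details.service, details.config.named_range)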

datahub/ingestion/source/fivetran/response_models.py

@@ -0,0 +1,97 @@
+import datetime
+from typing import Dict, List
+
+from pydantic import BaseModel
+
+
+class FivetranConnectionWarnings(BaseModel):
+    code: str  # Warning Code
+    message: str  # Warning Message
+    details: Dict  # Warning Details
+
+
+class FivetranConnectionStatus(BaseModel):
+    setup_state: str  # Setup State
+    schema_status: str  # Schema Status
+    sync_state: str  # Sync State
+    update_state: str  # Update State
+    is_historical_sync: bool  # Is Historical Sync
+    warnings: List[FivetranConnectionWarnings]  # Warnings
+
+
+class FivetranConnectionConfig(BaseModel):
+    # Note: Connection Config is different for different connectors
+    auth_type: str  # Auth Type
+    sheet_id: str  # Sheet ID - URL to the Google Sheet
+    named_range: str  # Named Range
+
+
+class FivetranConnectionSourceSyncDetails(BaseModel):
+    last_synced: datetime.datetime  # Last Synced
+
+
+class FivetranConnectionDetails(BaseModel):
+    """
+    Note: This reponse class only captures fields that are relevant to the Google Sheets Connector
+    """
+
+    id: str  # Source ID
+    group_id: str  # Destination ID
+    service: str  # Connector Type
+    created_at: datetime.datetime
+    succeeded_at: datetime.datetime
+    paused: bool  # Paused Status
+    sync_frequency: int  # Sync Frequency (minutes)
+    status: FivetranConnectionStatus  # Status
+    config: FivetranConnectionConfig  # Connection Config
+    source_sync_details: FivetranConnectionSourceSyncDetails  # Source Sync Details
+
+    """
+    # Sample Response for Google Sheets Connector
+    {
+        "code": "Success",
+        "data": {
+            "id": "dialectical_remindful",
+            "group_id": "empties_classification",
+            "service": "google_sheets",
+            "service_version": 1,
+            "schema": "fivetran_google_sheets.fivetran_google_sheets",
+            "connected_by": "sewn_restrained",
+            "created_at": "2025-10-06T17:53:01.554289Z",
+            "succeeded_at": "2025-10-06T22:55:45.275000Z",
+            "failed_at": null,
+            "paused": true,
+            "pause_after_trial": false,
+            "sync_frequency": 360,
+            "data_delay_threshold": 0,
+            "data_delay_sensitivity": "NORMAL",
+            "private_link_id": null,
+            "networking_method": "Directly",
+            "proxy_agent_id": null,
+            "schedule_type": "auto",
+            "status": {
+                "setup_state": "connected",
+                "schema_status": "ready",
+                "sync_state": "paused",
+                "update_state": "on_schedule",
+                "is_historical_sync": false,
+                "tasks": [],
+                "warnings": [
+                    {
+                        "code": "snowflake_discontinuing_password_auth",
+                        "message": "Snowflake is discontinuing username/password authentication",
+                        "details": {}
+                    }
+                ]
+            },
+            "config": {
+                "auth_type": "ServiceAccount",
+                "sheet_id": "https://docs.google.com/spreadsheets/d/1A82PdLAE7NXLLb5JcLPKeIpKUMytXQba5Z-Ei-mbXLo/edit?gid=0#gid=0",
+                "named_range": "Fivetran_Test_Range"
+            },
+            "source_sync_details": {
+                "last_synced": "2025-10-06T22:55:27.371Z"
+            }
+        }
+    }
+    """

datahub/ingestion/source/hex/hex.py

@@ -178,7 +178,7 @@ class HexReport(
 
 @platform_name("Hex")
 @config_class(HexSourceConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.DESCRIPTIONS, "Supported by default")
 @capability(SourceCapability.OWNERSHIP, "Supported by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")

datahub/ingestion/source/iceberg/iceberg.py

@@ -118,7 +118,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
 
 
 @platform_name("Iceberg")
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.INCUBATING)
 @config_class(IcebergSourceConfig)
 @capability(
     SourceCapability.PLATFORM_INSTANCE,

datahub/ingestion/source/metabase.py

@@ -52,6 +52,7 @@ from datahub.metadata.schema_classes import (
     ChartQueryTypeClass,
     ChartTypeClass,
     DashboardInfoClass,
+    EdgeClass,
     OwnerClass,
     OwnershipClass,
     OwnershipTypeClass,
@@ -338,19 +339,25 @@ class MetabaseSource(StatefulIngestionSourceBase):
             lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
         )
 
-        chart_urns = []
+        # Convert chart URNs to chart edges (instead of deprecated charts field)
+        chart_edges = []
         cards_data = dashboard_details.get("dashcards", {})
         for card_info in cards_data:
            card_id = card_info.get("card").get("id", "")
            if not card_id:
                continue  # most likely a virtual card without an id (text or heading), not relevant.
            chart_urn = builder.make_chart_urn(self.platform, str(card_id))
-            chart_urns.append(chart_urn)
+            chart_edges.append(
+                EdgeClass(
+                    destinationUrn=chart_urn,
+                    lastModified=last_modified.lastModified,
+                )
+            )
 
         dashboard_info_class = DashboardInfoClass(
             description=description,
             title=title,
-            charts=chart_urns,
+            chartEdges=chart_edges,
             lastModified=last_modified,
             dashboardUrl=f"{self.config.display_uri}/dashboard/{dashboard_id}",
             customProperties={},
@@ -488,13 +495,25 @@
         datasource_urn = self.get_datasource_urn(card_details)
         custom_properties = self.construct_card_custom_properties(card_details)
 
+        input_edges = (
+            [
+                EdgeClass(
+                    destinationUrn=urn,
+                    lastModified=last_modified.lastModified,
+                )
+                for urn in datasource_urn
+            ]
+            if datasource_urn
+            else None
+        )
+
         chart_info = ChartInfoClass(
             type=chart_type,
             description=description,
             title=title,
             lastModified=last_modified,
             chartUrl=f"{self.config.display_uri}/card/{card_id}",
-            inputs=datasource_urn,
+            inputEdges=input_edges,
             customProperties=custom_properties,
         )
         chart_snapshot.aspects.append(chart_info)
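
The Metabase change replaces the deprecated charts and inputs lists with edge objects that carry an audit stamp. In isolation the pattern looks roughly like this; the URN and timestamp below are placeholders for illustration only:

    from datahub.metadata.schema_classes import AuditStampClass, EdgeClass

    # Placeholder values, not taken from the diff.
    chart_urn = "urn:li:chart:(metabase,42)"
    stamp = AuditStampClass(time=1700000000000, actor="urn:li:corpuser:datahub")

    edge = EdgeClass(destinationUrn=chart_urn, lastModified=stamp)
    # DashboardInfoClass(..., chartEdges=[edge], ...) replaces charts=[chart_urn];
    # ChartInfoClass(..., inputEdges=[edge], ...) replaces inputs=[...].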

datahub/ingestion/source/mlflow.py

@@ -136,7 +136,7 @@ class MLflowRegisteredModelStageInfo:
 
 @platform_name("MLflow")
 @config_class(MLflowConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.INCUBATING)
 @capability(
     SourceCapability.DESCRIPTIONS,
     "Extract descriptions for MLflow Registered Models and Model Versions",

datahub/ingestion/source/s3/source.py

@@ -188,7 +188,7 @@ class TableData:
 
 @platform_name("S3 / Local Files", id="s3")
 @config_class(DataLakeSourceConfig)
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 @capability(
     SourceCapability.CONTAINERS,
     "Enabled by default",

datahub/ingestion/source/salesforce.py

@@ -527,7 +527,7 @@ class SalesforceApi:
 
 @platform_name("Salesforce")
 @config_class(SalesforceConfig)
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 @capability(
     capability_name=SourceCapability.PLATFORM_INSTANCE,
     description="Can be equivalent to Salesforce organization",

datahub/ingestion/source/slack/slack.py

@@ -245,7 +245,7 @@ DATA_PLATFORM_SLACK_URN: str = builder.make_data_platform_urn(PLATFORM_NAME)
 
 @platform_name("Slack")
 @config_class(SlackSourceConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.CERTIFIED)
 class SlackSource(StatefulIngestionSourceBase):
     def __init__(self, ctx: PipelineContext, config: SlackSourceConfig):
         super().__init__(config, ctx)

datahub/ingestion/source/snowflake/snowflake_queries.py

@@ -21,6 +21,7 @@ from datahub.configuration.time_window_config import (
 )
 from datahub.ingestion.api.closeable import Closeable
 from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import SupportStatus, config_class, support_status
 from datahub.ingestion.api.report import Report
 from datahub.ingestion.api.source import Source, SourceReport
 from datahub.ingestion.api.source_helpers import auto_workunit
@@ -750,6 +751,8 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
         self._exit_stack.close()
 
 
+@support_status(SupportStatus.CERTIFIED)
+@config_class(SnowflakeQueriesSourceConfig)
 class SnowflakeQueriesSource(Source):
     def __init__(self, ctx: PipelineContext, config: SnowflakeQueriesSourceConfig):
         self.ctx = ctx

datahub/ingestion/source/snowflake/snowflake_summary.py

@@ -59,7 +59,7 @@ class SnowflakeSummaryReport(SourceReport, BaseTimeWindowReport):
 
 
 @config_class(SnowflakeSummaryConfig)
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 class SnowflakeSummarySource(Source):
     def __init__(self, ctx: PipelineContext, config: SnowflakeSummaryConfig):
         super().__init__(ctx)

datahub/ingestion/source/sql_queries.py

@@ -93,7 +93,7 @@ class SqlQueriesSourceReport(SourceReport):
     sql_aggregator: Optional[SqlAggregatorReport] = None
 
 
-@platform_name("SQL Queries")
+@platform_name("SQL Queries", id="sql-queries")
 @config_class(SqlQueriesSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries")

datahub/ingestion/source/unity/source.py

@@ -176,7 +176,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     supported=True,
 )
 @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following metadata from Databricks Unity Catalog:

datahub/ingestion/source/vertexai/vertexai.py

@@ -145,7 +145,7 @@ class PipelineMetadata:
 
 @platform_name("Vertex AI", id="vertexai")
 @config_class(VertexAIConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.INCUBATING)
 @capability(
     SourceCapability.DESCRIPTIONS,
     "Extract descriptions for Vertex AI Registered Models and Model Versions",