acryl-datahub 0.15.0rc20__py3-none-any.whl → 0.15.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)
  1. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/METADATA +2478 -2478
  2. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/RECORD +28 -26
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/structuredproperties/structuredproperties.py +56 -68
  5. datahub/cli/ingest_cli.py +110 -0
  6. datahub/emitter/rest_emitter.py +17 -4
  7. datahub/ingestion/sink/datahub_rest.py +12 -1
  8. datahub/ingestion/source/datahub/datahub_database_reader.py +41 -21
  9. datahub/ingestion/source/datahub/datahub_source.py +8 -1
  10. datahub/ingestion/source/kafka/kafka_connect.py +81 -51
  11. datahub/ingestion/source/s3/source.py +2 -3
  12. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +2 -1
  13. datahub/ingestion/source/snowflake/snowflake_query.py +13 -0
  14. datahub/ingestion/source/snowflake/snowflake_schema.py +16 -0
  15. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +23 -0
  16. datahub/ingestion/source/tableau/tableau.py +42 -3
  17. datahub/ingestion/source/tableau/tableau_common.py +12 -5
  18. datahub/ingestion/source/tableau/tableau_constant.py +2 -0
  19. datahub/ingestion/source/tableau/tableau_server_wrapper.py +33 -0
  20. datahub/ingestion/source/tableau/tableau_validation.py +48 -0
  21. datahub/metadata/_schema_classes.py +400 -400
  22. datahub/metadata/_urns/urn_defs.py +1355 -1355
  23. datahub/metadata/schema.avsc +17221 -17574
  24. datahub/testing/compare_metadata_json.py +1 -1
  25. datahub/utilities/file_backed_collections.py +35 -2
  26. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/WHEEL +0 -0
  27. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/entry_points.txt +0 -0
  28. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/kafka/kafka_connect.py

@@ -282,10 +282,6 @@ class ConfluentJDBCSourceConnector:
         query: str
         transforms: list

-    def report_warning(self, key: str, reason: str) -> None:
-        logger.warning(f"{key}: {reason}")
-        self.report.report_warning(key, reason)
-
     def get_parser(
         self,
         connector_manifest: ConnectorManifest,
@@ -355,9 +351,9 @@ class ConfluentJDBCSourceConnector:
                     source_table = f"{table_name_tuple[-2]}.{source_table}"
                 else:
                     include_source_dataset = False
-                    self.report_warning(
-                        self.connector_manifest.name,
-                        f"could not find schema for table {source_table}",
+                    self.report.warning(
+                        "Could not find schema for table"
+                        f"{self.connector_manifest.name} : {source_table}",
                     )
             dataset_name: str = get_dataset_name(database_name, source_table)
             lineage = KafkaConnectLineage(
@@ -457,9 +453,9 @@ class ConfluentJDBCSourceConnector:
                     target_platform=KAFKA,
                 )
                 lineages.append(lineage)
-            self.report_warning(
+            self.report.warning(
+                "Could not find input dataset, the connector has query configuration set",
                 self.connector_manifest.name,
-                "could not find input dataset, the connector has query configuration set",
             )
             self.connector_manifest.lineages = lineages
             return
@@ -535,24 +531,24 @@ class ConfluentJDBCSourceConnector:
                         include_source_dataset=False,
                     )
                 )
-            self.report_warning(
-                self.connector_manifest.name,
-                f"could not find input dataset, for connector topics {topic_names}",
+            self.report.warning(
+                "Could not find input dataset for connector topics",
+                f"{self.connector_manifest.name} : {topic_names}",
             )
             self.connector_manifest.lineages = lineages
             return
         else:
             include_source_dataset = True
             if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM:
-                self.report_warning(
-                    self.connector_manifest.name,
-                    f"could not find input dataset, connector has unknown transform - {transforms[0]['type']}",
+                self.report.warning(
+                    "Could not find input dataset, connector has unknown transform",
+                    f"{self.connector_manifest.name} : {transforms[0]['type']}",
                 )
                 include_source_dataset = False
             if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM:
-                self.report_warning(
+                self.report.warning(
+                    "Could not find input dataset, connector has one or more unknown transforms",
                     self.connector_manifest.name,
-                    "could not find input dataset, connector has one or more unknown transforms",
                 )
                 include_source_dataset = False
             lineages = self.default_get_lineages(
@@ -753,8 +749,10 @@ class DebeziumSourceConnector:
                 lineages.append(lineage)
             self.connector_manifest.lineages = lineages
         except Exception as e:
-            self.report.report_warning(
-                self.connector_manifest.name, f"Error resolving lineage: {e}"
+            self.report.warning(
+                "Error resolving lineage for connector",
+                self.connector_manifest.name,
+                exc=e,
             )

         return
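
Note: this release replaces the per-connector report_warning(key, reason) helpers with the structured SourceReport.warning(message, context, exc=...) API, which groups repeated warnings under a stable message, carries the per-entity detail in context, and keeps the exception (with its traceback) instead of interpolating it into the string. A minimal sketch of that call shape, with illustrative names only (the real API lives in datahub.ingestion.api.source):

    import logging
    from collections import defaultdict
    from typing import Dict, List, Optional

    logger = logging.getLogger(__name__)

    class SketchReport:
        """Stand-in for the structured reporter; not the real SourceReport."""

        def __init__(self) -> None:
            # message -> list of per-entity contexts, so N noisy entities
            # aggregate under one warning title instead of N distinct keys
            self.warnings: Dict[str, List[str]] = defaultdict(list)

        def warning(
            self,
            message: str,
            context: Optional[str] = None,
            exc: Optional[BaseException] = None,
        ) -> None:
            self.warnings[message].append(context or "")
            logger.warning("%s (%s)", message, context, exc_info=exc)

    report = SketchReport()
    try:
        raise ValueError("boom")
    except Exception as e:
        report.warning("Error resolving lineage for connector", "my-connector", exc=e)
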
@@ -783,10 +781,6 @@ class BigQuerySinkConnector:
         defaultDataset: Optional[str] = None
         version: str = "v1"

-    def report_warning(self, key: str, reason: str) -> None:
-        logger.warning(f"{key}: {reason}")
-        self.report.report_warning(key, reason)
-
     def get_parser(
         self,
         connector_manifest: ConnectorManifest,
@@ -917,9 +911,9 @@ class BigQuerySinkConnector:
             transformed_topic = self.apply_transformations(topic, transforms)
             dataset_table = self.get_dataset_table_for_topic(transformed_topic, parser)
             if dataset_table is None:
-                self.report_warning(
-                    self.connector_manifest.name,
-                    f"could not find target dataset for topic {transformed_topic}, please check your connector configuration",
+                self.report.warning(
+                    "Could not find target dataset for topic, please check your connector configuration"
+                    f"{self.connector_manifest.name} : {transformed_topic} ",
                 )
                 continue
             target_dataset = f"{project}.{dataset_table}"
@@ -954,10 +948,6 @@ class SnowflakeSinkConnector:
         schema_name: str
         topics_to_tables: Dict[str, str]

-    def report_warning(self, key: str, reason: str) -> None:
-        logger.warning(f"{key}: {reason}")
-        self.report.report_warning(key, reason)
-
     def get_table_name_from_topic_name(self, topic_name: str) -> str:
         """
         This function converts the topic name to a valid Snowflake table name using some rules.
@@ -1105,8 +1095,10 @@ class ConfluentS3SinkConnector:
             )
             self.connector_manifest.lineages = lineages
         except Exception as e:
-            self.report.report_warning(
-                self.connector_manifest.name, f"Error resolving lineage: {e}"
+            self.report.warning(
+                "Error resolving lineage for connector",
+                self.connector_manifest.name,
+                exc=e,
             )

         return
@@ -1155,7 +1147,7 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
            )
            self.session.auth = (self.config.username, self.config.password)

-        test_response = self.session.get(f"{self.config.connect_uri}")
+        test_response = self.session.get(f"{self.config.connect_uri}/connectors")
         test_response.raise_for_status()
         logger.info(f"Connection to {self.config.connect_uri} is ok")
         if not jpype.isJVMStarted():
@@ -1178,13 +1170,16 @@ class KafkaConnectSource(StatefulIngestionSourceBase):

         payload = connector_response.json()

-        for c in payload:
-            connector_url = f"{self.config.connect_uri}/connectors/{c}"
-            connector_response = self.session.get(connector_url)
-            manifest = connector_response.json()
-            connector_manifest = ConnectorManifest(**manifest)
-            if not self.config.connector_patterns.allowed(connector_manifest.name):
-                self.report.report_dropped(connector_manifest.name)
+        for connector_name in payload:
+            connector_url = f"{self.config.connect_uri}/connectors/{connector_name}"
+            connector_manifest = self._get_connector_manifest(
+                connector_name, connector_url
+            )
+            if (
+                connector_manifest is None
+                or not self.config.connector_patterns.allowed(connector_manifest.name)
+            ):
+                self.report.report_dropped(connector_name)
                 continue

             if self.config.provided_configs:
@@ -1195,19 +1190,11 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
             connector_manifest.lineages = list()
             connector_manifest.url = connector_url

-            topics = self.session.get(
-                f"{self.config.connect_uri}/connectors/{c}/topics",
-            ).json()
-
-            connector_manifest.topic_names = topics[c]["topics"]
+            connector_manifest.topic_names = self._get_connector_topics(connector_name)

             # Populate Source Connector metadata
             if connector_manifest.type == SOURCE:
-                tasks = self.session.get(
-                    f"{self.config.connect_uri}/connectors/{c}/tasks",
-                ).json()
-
-                connector_manifest.tasks = tasks
+                connector_manifest.tasks = self._get_connector_tasks(connector_name)

                 # JDBC source connector lineages
                 if connector_manifest.config.get(CONNECTOR_CLASS).__eq__(
@@ -1246,7 +1233,7 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
                    )
                    continue

-                for topic in topics:
+                for topic in connector_manifest.topic_names:
                     lineage = KafkaConnectLineage(
                         source_dataset=target_connector.source_dataset,
                         source_platform=target_connector.source_platform,
@@ -1286,6 +1273,49 @@ class KafkaConnectSource(StatefulIngestionSourceBase):

         return connectors_manifest

+    def _get_connector_manifest(
+        self, connector_name: str, connector_url: str
+    ) -> Optional[ConnectorManifest]:
+        try:
+            connector_response = self.session.get(connector_url)
+            connector_response.raise_for_status()
+        except Exception as e:
+            self.report.warning(
+                "Failed to get connector details", connector_name, exc=e
+            )
+            return None
+        manifest = connector_response.json()
+        connector_manifest = ConnectorManifest(**manifest)
+        return connector_manifest
+
+    def _get_connector_tasks(self, connector_name: str) -> dict:
+        try:
+            response = self.session.get(
+                f"{self.config.connect_uri}/connectors/{connector_name}/tasks",
+            )
+            response.raise_for_status()
+        except Exception as e:
+            self.report.warning(
+                "Error getting connector tasks", context=connector_name, exc=e
+            )
+            return {}
+
+        return response.json()
+
+    def _get_connector_topics(self, connector_name: str) -> List[str]:
+        try:
+            response = self.session.get(
+                f"{self.config.connect_uri}/connectors/{connector_name}/topics",
+            )
+            response.raise_for_status()
+        except Exception as e:
+            self.report.warning(
+                "Error getting connector topics", context=connector_name, exc=e
+            )
+            return []
+
+        return response.json()[connector_name]["topics"]
+
     def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit:
         connector_name = connector.name
         connector_type = connector.type

datahub/ingestion/source/s3/source.py

@@ -9,6 +9,7 @@ from datetime import datetime
 from itertools import groupby
 from pathlib import PurePath
 from typing import Any, Dict, Iterable, List, Optional, Tuple
+from urllib.parse import urlparse

 import smart_open.compression as so_compression
 from more_itertools import peekable
@@ -993,9 +994,7 @@ class S3Source(StatefulIngestionSourceBase):
         folders = []
         for dir in dirs_to_process:
             logger.info(f"Getting files from folder: {dir}")
-            prefix_to_process = dir.rstrip("\\").lstrip(
-                self.create_s3_path(bucket_name, "/")
-            )
+            prefix_to_process = urlparse(dir).path.lstrip("/")

             folders.extend(
                 self.get_folder_info(
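
Note: the replaced call was subtly buggy: str.lstrip(chars) strips a character *set*, not a string prefix, so characters from the bucket path could also be eaten from the front of the key. urlparse().path extracts the key portion of the s3:// URI directly. A standalone illustration with a hypothetical prefix:

    from urllib.parse import urlparse

    s3_uri = "s3://my-bucket/metrics/2024/"  # hypothetical example prefix

    # Buggy pattern: "met" is eaten too, because m, e, and t all occur in the
    # character set "s3://my-bucket/".
    print(s3_uri.rstrip("\\").lstrip("s3://my-bucket/"))  # -> "rics/2024/"

    # Fixed pattern: take the URI path minus its leading slash, leaving the
    # object-key prefix unchanged.
    print(urlparse(s3_uri).path.lstrip("/"))  # -> "metrics/2024/"
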

datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

@@ -413,9 +413,10 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
             return UpstreamLineageEdge.parse_obj(db_row)
         except Exception as e:
             self.report.num_upstream_lineage_edge_parsing_failed += 1
+            upstream_tables = db_row.get("UPSTREAM_TABLES")
             self.structured_reporter.warning(
                 "Failed to parse lineage edge",
-                context=db_row.get("DOWNSTREAM_TABLE_NAME") or None,
+                context=f"Upstreams: {upstream_tables} Downstreams: {db_row.get('DOWNSTREAM_TABLE_NAME')}",
                 exc=e,
             )
             return None

datahub/ingestion/source/snowflake/snowflake_query.py

@@ -237,6 +237,19 @@ SHOW VIEWS IN DATABASE "{db_name}"
        LIMIT {limit} {from_clause};
        """

+    @staticmethod
+    def get_secure_view_definitions() -> str:
+        # https://docs.snowflake.com/en/sql-reference/account-usage/views
+        return """
+        SELECT
+            TABLE_CATALOG as "TABLE_CATALOG",
+            TABLE_SCHEMA as "TABLE_SCHEMA",
+            TABLE_NAME as "TABLE_NAME",
+            VIEW_DEFINITION as "VIEW_DEFINITION"
+        FROM SNOWFLAKE.ACCOUNT_USAGE.VIEWS
+        WHERE IS_SECURE = 'YES' AND VIEW_DEFINITION !='' AND DELETED IS NULL
+        """
+
     @staticmethod
     def columns_for_schema(
         schema_name: str,
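
Note: the new query reads SNOWFLAKE.ACCOUNT_USAGE.VIEWS, which (unlike SHOW VIEWS) exposes the definitions of secure views to sufficiently privileged roles, at the cost of ACCOUNT_USAGE's documented ingestion latency. A hypothetical standalone run with snowflake-connector-python; the connection parameters are placeholders, and the querying role needs access to the ACCOUNT_USAGE schema:

    import snowflake.connector

    from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery

    conn = snowflake.connector.connect(account="xy12345", user="me", password="...")
    cur = conn.cursor(snowflake.connector.DictCursor)
    for row in cur.execute(SnowflakeQuery.get_secure_view_definitions()):
        print(row["TABLE_CATALOG"], row["TABLE_SCHEMA"], row["TABLE_NAME"])
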

datahub/ingestion/source/snowflake/snowflake_schema.py

@@ -266,6 +266,22 @@ class SnowflakeDataDictionary(SupportsAsObj):
             snowflake_schemas.append(snowflake_schema)
         return snowflake_schemas

+    @serialized_lru_cache(maxsize=1)
+    def get_secure_view_definitions(self) -> Dict[str, Dict[str, Dict[str, str]]]:
+        secure_view_definitions: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(
+            lambda: defaultdict(lambda: defaultdict())
+        )
+        cur = self.connection.query(SnowflakeQuery.get_secure_view_definitions())
+        for view in cur:
+            db_name = view["TABLE_CATALOG"]
+            schema_name = view["TABLE_SCHEMA"]
+            view_name = view["TABLE_NAME"]
+            secure_view_definitions[db_name][schema_name][view_name] = view[
+                "VIEW_DEFINITION"
+            ]
+
+        return secure_view_definitions
+
     @serialized_lru_cache(maxsize=1)
     def get_tables_for_database(
         self, db_name: str
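
Note: the cached structure is a three-level mapping: database -> schema -> view -> definition. The innermost defaultdict() is built without a default factory, so a miss at the view level raises KeyError instead of silently inserting an empty entry; fetch_secure_view_definition() in snowflake_schema_gen.py (below) relies on that by catching the exception. A standalone sketch with made-up names:

    from collections import defaultdict
    from typing import Dict

    defs: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(
        lambda: defaultdict(lambda: defaultdict())
    )
    defs["ANALYTICS"]["PUBLIC"]["SECURE_ORDERS"] = "SELECT * FROM orders"

    print(defs["ANALYTICS"]["PUBLIC"]["SECURE_ORDERS"])  # "SELECT * FROM orders"
    try:
        defs["ANALYTICS"]["PUBLIC"]["MISSING_VIEW"]
    except KeyError:
        print("missing view -> KeyError, reported as a warning upstream")
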

datahub/ingestion/source/snowflake/snowflake_schema_gen.py

@@ -424,6 +424,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             view_identifier = self.identifiers.get_dataset_identifier(
                 view.name, schema_name, db_name
             )
+            if view.is_secure and not view.view_definition:
+                view.view_definition = self.fetch_secure_view_definition(
+                    view.name, schema_name, db_name
+                )
             if view.view_definition:
                 self.aggregator.add_view_definition(
                     view_urn=self.identifiers.gen_dataset_urn(view_identifier),
@@ -449,6 +453,25 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 context=f"{db_name}.{schema_name}",
             )

+    def fetch_secure_view_definition(
+        self, table_name: str, schema_name: str, db_name: str
+    ) -> Optional[str]:
+        try:
+            view_definitions = self.data_dictionary.get_secure_view_definitions()
+            return view_definitions[db_name][schema_name][table_name]
+        except Exception as e:
+            if isinstance(e, SnowflakePermissionError):
+                error_msg = (
+                    "Failed to get secure views definitions. Please check permissions."
+                )
+            else:
+                error_msg = "Failed to get secure views definitions"
+            self.structured_reporter.warning(
+                error_msg,
+                exc=e,
+            )
+            return None
+
     def fetch_views_for_schema(
         self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str
     ) -> List[SnowflakeView]:

datahub/ingestion/source/tableau/tableau.py

@@ -111,6 +111,8 @@ from datahub.ingestion.source.tableau.tableau_common import (
     tableau_field_to_schema_field,
     workbook_graphql_query,
 )
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+from datahub.ingestion.source.tableau.tableau_validation import check_user_role
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
     AuditStamp,
     ChangeAuditStamps,
@@ -167,7 +169,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn

 try:
     # On earlier versions of the tableauserverclient, the NonXMLResponseError
-    # was thrown when reauthentication was needed. We'll keep both exceptions
+    # was thrown when reauthentication was necessary. We'll keep both exceptions
     # around for now, but can remove this in the future.
     from tableauserverclient.server.endpoint.exceptions import (  # type: ignore
         NotSignedInError,
@@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
     num_upstream_table_lineage_failed_parse_sql: int = 0
     num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
     num_hidden_assets_skipped: int = 0
+    logged_in_user: List[UserInfo] = []
+
+
+def report_user_role(report: TableauSourceReport, server: Server) -> None:
+    title: str = "Insufficient Permissions"
+    message: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion."
+    try:
+        # TableauSiteSource instance is per site, so each time we need to find-out user detail
+        # the site-role might be different on another site
+        logged_in_user: UserInfo = UserInfo.from_server(server=server)
+
+        if not logged_in_user.is_site_administrator_explorer():
+            report.warning(
+                title=title,
+                message=message,
+                context=f"user-name={logged_in_user.user_name}, role={logged_in_user.site_role}, site_id={logged_in_user.site_id}",
+            )
+
+        report.logged_in_user.append(logged_in_user)
+
+    except Exception as e:
+        report.warning(
+            title=title,
+            message="Failed to verify the user's role. The user must have `Site Administrator Explorer` role.",
+            context=f"{e}",
+            exc=e,
+        )


 @platform_name("Tableau")
@@ -676,6 +705,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
         try:
             logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
             self.server = self.config.make_tableau_client(site_content_url)
+            report_user_role(report=self.report, server=self.server)
         # Note that we're not catching ConfigurationError, since we want that to throw.
         except ValueError as e:
             self.report.failure(
@@ -689,9 +719,17 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
         test_report = TestConnectionReport()
         try:
             source_config = TableauConfig.parse_obj_allow_extras(config_dict)
-            source_config.make_tableau_client(source_config.site)
+
+            server = source_config.make_tableau_client(source_config.site)
+
             test_report.basic_connectivity = CapabilityReport(capable=True)
+
+            test_report.capability_report = check_user_role(
+                logged_in_user=UserInfo.from_server(server=server)
+            )
+
         except Exception as e:
+            logger.warning(f"{e}", exc_info=e)
             test_report.basic_connectivity = CapabilityReport(
                 capable=False, failure_reason=str(e)
             )
@@ -831,6 +869,8 @@ class TableauSiteSource:
         # when emitting custom SQL data sources.
         self.custom_sql_ids_being_used: List[str] = []

+        report_user_role(report=report, server=server)
+
     @property
     def no_env_browse_prefix(self) -> str:
         # Prefix to use with browse path (v1)
@@ -1290,7 +1330,6 @@ class TableauSiteSource:
         page_size = page_size_override or self.config.page_size

         filter_pages = get_filter_pages(query_filter, page_size)
-
         for filter_page in filter_pages:
             has_next_page = 1
             current_cursor: Optional[str] = None

datahub/ingestion/source/tableau/tableau_common.py

@@ -975,15 +975,22 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
     # a few ten thousand, then tableau server responds with empty response
     # causing below error:
     # tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b''
+
+    # in practice, we only do pagination if len(query_filter.keys()) == 1
+    if len(query_filter.keys()) != 1:
+        return filter_pages
+
+    current_key = (list(query_filter.keys()))[0]
+
     if (
-        len(query_filter.keys()) == 1
-        and query_filter.get(c.ID_WITH_IN)
-        and isinstance(query_filter[c.ID_WITH_IN], list)
+        current_key in [c.ID_WITH_IN, c.PROJECT_NAME_WITH_IN]
+        and query_filter.get(current_key)
+        and isinstance(query_filter[current_key], list)
     ):
-        ids = query_filter[c.ID_WITH_IN]
+        ids = query_filter[current_key]
         filter_pages = [
             {
-                c.ID_WITH_IN: ids[
+                current_key: ids[
                     start : (
                         start + page_size if start + page_size < len(ids) else len(ids)
                     )
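
Note: get_filter_pages now chunks any single-key list filter whose key is ID_WITH_IN or PROJECT_NAME_WITH_IN, where previously only ID_WITH_IN was paginated. A simplified, runnable sketch of the chunking under those assumptions (the constant is a stand-in for the real tableau_constant entry; Python slicing clamps at the end of the list, matching the explicit bounds check above):

    from typing import Any, Dict, List

    ID_WITH_IN = "idWithin"  # stand-in for c.ID_WITH_IN

    def get_filter_pages(query_filter: Dict[str, Any], page_size: int) -> List[dict]:
        # Only single-key list filters are paginated; anything else passes through.
        if len(query_filter) != 1:
            return [query_filter]
        key = next(iter(query_filter))
        values = query_filter.get(key)
        if key == ID_WITH_IN and isinstance(values, list):
            return [
                {key: values[start : start + page_size]}
                for start in range(0, len(values), page_size)
            ]
        return [query_filter]

    print(get_filter_pages({ID_WITH_IN: ["a", "b", "c"]}, page_size=2))
    # -> [{'idWithin': ['a', 'b']}, {'idWithin': ['c']}]
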

datahub/ingestion/source/tableau/tableau_constant.py

@@ -81,3 +81,5 @@ EMBEDDED_DATA_SOURCES_CONNECTION = "embeddedDatasourcesConnection"
 PROJECT = "Project"
 SITE = "Site"
 IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
+SITE_PERMISSION = "sitePermission"
+SITE_ROLE = "SiteAdministratorExplorer"

datahub/ingestion/source/tableau/tableau_server_wrapper.py (new file)

@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+
+from tableauserverclient import Server, UserItem
+
+from datahub.ingestion.source.tableau import tableau_constant as c
+
+
+@dataclass
+class UserInfo:
+    user_name: str
+    site_role: str
+    site_id: str
+
+    def is_site_administrator_explorer(self):
+        return self.site_role == c.SITE_ROLE
+
+    @staticmethod
+    def from_server(server: Server) -> "UserInfo":
+        assert server.user_id, "make the connection with tableau"
+
+        user: UserItem = server.users.get_by_id(server.user_id)
+
+        assert user.site_role, "site_role is not available"  # to silent the lint
+
+        assert user.name, "user name is not available"  # to silent the lint
+
+        assert server.site_id, "site identifier is not available"  # to silent the lint
+
+        return UserInfo(
+            user_name=user.name,
+            site_role=user.site_role,
+            site_id=server.site_id,
+        )

datahub/ingestion/source/tableau/tableau_validation.py (new file)

@@ -0,0 +1,48 @@
+import logging
+from typing import Dict, Union
+
+from datahub.ingestion.api.source import CapabilityReport, SourceCapability
+from datahub.ingestion.source.tableau import tableau_constant as c
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+
+logger = logging.getLogger(__name__)
+
+
+def check_user_role(
+    logged_in_user: UserInfo,
+) -> Dict[Union[SourceCapability, str], CapabilityReport]:
+    capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = {
+        c.SITE_PERMISSION: CapabilityReport(
+            capable=True,
+        )
+    }
+
+    failure_reason: str = (
+        "The user does not have the `Site Administrator Explorer` role."
+    )
+
+    mitigation_message_prefix: str = (
+        "Assign `Site Administrator Explorer` role to the user"
+    )
+    mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"
+
+    try:
+        # TODO: Add check for `Enable Derived Permissions`
+        if not logged_in_user.is_site_administrator_explorer():
+            capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+                capable=False,
+                failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
+                mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}",
+            )
+
+        return capability_dict
+
+    except Exception as e:
+        logger.warning(msg=e, exc_info=e)
+        capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+            capable=False,
+            failure_reason="Failed to verify user role.",
+            mitigation_message=f"{mitigation_message_prefix}. {mitigation_message_suffix}",  # user is unknown
+        )
+
+        return capability_dict
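
Note: test_connection (see the tableau.py hunk above) surfaces this check as a capability report keyed by the new sitePermission constant. A hypothetical standalone invocation with made-up user values, assuming the package layout introduced in this release:

    from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
    from datahub.ingestion.source.tableau.tableau_validation import check_user_role

    user = UserInfo(user_name="alice", site_role="Explorer", site_id="site-123")
    result = check_user_role(logged_in_user=user)

    print(result["sitePermission"].capable)         # False: not SiteAdministratorExplorer
    print(result["sitePermission"].failure_reason)  # names the user's current role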