acryl-datahub 0.15.0rc20__py3-none-any.whl → 0.15.0rc22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of acryl-datahub is flagged as potentially problematic.
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/METADATA +2478 -2478
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/RECORD +28 -26
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +56 -68
- datahub/cli/ingest_cli.py +110 -0
- datahub/emitter/rest_emitter.py +17 -4
- datahub/ingestion/sink/datahub_rest.py +12 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +41 -21
- datahub/ingestion/source/datahub/datahub_source.py +8 -1
- datahub/ingestion/source/kafka/kafka_connect.py +81 -51
- datahub/ingestion/source/s3/source.py +2 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +16 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +23 -0
- datahub/ingestion/source/tableau/tableau.py +42 -3
- datahub/ingestion/source/tableau/tableau_common.py +12 -5
- datahub/ingestion/source/tableau/tableau_constant.py +2 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +33 -0
- datahub/ingestion/source/tableau/tableau_validation.py +48 -0
- datahub/metadata/_schema_classes.py +400 -400
- datahub/metadata/_urns/urn_defs.py +1355 -1355
- datahub/metadata/schema.avsc +17221 -17574
- datahub/testing/compare_metadata_json.py +1 -1
- datahub/utilities/file_backed_collections.py +35 -2
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/kafka/kafka_connect.py

@@ -282,10 +282,6 @@ class ConfluentJDBCSourceConnector:
         query: str
         transforms: list
 
-    def report_warning(self, key: str, reason: str) -> None:
-        logger.warning(f"{key}: {reason}")
-        self.report.report_warning(key, reason)
-
     def get_parser(
         self,
         connector_manifest: ConnectorManifest,
@@ -355,9 +351,9 @@ class ConfluentJDBCSourceConnector:
                     source_table = f"{table_name_tuple[-2]}.{source_table}"
                 else:
                     include_source_dataset = False
-                    self.
-
-                    f"
+                    self.report.warning(
+                        "Could not find schema for table"
+                        f"{self.connector_manifest.name} : {source_table}",
                     )
                 dataset_name: str = get_dataset_name(database_name, source_table)
                 lineage = KafkaConnectLineage(
@@ -457,9 +453,9 @@ class ConfluentJDBCSourceConnector:
                 target_platform=KAFKA,
             )
             lineages.append(lineage)
-            self.
+            self.report.warning(
+                "Could not find input dataset, the connector has query configuration set",
                 self.connector_manifest.name,
-                "could not find input dataset, the connector has query configuration set",
             )
             self.connector_manifest.lineages = lineages
             return
@@ -535,24 +531,24 @@ class ConfluentJDBCSourceConnector:
                         include_source_dataset=False,
                     )
                 )
-                self.
-
-                f"
+                self.report.warning(
+                    "Could not find input dataset for connector topics",
+                    f"{self.connector_manifest.name} : {topic_names}",
                 )
                 self.connector_manifest.lineages = lineages
                 return
             else:
                 include_source_dataset = True
                 if SINGLE_TRANSFORM and UNKNOWN_TRANSFORM:
-                    self.
-
-                    f"
+                    self.report.warning(
+                        "Could not find input dataset, connector has unknown transform",
+                        f"{self.connector_manifest.name} : {transforms[0]['type']}",
                     )
                     include_source_dataset = False
                 if not SINGLE_TRANSFORM and UNKNOWN_TRANSFORM:
-                    self.
+                    self.report.warning(
+                        "Could not find input dataset, connector has one or more unknown transforms",
                         self.connector_manifest.name,
-                        "could not find input dataset, connector has one or more unknown transforms",
                     )
                     include_source_dataset = False
                 lineages = self.default_get_lineages(
@@ -753,8 +749,10 @@ class DebeziumSourceConnector:
                 lineages.append(lineage)
             self.connector_manifest.lineages = lineages
         except Exception as e:
-            self.report.
-
+            self.report.warning(
+                "Error resolving lineage for connector",
+                self.connector_manifest.name,
+                exc=e,
             )
 
         return
@@ -783,10 +781,6 @@ class BigQuerySinkConnector:
         defaultDataset: Optional[str] = None
         version: str = "v1"
 
-    def report_warning(self, key: str, reason: str) -> None:
-        logger.warning(f"{key}: {reason}")
-        self.report.report_warning(key, reason)
-
     def get_parser(
         self,
         connector_manifest: ConnectorManifest,
@@ -917,9 +911,9 @@ class BigQuerySinkConnector:
             transformed_topic = self.apply_transformations(topic, transforms)
             dataset_table = self.get_dataset_table_for_topic(transformed_topic, parser)
             if dataset_table is None:
-                self.
-
-                f"
+                self.report.warning(
+                    "Could not find target dataset for topic, please check your connector configuration"
+                    f"{self.connector_manifest.name} : {transformed_topic} ",
                 )
                 continue
             target_dataset = f"{project}.{dataset_table}"
@@ -954,10 +948,6 @@ class SnowflakeSinkConnector:
         schema_name: str
         topics_to_tables: Dict[str, str]
 
-    def report_warning(self, key: str, reason: str) -> None:
-        logger.warning(f"{key}: {reason}")
-        self.report.report_warning(key, reason)
-
     def get_table_name_from_topic_name(self, topic_name: str) -> str:
         """
         This function converts the topic name to a valid Snowflake table name using some rules.
@@ -1105,8 +1095,10 @@ class ConfluentS3SinkConnector:
                 )
             self.connector_manifest.lineages = lineages
         except Exception as e:
-            self.report.
-
+            self.report.warning(
+                "Error resolving lineage for connector",
+                self.connector_manifest.name,
+                exc=e,
             )
 
         return
@@ -1155,7 +1147,7 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
             )
             self.session.auth = (self.config.username, self.config.password)
 
-        test_response = self.session.get(f"{self.config.connect_uri}")
+        test_response = self.session.get(f"{self.config.connect_uri}/connectors")
         test_response.raise_for_status()
         logger.info(f"Connection to {self.config.connect_uri} is ok")
         if not jpype.isJVMStarted():
@@ -1178,13 +1170,16 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
 
         payload = connector_response.json()
 
-        for
-            connector_url = f"{self.config.connect_uri}/connectors/{
-
-
-
-            if
-
+        for connector_name in payload:
+            connector_url = f"{self.config.connect_uri}/connectors/{connector_name}"
+            connector_manifest = self._get_connector_manifest(
+                connector_name, connector_url
+            )
+            if (
+                connector_manifest is None
+                or not self.config.connector_patterns.allowed(connector_manifest.name)
+            ):
+                self.report.report_dropped(connector_name)
                 continue
 
             if self.config.provided_configs:
@@ -1195,19 +1190,11 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
            connector_manifest.lineages = list()
            connector_manifest.url = connector_url
 
-
-                f"{self.config.connect_uri}/connectors/{c}/topics",
-            ).json()
-
-            connector_manifest.topic_names = topics[c]["topics"]
+            connector_manifest.topic_names = self._get_connector_topics(connector_name)
 
            # Populate Source Connector metadata
            if connector_manifest.type == SOURCE:
-                tasks = self.
-                    f"{self.config.connect_uri}/connectors/{c}/tasks",
-                ).json()
-
-                connector_manifest.tasks = tasks
+                connector_manifest.tasks = self._get_connector_tasks(connector_name)
 
                # JDBC source connector lineages
                if connector_manifest.config.get(CONNECTOR_CLASS).__eq__(
@@ -1246,7 +1233,7 @@ class KafkaConnectSource(StatefulIngestionSourceBase):
                    )
                    continue
 
-                for topic in
+                for topic in connector_manifest.topic_names:
                    lineage = KafkaConnectLineage(
                        source_dataset=target_connector.source_dataset,
                        source_platform=target_connector.source_platform,
|
|
|
1286
1273
|
|
|
1287
1274
|
return connectors_manifest
|
|
1288
1275
|
|
|
1276
|
+
def _get_connector_manifest(
|
|
1277
|
+
self, connector_name: str, connector_url: str
|
|
1278
|
+
) -> Optional[ConnectorManifest]:
|
|
1279
|
+
try:
|
|
1280
|
+
connector_response = self.session.get(connector_url)
|
|
1281
|
+
connector_response.raise_for_status()
|
|
1282
|
+
except Exception as e:
|
|
1283
|
+
self.report.warning(
|
|
1284
|
+
"Failed to get connector details", connector_name, exc=e
|
|
1285
|
+
)
|
|
1286
|
+
return None
|
|
1287
|
+
manifest = connector_response.json()
|
|
1288
|
+
connector_manifest = ConnectorManifest(**manifest)
|
|
1289
|
+
return connector_manifest
|
|
1290
|
+
|
|
1291
|
+
def _get_connector_tasks(self, connector_name: str) -> dict:
|
|
1292
|
+
try:
|
|
1293
|
+
response = self.session.get(
|
|
1294
|
+
f"{self.config.connect_uri}/connectors/{connector_name}/tasks",
|
|
1295
|
+
)
|
|
1296
|
+
response.raise_for_status()
|
|
1297
|
+
except Exception as e:
|
|
1298
|
+
self.report.warning(
|
|
1299
|
+
"Error getting connector tasks", context=connector_name, exc=e
|
|
1300
|
+
)
|
|
1301
|
+
return {}
|
|
1302
|
+
|
|
1303
|
+
return response.json()
|
|
1304
|
+
|
|
1305
|
+
def _get_connector_topics(self, connector_name: str) -> List[str]:
|
|
1306
|
+
try:
|
|
1307
|
+
response = self.session.get(
|
|
1308
|
+
f"{self.config.connect_uri}/connectors/{connector_name}/topics",
|
|
1309
|
+
)
|
|
1310
|
+
response.raise_for_status()
|
|
1311
|
+
except Exception as e:
|
|
1312
|
+
self.report.warning(
|
|
1313
|
+
"Error getting connector topics", context=connector_name, exc=e
|
|
1314
|
+
)
|
|
1315
|
+
return []
|
|
1316
|
+
|
|
1317
|
+
return response.json()[connector_name]["topics"]
|
|
1318
|
+
|
|
1289
1319
|
def construct_flow_workunit(self, connector: ConnectorManifest) -> MetadataWorkUnit:
|
|
1290
1320
|
connector_name = connector.name
|
|
1291
1321
|
connector_type = connector.type
|
|
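The new `_get_connector_manifest`, `_get_connector_tasks`, and `_get_connector_topics` helpers wrap the per-connector Kafka Connect REST calls and downgrade failures to structured warnings instead of aborting the run. As a rough standalone sketch of the underlying REST round-trips (the endpoint URL is a placeholder; the real source goes through its authenticated `self.session`):

```python
import requests

connect_uri = "http://localhost:8083"  # placeholder Connect REST endpoint

# Same endpoint the revised health check now probes instead of the bare base URL.
connector_names = requests.get(f"{connect_uri}/connectors").json()

for name in connector_names:
    # Connector manifest: name, config, type, and a task summary.
    manifest = requests.get(f"{connect_uri}/connectors/{name}").json()
    # Task details, only needed for source connectors in the new code path.
    tasks = requests.get(f"{connect_uri}/connectors/{name}/tasks").json()
    # Active topics; the response is keyed by connector name.
    topics = requests.get(f"{connect_uri}/connectors/{name}/topics").json()[name]["topics"]
    print(name, manifest.get("type"), len(tasks), topics)
```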
datahub/ingestion/source/s3/source.py

@@ -9,6 +9,7 @@ from datetime import datetime
 from itertools import groupby
 from pathlib import PurePath
 from typing import Any, Dict, Iterable, List, Optional, Tuple
+from urllib.parse import urlparse
 
 import smart_open.compression as so_compression
 from more_itertools import peekable
@@ -993,9 +994,7 @@ class S3Source(StatefulIngestionSourceBase):
         folders = []
         for dir in dirs_to_process:
             logger.info(f"Getting files from folder: {dir}")
-            prefix_to_process = dir.
-                self.create_s3_path(bucket_name, "/")
-            )
+            prefix_to_process = urlparse(dir).path.lstrip("/")
 
             folders.extend(
                 self.get_folder_info(
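The replacement line derives the key prefix directly from the S3 URI with the standard library; a minimal illustration (the bucket and key below are made up):

```python
from urllib.parse import urlparse

# "s3://my-bucket/logs/2024/" -> path "/logs/2024/" -> prefix "logs/2024/"
prefix = urlparse("s3://my-bucket/logs/2024/").path.lstrip("/")
print(prefix)  # logs/2024/
```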
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

@@ -413,9 +413,10 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
             return UpstreamLineageEdge.parse_obj(db_row)
         except Exception as e:
             self.report.num_upstream_lineage_edge_parsing_failed += 1
+            upstream_tables = db_row.get("UPSTREAM_TABLES")
             self.structured_reporter.warning(
                 "Failed to parse lineage edge",
-                context=db_row.get(
+                context=f"Upstreams: {upstream_tables} Downstreams: {db_row.get('DOWNSTREAM_TABLE_NAME')}",
                 exc=e,
             )
             return None
datahub/ingestion/source/snowflake/snowflake_query.py

@@ -237,6 +237,19 @@ SHOW VIEWS IN DATABASE "{db_name}"
         LIMIT {limit} {from_clause};
         """
 
+    @staticmethod
+    def get_secure_view_definitions() -> str:
+        # https://docs.snowflake.com/en/sql-reference/account-usage/views
+        return """
+        SELECT
+            TABLE_CATALOG as "TABLE_CATALOG",
+            TABLE_SCHEMA as "TABLE_SCHEMA",
+            TABLE_NAME as "TABLE_NAME",
+            VIEW_DEFINITION as "VIEW_DEFINITION"
+        FROM SNOWFLAKE.ACCOUNT_USAGE.VIEWS
+        WHERE IS_SECURE = 'YES' AND VIEW_DEFINITION !='' AND DELETED IS NULL
+        """
+
     @staticmethod
     def columns_for_schema(
         schema_name: str,
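The new query reads secure-view definitions from the `SNOWFLAKE.ACCOUNT_USAGE` share. A hedged sketch of running the same statement directly with `snowflake-connector-python` to preview what the connector will pick up (connection parameters are placeholders, and the role needs ACCOUNT_USAGE access):

```python
import snowflake.connector

# Placeholder credentials; ACCOUNT_USAGE typically requires an elevated role.
conn = snowflake.connector.connect(account="my_account", user="my_user", password="***")
try:
    cur = conn.cursor()
    cur.execute(
        """
        SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, VIEW_DEFINITION
        FROM SNOWFLAKE.ACCOUNT_USAGE.VIEWS
        WHERE IS_SECURE = 'YES' AND VIEW_DEFINITION != '' AND DELETED IS NULL
        """
    )
    for catalog, schema, name, definition in cur:
        print(f"{catalog}.{schema}.{name}: {definition[:60]}...")
finally:
    conn.close()
```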
datahub/ingestion/source/snowflake/snowflake_schema.py

@@ -266,6 +266,22 @@ class SnowflakeDataDictionary(SupportsAsObj):
             snowflake_schemas.append(snowflake_schema)
         return snowflake_schemas
 
+    @serialized_lru_cache(maxsize=1)
+    def get_secure_view_definitions(self) -> Dict[str, Dict[str, Dict[str, str]]]:
+        secure_view_definitions: Dict[str, Dict[str, Dict[str, str]]] = defaultdict(
+            lambda: defaultdict(lambda: defaultdict())
+        )
+        cur = self.connection.query(SnowflakeQuery.get_secure_view_definitions())
+        for view in cur:
+            db_name = view["TABLE_CATALOG"]
+            schema_name = view["TABLE_SCHEMA"]
+            view_name = view["TABLE_NAME"]
+            secure_view_definitions[db_name][schema_name][view_name] = view[
+                "VIEW_DEFINITION"
+            ]
+
+        return secure_view_definitions
+
     @serialized_lru_cache(maxsize=1)
     def get_tables_for_database(
         self, db_name: str
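For reference, the cached structure returned by `get_secure_view_definitions()` nests database, schema, and view name, which is how `fetch_secure_view_definition` in the next hunk indexes it (the names below are made-up examples):

```python
# {db_name: {schema_name: {view_name: view_definition}}}
definitions = {"ANALYTICS": {"PUBLIC": {"SECURE_ORDERS_V": "create view ..."}}}
print(definitions["ANALYTICS"]["PUBLIC"]["SECURE_ORDERS_V"])
```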
datahub/ingestion/source/snowflake/snowflake_schema_gen.py

@@ -424,6 +424,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             view_identifier = self.identifiers.get_dataset_identifier(
                 view.name, schema_name, db_name
             )
+            if view.is_secure and not view.view_definition:
+                view.view_definition = self.fetch_secure_view_definition(
+                    view.name, schema_name, db_name
+                )
             if view.view_definition:
                 self.aggregator.add_view_definition(
                     view_urn=self.identifiers.gen_dataset_urn(view_identifier),
@@ -449,6 +453,25 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 context=f"{db_name}.{schema_name}",
             )
 
+    def fetch_secure_view_definition(
+        self, table_name: str, schema_name: str, db_name: str
+    ) -> Optional[str]:
+        try:
+            view_definitions = self.data_dictionary.get_secure_view_definitions()
+            return view_definitions[db_name][schema_name][table_name]
+        except Exception as e:
+            if isinstance(e, SnowflakePermissionError):
+                error_msg = (
+                    "Failed to get secure views definitions. Please check permissions."
+                )
+            else:
+                error_msg = "Failed to get secure views definitions"
+            self.structured_reporter.warning(
+                error_msg,
+                exc=e,
+            )
+            return None
+
     def fetch_views_for_schema(
         self, snowflake_schema: SnowflakeSchema, db_name: str, schema_name: str
     ) -> List[SnowflakeView]:
datahub/ingestion/source/tableau/tableau.py

@@ -111,6 +111,8 @@ from datahub.ingestion.source.tableau.tableau_common import (
     tableau_field_to_schema_field,
     workbook_graphql_query,
 )
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+from datahub.ingestion.source.tableau.tableau_validation import check_user_role
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
     AuditStamp,
     ChangeAuditStamps,
@@ -167,7 +169,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn
 
 try:
     # On earlier versions of the tableauserverclient, the NonXMLResponseError
-    # was thrown when reauthentication was
+    # was thrown when reauthentication was necessary. We'll keep both exceptions
     # around for now, but can remove this in the future.
     from tableauserverclient.server.endpoint.exceptions import ( # type: ignore
         NotSignedInError,
@@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
     num_upstream_table_lineage_failed_parse_sql: int = 0
     num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
     num_hidden_assets_skipped: int = 0
+    logged_in_user: List[UserInfo] = []
+
+
+def report_user_role(report: TableauSourceReport, server: Server) -> None:
+    title: str = "Insufficient Permissions"
+    message: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion."
+    try:
+        # TableauSiteSource instance is per site, so each time we need to find-out user detail
+        # the site-role might be different on another site
+        logged_in_user: UserInfo = UserInfo.from_server(server=server)
+
+        if not logged_in_user.is_site_administrator_explorer():
+            report.warning(
+                title=title,
+                message=message,
+                context=f"user-name={logged_in_user.user_name}, role={logged_in_user.site_role}, site_id={logged_in_user.site_id}",
+            )
+
+        report.logged_in_user.append(logged_in_user)
+
+    except Exception as e:
+        report.warning(
+            title=title,
+            message="Failed to verify the user's role. The user must have `Site Administrator Explorer` role.",
+            context=f"{e}",
+            exc=e,
+        )
 
 
 @platform_name("Tableau")
@@ -676,6 +705,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
         try:
             logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
             self.server = self.config.make_tableau_client(site_content_url)
+            report_user_role(report=self.report, server=self.server)
         # Note that we're not catching ConfigurationError, since we want that to throw.
         except ValueError as e:
             self.report.failure(
@@ -689,9 +719,17 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
         test_report = TestConnectionReport()
         try:
             source_config = TableauConfig.parse_obj_allow_extras(config_dict)
-
+
+            server = source_config.make_tableau_client(source_config.site)
+
             test_report.basic_connectivity = CapabilityReport(capable=True)
+
+            test_report.capability_report = check_user_role(
+                logged_in_user=UserInfo.from_server(server=server)
+            )
+
         except Exception as e:
+            logger.warning(f"{e}", exc_info=e)
             test_report.basic_connectivity = CapabilityReport(
                 capable=False, failure_reason=str(e)
             )
@@ -831,6 +869,8 @@ class TableauSiteSource:
         # when emitting custom SQL data sources.
         self.custom_sql_ids_being_used: List[str] = []
 
+        report_user_role(report=report, server=server)
+
     @property
     def no_env_browse_prefix(self) -> str:
         # Prefix to use with browse path (v1)
@@ -1290,7 +1330,6 @@ class TableauSiteSource:
         page_size = page_size_override or self.config.page_size
 
         filter_pages = get_filter_pages(query_filter, page_size)
-
         for filter_page in filter_pages:
             has_next_page = 1
             current_cursor: Optional[str] = None
datahub/ingestion/source/tableau/tableau_common.py

@@ -975,15 +975,22 @@ def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
     # a few ten thousand, then tableau server responds with empty response
     # causing below error:
     # tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b''
+
+    # in practice, we only do pagination if len(query_filter.keys()) == 1
+    if len(query_filter.keys()) != 1:
+        return filter_pages
+
+    current_key = (list(query_filter.keys()))[0]
+
     if (
-
-        and query_filter.get(
-        and isinstance(query_filter[
+        current_key in [c.ID_WITH_IN, c.PROJECT_NAME_WITH_IN]
+        and query_filter.get(current_key)
+        and isinstance(query_filter[current_key], list)
     ):
-        ids = query_filter[
+        ids = query_filter[current_key]
         filter_pages = [
             {
-
+                current_key: ids[
                     start : (
                         start + page_size if start + page_size < len(ids) else len(ids)
                     )
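The reworked pagination only splits filters that have exactly one key. A simplified re-implementation sketch of the chunking behaviour (the `idWithin` key here is a stand-in for the actual filter-key constants):

```python
def chunk_filter(query_filter: dict, page_size: int) -> list:
    # Pagination is only applied when exactly one filter key is present.
    if len(query_filter) != 1:
        return [query_filter]
    (key,) = query_filter.keys()
    ids = query_filter[key]
    return [{key: ids[i : i + page_size]} for i in range(0, len(ids), page_size)]

print(chunk_filter({"idWithin": ["a", "b", "c", "d", "e"]}, page_size=2))
# [{'idWithin': ['a', 'b']}, {'idWithin': ['c', 'd']}, {'idWithin': ['e']}]
```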
datahub/ingestion/source/tableau/tableau_server_wrapper.py (new file)

@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+
+from tableauserverclient import Server, UserItem
+
+from datahub.ingestion.source.tableau import tableau_constant as c
+
+
+@dataclass
+class UserInfo:
+    user_name: str
+    site_role: str
+    site_id: str
+
+    def is_site_administrator_explorer(self):
+        return self.site_role == c.SITE_ROLE
+
+    @staticmethod
+    def from_server(server: Server) -> "UserInfo":
+        assert server.user_id, "make the connection with tableau"
+
+        user: UserItem = server.users.get_by_id(server.user_id)
+
+        assert user.site_role, "site_role is not available" # to silent the lint
+
+        assert user.name, "user name is not available" # to silent the lint
+
+        assert server.site_id, "site identifier is not available" # to silent the lint
+
+        return UserInfo(
+            user_name=user.name,
+            site_role=user.site_role,
+            site_id=server.site_id,
+        )
datahub/ingestion/source/tableau/tableau_validation.py (new file)

@@ -0,0 +1,48 @@
+import logging
+from typing import Dict, Union
+
+from datahub.ingestion.api.source import CapabilityReport, SourceCapability
+from datahub.ingestion.source.tableau import tableau_constant as c
+from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
+
+logger = logging.getLogger(__name__)
+
+
+def check_user_role(
+    logged_in_user: UserInfo,
+) -> Dict[Union[SourceCapability, str], CapabilityReport]:
+    capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = {
+        c.SITE_PERMISSION: CapabilityReport(
+            capable=True,
+        )
+    }
+
+    failure_reason: str = (
+        "The user does not have the `Site Administrator Explorer` role."
+    )
+
+    mitigation_message_prefix: str = (
+        "Assign `Site Administrator Explorer` role to the user"
+    )
+    mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"
+
+    try:
+        # TODO: Add check for `Enable Derived Permissions`
+        if not logged_in_user.is_site_administrator_explorer():
+            capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+                capable=False,
+                failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
+                mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}",
+            )
+
+        return capability_dict
+
+    except Exception as e:
+        logger.warning(msg=e, exc_info=e)
+        capability_dict[c.SITE_PERMISSION] = CapabilityReport(
+            capable=False,
+            failure_reason="Failed to verify user role.",
+            mitigation_message=f"{mitigation_message_prefix}. {mitigation_message_suffix}", # user is unknown
+        )
+
+        return capability_dict
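A hedged usage sketch of the new validation helper as it is wired into `test_connection`; the `UserInfo` values below are hypothetical and would normally come from `UserInfo.from_server(server=server)`:

```python
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
from datahub.ingestion.source.tableau.tableau_validation import check_user_role

# Hypothetical user; any role other than "Site Administrator Explorer" should yield capable=False.
user = UserInfo(user_name="ingest-bot", site_role="Viewer", site_id="site-123")

for capability, report in check_user_role(logged_in_user=user).items():
    print(capability, report.capable, report.failure_reason, report.mitigation_message)
```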