acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic.
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
- datahub/cli/cli_utils.py +13 -2
- datahub/cli/delete_cli.py +3 -3
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +5 -5
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/specific/structuredproperties_cli.py +84 -0
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_builder.py +27 -0
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/emitter/rest_emitter.py +126 -85
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source.py +4 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +22 -19
- datahub/ingestion/graph/config.py +1 -1
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +77 -47
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/s3_util.py +24 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
- datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +60 -60
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +10 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/datahub/datahub_source.py +12 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/delta_lake/source.py +0 -5
- datahub/ingestion/source/demo_data.py +1 -1
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/fivetran.py +1 -6
- datahub/ingestion/source/gc/datahub_gc.py +11 -14
- datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +13 -6
- datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
- datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +11 -6
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/metabase.py +1 -6
- datahub/ingestion/source/mlflow.py +4 -9
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -31
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redash.py +0 -5
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +45 -46
- datahub/ingestion/source/redshift/usage.py +33 -33
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +11 -15
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
- datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
- datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +1 -2
- datahub/ingestion/source/sql/sql_utils.py +5 -0
- datahub/ingestion/source/sql/teradata.py +18 -5
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +1 -6
- datahub/ingestion/source/tableau/tableau.py +343 -117
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +74 -74
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/source_report/ingestion_stage.py +24 -20
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +317 -44
- datahub/metadata/_urns/urn_defs.py +69 -15
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- datahub/metadata/schema.avsc +302 -89
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLModelProperties.avsc +96 -48
- datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
- datahub/metadata/schemas/VersionProperties.avsc +216 -0
- datahub/metadata/schemas/VersionSetKey.avsc +26 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +11 -11
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/perf_timer.py +11 -6
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/tableau/tableau.py

@@ -71,7 +71,6 @@ from datahub.ingestion.api.decorators import (
 from datahub.ingestion.api.source import (
     CapabilityReport,
     MetadataWorkUnitProcessor,
-    Source,
     StructuredLogLevel,
     TestableSource,
     TestConnectionReport,
@@ -118,6 +117,7 @@ from datahub.ingestion.source.tableau.tableau_common import (
 )
 from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
 from datahub.ingestion.source.tableau.tableau_validation import check_user_role
+from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
     AuditStamp,
     ChangeAuditStamps,
@@ -170,8 +170,12 @@ from datahub.sql_parsing.sqlglot_lineage import (
     create_lineage_sql_parsed_result,
 )
 from datahub.utilities import config_clean
+from datahub.utilities.perf_timer import PerfTimer
+from datahub.utilities.stats_collections import TopKDict
 from datahub.utilities.urns.dataset_urn import DatasetUrn

+DEFAULT_PAGE_SIZE = 10
+
 try:
     # On earlier versions of the tableauserverclient, the NonXMLResponseError
     # was thrown when reauthentication was necessary. We'll keep both exceptions
@@ -277,9 +281,9 @@ class TableauConnectionConfig(ConfigModel):
         return authentication

     def make_tableau_client(self, site: str) -> Server:
-        authentication: Union[
-            TableauAuth, PersonalAccessTokenAuth
-        ] = self.get_tableau_auth(site)
+        authentication: Union[TableauAuth, PersonalAccessTokenAuth] = (
+            self.get_tableau_auth(site)
+        )
         try:
             server = Server(
                 self.connect_uri,
@@ -340,11 +344,140 @@ class PermissionIngestionConfig(ConfigModel):
     )


+class TableauPageSizeConfig(ConfigModel):
+    """
+    Configuration for setting page sizes for different Tableau metadata objects.
+
+    Some considerations:
+    - All have default values, so no setting is mandatory.
+    - In general, with the `effective_` methods, if not specifically set fine-grained metrics fallback to `page_size`
+      or correlate with `page_size`.
+
+    Measuring the impact of changing these values can be done by looking at the
+    `num_(filter_|paginated_)?queries_by_connection_type` metrics in the report.
+    """
+
+    page_size: int = Field(
+        default=DEFAULT_PAGE_SIZE,
+        description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
+    )
+
+    database_server_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of database servers to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_database_server_page_size(self) -> int:
+        return self.database_server_page_size or self.page_size
+
+    # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
+    # returns warnings like this:
+    # {
+    #     'message': 'Showing partial results. The request exceeded the 20000 node limit. Use pagination, additional filtering, or both in the query to adjust results.',
+    #     'extensions': {
+    #         'severity': 'WARNING',
+    #         'code': 'NODE_LIMIT_EXCEEDED',
+    #         'properties': {
+    #             'nodeLimit': 20000
+    #         }
+    #     }
+    # }
+    # Reducing the page size for the workbook queries helps to avoid this.
+    workbook_page_size: Optional[int] = Field(
+        default=1,
+        description="[advanced] Number of workbooks to query at a time using the Tableau API; defaults to `1` and fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_workbook_page_size(self) -> int:
+        return self.workbook_page_size or self.page_size
+
+    sheet_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of sheets to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_sheet_page_size(self) -> int:
+        return self.sheet_page_size or self.page_size
+
+    dashboard_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of dashboards to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_dashboard_page_size(self) -> int:
+        return self.dashboard_page_size or self.page_size
+
+    embedded_datasource_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of embedded datasources to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_embedded_datasource_page_size(self) -> int:
+        return self.embedded_datasource_page_size or self.page_size
+
+    # Since the field upstream query was separated from the embedded datasource queries into an independent query,
+    # the number of queries increased significantly and so the execution time.
+    # To increase the batching and so reduce the number of queries, we can increase the page size for that
+    # particular case.
+    #
+    # That's why unless specifically set, we will effectively use 10 times the page size as the default page size.
+    embedded_datasource_field_upstream_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of upstream fields to query at a time for embedded datasources using the Tableau API; fallbacks to `page_size` * 10 if not set.",
+    )
+
+    @property
+    def effective_embedded_datasource_field_upstream_page_size(self) -> int:
+        return self.embedded_datasource_field_upstream_page_size or self.page_size * 10
+
+    published_datasource_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of published datasources to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_published_datasource_page_size(self) -> int:
+        return self.published_datasource_page_size or self.page_size
+
+    published_datasource_field_upstream_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of upstream fields to query at a time for published datasources using the Tableau API; fallbacks to `page_size` * 10 if not set.",
+    )
+
+    @property
+    def effective_published_datasource_field_upstream_page_size(self) -> int:
+        return self.published_datasource_field_upstream_page_size or self.page_size * 10
+
+    custom_sql_table_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of custom sql datasources to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_custom_sql_table_page_size(self) -> int:
+        return self.custom_sql_table_page_size or self.page_size
+
+    database_table_page_size: Optional[int] = Field(
+        default=None,
+        description="[advanced] Number of database tables to query at a time using the Tableau API; fallbacks to `page_size` if not set.",
+    )
+
+    @property
+    def effective_database_table_page_size(self) -> int:
+        return self.database_table_page_size or self.page_size
+
+
 class TableauConfig(
     DatasetLineageProviderConfigBase,
     StatefulIngestionConfigBase,
     DatasetSourceConfigMixin,
     TableauConnectionConfig,
+    TableauPageSizeConfig,
 ):
     projects: Optional[List[str]] = Field(
         default=["default"],
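The `effective_*` properties encode the fallback contract from the docstring: each fine-grained size is `Optional`, and `x or self.page_size` resolves it at read time. A minimal sketch of the pattern (plain pydantic `BaseModel` standing in for DataHub's `ConfigModel`; the class name and numbers here are illustrative, not the shipped defaults):

from typing import Optional

from pydantic import BaseModel


class PageSizes(BaseModel):
    page_size: int = 10
    workbook_page_size: Optional[int] = 1  # defaults to 1, like the shipped config
    sheet_page_size: Optional[int] = None

    @property
    def effective_workbook_page_size(self) -> int:
        # `or` treats both None and 0 as unset, so 0 also falls back to page_size.
        return self.workbook_page_size or self.page_size

    @property
    def effective_sheet_page_size(self) -> int:
        return self.sheet_page_size or self.page_size


sizes = PageSizes(page_size=50)
assert sizes.effective_sheet_page_size == 50  # unset -> falls back to page_size
assert sizes.effective_workbook_page_size == 1  # its own default wins over page_size

sizes = PageSizes(page_size=50, workbook_page_size=None)
assert sizes.effective_workbook_page_size == 50  # explicit None -> falls back

One consequence worth noting: because `workbook_page_size` defaults to `1` rather than `None`, raising `page_size` alone does not raise the effective workbook page size; it has to be set explicitly (or cleared to `None`) to follow `page_size`.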
@@ -394,29 +527,6 @@ class TableauConfig(
         description="Ingest details for tables external to (not embedded in) tableau as entities.",
     )

-    page_size: int = Field(
-        default=10,
-        description="[advanced] Number of metadata objects (e.g. CustomSQLTable, PublishedDatasource, etc) to query at a time using the Tableau API.",
-    )
-
-    # We've found that even with a small workbook page size (e.g. 10), the Tableau API often
-    # returns warnings like this:
-    # {
-    #     'message': 'Showing partial results. The request exceeded the 20000 node limit. Use pagination, additional filtering, or both in the query to adjust results.',
-    #     'extensions': {
-    #         'severity': 'WARNING',
-    #         'code': 'NODE_LIMIT_EXCEEDED',
-    #         'properties': {
-    #             'nodeLimit': 20000
-    #         }
-    #     }
-    # }
-    # Reducing the page size for the workbook queries helps to avoid this.
-    workbook_page_size: int = Field(
-        default=1,
-        description="[advanced] Number of workbooks to query at a time using the Tableau API.",
-    )
-
     env: str = Field(
         default=builder.DEFAULT_ENV,
         description="Environment to use in namespace when constructing URNs.",
@@ -525,7 +635,7 @@ class TableauConfig(
         project_path_pattern = values.get("project_path_pattern")
         if project_pattern is None and project_path_pattern is None and projects:
             logger.warning(
-                "projects is deprecated, please use
+                "projects is deprecated, please use project_path_pattern instead."
             )
             logger.info("Initializing project_pattern from projects")
             values["project_pattern"] = AllowDenyPattern(
@@ -598,18 +708,18 @@ class DatabaseTable:
     """

     urn: str
-    id: Optional[
-        str
-    ] = None  # is not None only for tables that came from Tableau metadata
+    id: Optional[str] = (
+        None  # is not None only for tables that came from Tableau metadata
+    )
     num_cols: Optional[int] = None

-    paths: Optional[
-        Set[str]
-    ] = None  # maintains all browse paths encountered for this table
+    paths: Optional[Set[str]] = (
+        None  # maintains all browse paths encountered for this table
+    )

-    parsed_columns: Optional[
-        Set[str]
-    ] = None  # maintains all columns encountered for this table during parsing SQL queries
+    parsed_columns: Optional[Set[str]] = (
+        None  # maintains all columns encountered for this table during parsing SQL queries
+    )

     def update_table(
         self,
@@ -643,12 +753,41 @@ class SiteIdContentUrl:


 @dataclass
-class TableauSourceReport(StaleEntityRemovalSourceReport):
+class TableauSourceReport(
+    StaleEntityRemovalSourceReport,
+    IngestionStageReport,
+):
     get_all_datasources_query_failed: bool = False
     num_get_datasource_query_failures: int = 0
     num_datasource_field_skipped_no_name: int = 0
     num_csql_field_skipped_no_name: int = 0
     num_table_field_skipped_no_name: int = 0
+    # timers
+    extract_usage_stats_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
+    fetch_groups_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict)
+    populate_database_server_hostname_map_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
+    populate_projects_registry_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
+    emit_workbooks_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict)
+    emit_sheets_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict)
+    emit_dashboards_timer: Dict[str, float] = dataclass_field(default_factory=TopKDict)
+    emit_embedded_datasources_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
+    emit_published_datasources_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
+    emit_custom_sql_datasources_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
+    emit_upstream_tables_timer: Dict[str, float] = dataclass_field(
+        default_factory=TopKDict
+    )
     # lineage
     num_tables_with_upstream_lineage: int = 0
     num_upstream_table_lineage: int = 0
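Each timer field is declared with `dataclass_field(default_factory=TopKDict)` so every report instance starts with its own fresh mapping; dataclasses reject a bare mutable default like `= {}` outright. A minimal sketch of the mechanics, with a plain `dict` standing in for `TopKDict` (whose extra top-K behavior is not relied on here):

from dataclasses import dataclass, field as dataclass_field
from typing import Dict


@dataclass
class Report:
    # One entry per site_content_url, written once per ingestion phase.
    emit_sheets_timer: Dict[str, float] = dataclass_field(default_factory=dict)


a, b = Report(), Report()
a.emit_sheets_timer["site-1"] = 1.25
assert b.emit_sheets_timer == {}  # no state shared between report instances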
@@ -660,6 +799,7 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
     num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
     num_hidden_assets_skipped: int = 0
     logged_in_user: List[UserInfo] = dataclass_field(default_factory=list)
+
     last_authenticated_at: Optional[datetime] = None

     num_expected_tableau_metadata_queries: int = 0
@@ -668,6 +808,23 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
         default_factory=(lambda: defaultdict(int))
     )

+    # Counters for tracking the number of queries made to get_connection_objects method
+    # by connection type (static and short set of keys):
+    # - num_queries_by_connection_type: total number of queries
+    # - num_filter_queries_by_connection_type: number of paginated queries due to splitting query filters
+    # - num_paginated_queries_by_connection_type: total number of queries due to Tableau pagination
+    # These counters are useful to understand the impact of changing the page size.
+
+    num_queries_by_connection_type: Dict[str, int] = dataclass_field(
+        default_factory=(lambda: defaultdict(int))
+    )
+    num_filter_queries_by_connection_type: Dict[str, int] = dataclass_field(
+        default_factory=(lambda: defaultdict(int))
+    )
+    num_paginated_queries_by_connection_type: Dict[str, int] = dataclass_field(
+        default_factory=(lambda: defaultdict(int))
+    )
+

 def report_user_role(report: TableauSourceReport, server: Server) -> None:
     title: str = "Insufficient Permissions"
@@ -771,11 +928,6 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
     def get_report(self) -> TableauSourceReport:
         return self.report

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
-        config = TableauConfig.parse_obj(config_dict)
-        return cls(config, ctx)
-
     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
             *super().get_workunit_processors(),
@@ -834,6 +986,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
                 platform=self.platform,
             )
             yield from site_source.ingest_tableau_site()
+
         except MetadataQueryException as md_exception:
             self.report.failure(
                 title="Failed to Retrieve Tableau Metadata",
@@ -966,7 +1119,9 @@ class TableauSiteSource:
             return server_connection

         for database_server in self.get_connection_objects(
-            database_servers_graphql_query,
+            query=database_servers_graphql_query,
+            connection_type=c.DATABASE_SERVERS_CONNECTION,
+            page_size=self.config.effective_database_server_page_size,
         ):
             database_server_id = database_server.get(c.ID)
             server_connection = database_server.get(c.HOST_NAME)
@@ -1392,22 +1547,30 @@ class TableauSiteSource:
         self,
         query: str,
         connection_type: str,
+        page_size: int,
         query_filter: dict = {},
-        page_size_override: Optional[int] = None,
     ) -> Iterable[dict]:
         query_filter = optimize_query_filter(query_filter)

         # Calls the get_connection_object_page function to get the objects,
         # and automatically handles pagination.
-        page_size = page_size_override or self.config.page_size

         filter_pages = get_filter_pages(query_filter, page_size)
+        self.report.num_queries_by_connection_type[connection_type] += 1
+        self.report.num_filter_queries_by_connection_type[connection_type] += len(
+            filter_pages
+        )
+
         for filter_page in filter_pages:
             has_next_page = 1
             current_cursor: Optional[str] = None
             while has_next_page:
                 filter_: str = make_filter(filter_page)

+                self.report.num_paginated_queries_by_connection_type[
+                    connection_type
+                ] += 1
+
                 self.report.num_expected_tableau_metadata_queries += 1
                 (
                     connection_objects,
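The three counters incremented above nest: one increment per `get_connection_objects` call, one per filter page, and one per Tableau results page. A back-of-the-envelope sketch of how they relate (illustrative numbers; it assumes `get_filter_pages` splits a filter of 95 ids into pages of `page_size=10`, and the connection-type key is hypothetical):

from collections import defaultdict
from math import ceil

num_queries = defaultdict(int)
num_filter_queries = defaultdict(int)
num_paginated_queries = defaultdict(int)

connection_type = "sheetsConnection"  # hypothetical key
filter_pages = ceil(95 / 10)  # -> 10 filter pages

num_queries[connection_type] += 1  # once per call
num_filter_queries[connection_type] += filter_pages  # len(filter_pages)
# The while-loop issues at least one request per filter page, more whenever
# Tableau reports another page of results, so this is a lower bound:
num_paginated_queries[connection_type] += filter_pages

assert (
    num_paginated_queries[connection_type]
    >= num_filter_queries[connection_type]
    >= num_queries[connection_type]
)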
@@ -1435,10 +1598,10 @@ class TableauSiteSource:
         projects = {c.PROJECT_NAME_WITH_IN: project_names}

         for workbook in self.get_connection_objects(
-            workbook_graphql_query,
-            c.WORKBOOKS_CONNECTION,
-            projects,
-            page_size_override=self.config.workbook_page_size,
+            query=workbook_graphql_query,
+            connection_type=c.WORKBOOKS_CONNECTION,
+            query_filter=projects,
+            page_size=self.config.effective_workbook_page_size,
         ):
             # This check is needed as we are using projectNameWithin which return project as per project name so if
             # user want to ingest only nested project C from A->B->C then tableau might return more than one Project
@@ -1893,9 +2056,10 @@ class TableauSiteSource:

         custom_sql_connection = list(
             self.get_connection_objects(
-                custom_sql_graphql_query,
-                c.CUSTOM_SQL_TABLE_CONNECTION,
-                custom_sql_filter,
+                query=custom_sql_graphql_query,
+                connection_type=c.CUSTOM_SQL_TABLE_CONNECTION,
+                query_filter=custom_sql_filter,
+                page_size=self.config.effective_custom_sql_table_page_size,
             )
         )

@@ -2146,8 +2310,7 @@ class TableauSiteSource:
             c.EMBEDDED_DATA_SOURCE,
         ):
             logger.debug(
-                f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is "
-                f"unsupported"
+                f"datasource {ds.get(c.NAME)} type {ds.get(c.TYPE_NAME)} is unsupported"
             )
             return None

@@ -2329,9 +2492,9 @@ class TableauSiteSource:
     def _enrich_database_tables_with_parsed_schemas(
         self, parsing_result: SqlParsingResult
     ) -> None:
-        in_tables_schemas: Dict[
-            str, Set[str]
-        ] = transform_parsing_result_to_in_tables_schemas(parsing_result)
+        in_tables_schemas: Dict[str, Set[str]] = (
+            transform_parsing_result_to_in_tables_schemas(parsing_result)
+        )

         if not in_tables_schemas:
             logger.info("Unable to extract table schema from parsing result")
@@ -2604,6 +2767,7 @@ class TableauSiteSource:
         self,
         datasource: dict,
         field_upstream_query: str,
+        page_size: int,
     ) -> dict:
         # Collect field ids to fetch field upstreams
         field_ids: List[str] = []
@@ -2614,9 +2778,10 @@ class TableauSiteSource:
         # Fetch field upstreams and arrange them in map
         field_vs_upstream: Dict[str, dict] = {}
         for field_upstream in self.get_connection_objects(
-            field_upstream_query,
-            c.FIELDS_CONNECTION,
-            {c.ID_WITH_IN: field_ids},
+            query=field_upstream_query,
+            connection_type=c.FIELDS_CONNECTION,
+            query_filter={c.ID_WITH_IN: field_ids},
+            page_size=page_size,
         ):
             if field_upstream.get(c.ID):
                 field_id = field_upstream[c.ID]
@@ -2639,13 +2804,15 @@ class TableauSiteSource:
         datasource_filter = {c.ID_WITH_IN: self.datasource_ids_being_used}

         for datasource in self.get_connection_objects(
-            published_datasource_graphql_query,
-            c.PUBLISHED_DATA_SOURCES_CONNECTION,
-            datasource_filter,
+            query=published_datasource_graphql_query,
+            connection_type=c.PUBLISHED_DATA_SOURCES_CONNECTION,
+            query_filter=datasource_filter,
+            page_size=self.config.effective_published_datasource_page_size,
         ):
             datasource = self.update_datasource_for_field_upstream(
                 datasource=datasource,
                 field_upstream_query=datasource_upstream_fields_graphql_query,
+                page_size=self.config.effective_published_datasource_field_upstream_page_size,
             )

             yield from self.emit_datasource(datasource)
@@ -2661,11 +2828,12 @@ class TableauSiteSource:
             c.ID_WITH_IN: list(tableau_database_table_id_to_urn_map.keys())
         }

-        #
+        # Emitting tables that came from Tableau metadata
         for tableau_table in self.get_connection_objects(
-            database_tables_graphql_query,
-            c.DATABASE_TABLES_CONNECTION,
-            tables_filter,
+            query=database_tables_graphql_query,
+            connection_type=c.DATABASE_TABLES_CONNECTION,
+            query_filter=tables_filter,
+            page_size=self.config.effective_database_table_page_size,
         ):
             database_table = self.database_tables[
                 tableau_database_table_id_to_urn_map[tableau_table[c.ID]]
@@ -2854,9 +3022,10 @@ class TableauSiteSource:
         sheets_filter = {c.ID_WITH_IN: self.sheet_ids}

         for sheet in self.get_connection_objects(
-            sheet_graphql_query,
-            c.SHEETS_CONNECTION,
-            sheets_filter,
+            query=sheet_graphql_query,
+            connection_type=c.SHEETS_CONNECTION,
+            query_filter=sheets_filter,
+            page_size=self.config.effective_sheet_page_size,
         ):
             if self.config.ingest_hidden_assets or not self._is_hidden_view(sheet):
                 yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
@@ -3174,9 +3343,10 @@ class TableauSiteSource:
         dashboards_filter = {c.ID_WITH_IN: self.dashboard_ids}

         for dashboard in self.get_connection_objects(
-            dashboard_graphql_query,
-            c.DASHBOARDS_CONNECTION,
-            dashboards_filter,
+            query=dashboard_graphql_query,
+            connection_type=c.DASHBOARDS_CONNECTION,
+            query_filter=dashboards_filter,
+            page_size=self.config.effective_dashboard_page_size,
         ):
             if self.config.ingest_hidden_assets or not self._is_hidden_view(dashboard):
                 yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
@@ -3321,13 +3491,15 @@ class TableauSiteSource:
         datasource_filter = {c.ID_WITH_IN: self.embedded_datasource_ids_being_used}

         for datasource in self.get_connection_objects(
-            embedded_datasource_graphql_query,
-            c.EMBEDDED_DATA_SOURCES_CONNECTION,
-            datasource_filter,
+            query=embedded_datasource_graphql_query,
+            connection_type=c.EMBEDDED_DATA_SOURCES_CONNECTION,
+            query_filter=datasource_filter,
+            page_size=self.config.effective_embedded_datasource_page_size,
         ):
             datasource = self.update_datasource_for_field_upstream(
                 datasource=datasource,
                 field_upstream_query=datasource_upstream_fields_graphql_query,
+                page_size=self.config.effective_embedded_datasource_field_upstream_page_size,
             )
             yield from self.emit_datasource(
                 datasource,
@@ -3386,25 +3558,25 @@ class TableauSiteSource:

            generated_project_keys.add(project_key.guid())

-            parent_project_key: Optional[
-                Union[ProjectKey, SiteKey]
-            ] = None  # It is going
+            parent_project_key: Optional[Union[ProjectKey, SiteKey]] = (
+                None  # It is going
+            )
            # to be used as a parent container key for the current tableau project

            if project_.parent_id is not None:
                # Go to the parent project as we need to generate container first for parent
                parent_project_key = self.gen_project_key(project_.parent_id)

-                parent_tableau_project: Optional[
-                    TableauProject
-                ] = self.tableau_project_registry.get(project_.parent_id)
+                parent_tableau_project: Optional[TableauProject] = (
+                    self.tableau_project_registry.get(project_.parent_id)
+                )

                if (
                    parent_tableau_project is None
                ):  # It is not in project registry because of project_pattern
-                    assert (
-                        project_.parent_name
-                    ), f"project {project_.name} should not be null"
+                    assert project_.parent_name, (
+                        f"project {project_.name} should not be null"
+                    )
                    parent_tableau_project = TableauProject(
                        id=project_.parent_id,
                        name=project_.parent_name,
@@ -3432,7 +3604,7 @@ class TableauSiteSource:
                parent_container_key=parent_project_key,
            )

-        for
+        for project in self.tableau_project_registry.values():
            logger.debug(
                f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}"
            )
@@ -3489,33 +3661,87 @@ class TableauSiteSource:
         return {"permissions": json.dumps(groups)} if len(groups) > 0 else None

     def ingest_tableau_site(self):
-        [30 removed lines; the previous method body is not preserved in this view]
+        with self.report.new_stage(
+            f"Ingesting Tableau Site: {self.site_id} {self.site_content_url}"
+        ):
+            # Initialise the dictionary to later look-up for chart and dashboard stat
+            if self.config.extract_usage_stats:
+                with PerfTimer() as timer:
+                    self._populate_usage_stat_registry()
+                self.report.extract_usage_stats_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
+
+            if self.config.permission_ingestion:
+                with PerfTimer() as timer:
+                    self._fetch_groups()
+                self.report.fetch_groups_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
+
+            # Populate the map of database names and database hostnames to be used later to map
+            # databases to platform instances.
+            if self.config.database_hostname_to_platform_instance_map:
+                with PerfTimer() as timer:
+                    self._populate_database_server_hostname_map()
+                self.report.populate_database_server_hostname_map_timer[
+                    self.site_content_url
+                ] = timer.elapsed_seconds(digits=2)
+
+            with PerfTimer() as timer:
+                self._populate_projects_registry()
+            self.report.populate_projects_registry_timer[self.site_content_url] = (
+                timer.elapsed_seconds(digits=2)
+            )
+
+            if self.config.add_site_container:
+                yield from self.emit_site_container()
+            yield from self.emit_project_containers()
+
+            with PerfTimer() as timer:
+                yield from self.emit_workbooks()
+            self.report.emit_workbooks_timer[self.site_content_url] = (
+                timer.elapsed_seconds(digits=2)
+            )
+
+            if self.sheet_ids:
+                with PerfTimer() as timer:
+                    yield from self.emit_sheets()
+                self.report.emit_sheets_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
+
+            if self.dashboard_ids:
+                with PerfTimer() as timer:
+                    yield from self.emit_dashboards()
+                self.report.emit_dashboards_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
+
+            if self.embedded_datasource_ids_being_used:
+                with PerfTimer() as timer:
+                    yield from self.emit_embedded_datasources()
+                self.report.emit_embedded_datasources_timer[
+                    self.site_content_url
+                ] = timer.elapsed_seconds(digits=2)
+
+            if self.datasource_ids_being_used:
+                with PerfTimer() as timer:
+                    yield from self.emit_published_datasources()
+                self.report.emit_published_datasources_timer[
+                    self.site_content_url
+                ] = timer.elapsed_seconds(digits=2)
+
+            if self.custom_sql_ids_being_used:
+                with PerfTimer() as timer:
+                    yield from self.emit_custom_sql_datasources()
+                self.report.emit_custom_sql_datasources_timer[
+                    self.site_content_url
+                ] = timer.elapsed_seconds(digits=2)
+
+            if self.database_tables:
+                with PerfTimer() as timer:
+                    yield from self.emit_upstream_tables()
+                self.report.emit_upstream_tables_timer[self.site_content_url] = (
+                    timer.elapsed_seconds(digits=2)
+                )
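The rewritten method wraps each phase in the same `PerfTimer` / `elapsed_seconds(digits=2)` pattern and records the result per `site_content_url`. The pattern in isolation (both calls are taken verbatim from the diff above; the sleep is a stand-in for an emit phase):

import time

from datahub.utilities.perf_timer import PerfTimer

timers: dict = {}
with PerfTimer() as timer:
    time.sleep(0.1)  # stand-in for e.g. emit_workbooks()
timers["my-site"] = timer.elapsed_seconds(digits=2)
print(timers)  # e.g. {'my-site': 0.1}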