acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
- datahub/cli/cli_utils.py +13 -2
- datahub/cli/delete_cli.py +3 -3
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +5 -5
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/specific/structuredproperties_cli.py +84 -0
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_builder.py +27 -0
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/emitter/rest_emitter.py +126 -85
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source.py +4 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +22 -19
- datahub/ingestion/graph/config.py +1 -1
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +77 -47
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/s3_util.py +24 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
- datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +60 -60
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +10 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/datahub/datahub_source.py +12 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/delta_lake/source.py +0 -5
- datahub/ingestion/source/demo_data.py +1 -1
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/fivetran.py +1 -6
- datahub/ingestion/source/gc/datahub_gc.py +11 -14
- datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +13 -6
- datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
- datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +11 -6
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/metabase.py +1 -6
- datahub/ingestion/source/mlflow.py +4 -9
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -31
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redash.py +0 -5
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +45 -46
- datahub/ingestion/source/redshift/usage.py +33 -33
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +11 -15
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
- datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
- datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +1 -2
- datahub/ingestion/source/sql/sql_utils.py +5 -0
- datahub/ingestion/source/sql/teradata.py +18 -5
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +1 -6
- datahub/ingestion/source/tableau/tableau.py +343 -117
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +74 -74
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/source_report/ingestion_stage.py +24 -20
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +317 -44
- datahub/metadata/_urns/urn_defs.py +69 -15
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- datahub/metadata/schema.avsc +302 -89
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLModelProperties.avsc +96 -48
- datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
- datahub/metadata/schemas/VersionProperties.avsc +216 -0
- datahub/metadata/schemas/VersionSetKey.avsc +26 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +11 -11
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/perf_timer.py +11 -6
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
@@ -276,6 +276,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         "HLLSKETCH": NullType,
         "TIMETZ": TimeType,
         "VARBYTE": StringType,
+        "SUPER": NullType,
     }

     def get_platform_instance_id(self) -> str:

@@ -304,13 +305,13 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             test_report.capability_report = {}
             try:
                 RedshiftDataDictionary.get_schemas(connection, database=config.database)
-                test_report.capability_report[
-
-
+                test_report.capability_report[SourceCapability.SCHEMA_METADATA] = (
+                    CapabilityReport(capable=True)
+                )
             except Exception as e:
-                test_report.capability_report[
-
-
+                test_report.capability_report[SourceCapability.SCHEMA_METADATA] = (
+                    CapabilityReport(capable=False, failure_reason=str(e))
+                )

         except Exception as e:
             test_report.basic_connectivity = CapabilityReport(

@@ -423,10 +424,10 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):

         database = self.config.database
         logger.info(f"Processing db {database}")
-        self.report.
-
-
-
+        with self.report.new_stage(METADATA_EXTRACTION):
+            self.db_tables[database] = defaultdict()
+            self.db_views[database] = defaultdict()
+            self.db_schemas.setdefault(database, {})

         # TODO: Ideally, we'd push down exception handling to the place where the connection is used, as opposed to keeping
         # this fallback. For now, this gets us broad coverage quickly.

@@ -462,12 +463,12 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
                     self.process_schemas(connection, database)
                 )

-                self.report.
-
-
-
-
-
+                with self.report.new_stage(LINEAGE_EXTRACTION):
+                    yield from self.extract_lineage_v2(
+                        connection=connection,
+                        database=database,
+                        lineage_extractor=lineage_extractor,
+                    )

                 all_tables = self.get_all_tables()
             else:

@@ -480,25 +481,25 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
                 or self.config.include_view_lineage
                 or self.config.include_copy_lineage
             ):
-                self.report.
-
-
-
+                with self.report.new_stage(LINEAGE_EXTRACTION):
+                    yield from self.extract_lineage(
+                        connection=connection, all_tables=all_tables, database=database
+                    )

-            self.report.report_ingestion_stage_start(USAGE_EXTRACTION_INGESTION)
             if self.config.include_usage_statistics:
-
-
-
+                with self.report.new_stage(USAGE_EXTRACTION_INGESTION):
+                    yield from self.extract_usage(
+                        connection=connection, all_tables=all_tables, database=database
+                    )

             if self.config.is_profiling_enabled():
-                self.report.
-
-
-
-
-
-
+                with self.report.new_stage(PROFILING):
+                    profiler = RedshiftProfiler(
+                        config=self.config,
+                        report=self.report,
+                        state_handler=self.profiling_state_handler,
+                    )
+                    yield from profiler.get_workunits(self.db_tables)

     def process_schemas(self, connection, database):
         for schema in self.data_dictionary.get_schemas(

@@ -633,8 +634,8 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             else:
                 logger.info("View processing disabled, skipping")

-            self.report.metadata_extraction_sec[report_key] =
-
+            self.report.metadata_extraction_sec[report_key] = timer.elapsed_seconds(
+                digits=2
             )

     def _process_table(

@@ -946,9 +947,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     def get_all_tables(
         self,
     ) -> Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]]:
-        all_tables: Dict[
-
-
+        all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]] = (
+            defaultdict(dict)
+        )
         for db in set().union(self.db_tables, self.db_views):
             tables = self.db_tables.get(db, {})
             views = self.db_views.get(db, {})

@@ -966,9 +967,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
         all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]],
     ) -> Iterable[MetadataWorkUnit]:
         with PerfTimer() as timer:
-            redundant_usage_run_skip_handler: Optional[
-
-
+            redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+                None
+            )
             if self.config.enable_stateful_usage_ingestion:
                 redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
                     source=self,

@@ -986,9 +987,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):

             yield from usage_extractor.get_usage_workunits(all_tables=all_tables)

-            self.report.usage_extraction_sec[database] =
-                timer.elapsed_seconds(), 2
-            )
+            self.report.usage_extraction_sec[database] = timer.elapsed_seconds(digits=2)

     def extract_lineage(
         self,

@@ -1011,8 +1010,8 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
                 database=database, connection=connection, all_tables=all_tables
             )

-            self.report.lineage_extraction_sec[f"{database}"] =
-
+            self.report.lineage_extraction_sec[f"{database}"] = timer.elapsed_seconds(
+                digits=2
             )
             yield from self.generate_lineage(
                 database, lineage_extractor=lineage_extractor

@@ -1042,8 +1041,8 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):

         yield from lineage_extractor.generate()

-        self.report.lineage_extraction_sec[f"{database}"] =
-
+        self.report.lineage_extraction_sec[f"{database}"] = timer.elapsed_seconds(
+            digits=2
         )

         if self.redundant_lineage_run_skip_handler:

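The hunks above replace the paired report_ingestion_stage_start(...) calls with "with self.report.new_stage(...)" blocks, so each ingestion stage is opened, timed, and closed by a context manager even if the stage body raises. A minimal sketch of how such a new_stage helper could be structured, assuming a hypothetical IngestionStageReport with a durations dict (names and fields here are illustrative, not the actual DataHub implementation):

    import time
    from contextlib import contextmanager
    from typing import Dict, Iterator

    METADATA_EXTRACTION = "Metadata Extraction"   # illustrative stage names
    LINEAGE_EXTRACTION = "Lineage Extraction"


    class IngestionStageReport:
        """Hypothetical report object: records how long each named stage took."""

        def __init__(self) -> None:
            self.stage_durations: Dict[str, float] = {}

        @contextmanager
        def new_stage(self, stage: str) -> Iterator[None]:
            start = time.perf_counter()
            try:
                yield  # the caller's stage body runs here
            finally:
                # the stage is always closed, even if the body raised
                self.stage_durations[stage] = round(time.perf_counter() - start, 2)


    report = IngestionStageReport()
    with report.new_stage(METADATA_EXTRACTION):
        pass  # extract schemas, tables, views ...
    with report.new_stage(LINEAGE_EXTRACTION):
        pass  # extract lineage ...
    print(report.stage_durations)

Compared with explicit start calls, the context manager guarantees a matching "stage end" in the presence of early returns and exceptions, which is what the rewritten Redshift code relies on.
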
@@ -182,38 +182,38 @@ class RedshiftUsageExtractor:
         self.report.num_operational_stats_filtered = 0

         if self.config.include_operational_stats:
-            self.report.
-
-
-
-
-
-
-
-
+            with self.report.new_stage(USAGE_EXTRACTION_OPERATIONAL_STATS):
+                with PerfTimer() as timer:
+                    # Generate operation aspect workunits
+                    yield from self._gen_operation_aspect_workunits(
+                        self.connection, all_tables
+                    )
+                    self.report.operational_metadata_extraction_sec[
+                        self.config.database
+                    ] = timer.elapsed_seconds(digits=2)

         # Generate aggregate events
-        self.report.
-
-
-
-
-
-
-
-
-
-
+        with self.report.new_stage(USAGE_EXTRACTION_USAGE_AGGREGATION):
+            query: str = self.queries.usage_query(
+                start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT),
+                end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT),
+                database=self.config.database,
+            )
+            access_events_iterable: Iterable[RedshiftAccessEvent] = (
+                self._gen_access_events_from_history_query(
+                    query, connection=self.connection, all_tables=all_tables
+                )
+            )

-
-
-
-
-
-
-
-
-
+            aggregated_events: AggregatedAccessEvents = self._aggregate_access_events(
+                access_events_iterable
+            )
+            # Generate usage workunits from aggregated events.
+            for time_bucket in aggregated_events.values():
+                for aggregate in time_bucket.values():
+                    wu: MetadataWorkUnit = self._make_usage_stat(aggregate)
+                    self.report.num_usage_workunits_emitted += 1
+                    yield wu

     def _gen_operation_aspect_workunits(
         self,

@@ -225,10 +225,10 @@ class RedshiftUsageExtractor:
             start_time=self.start_time.strftime(REDSHIFT_DATETIME_FORMAT),
             end_time=self.end_time.strftime(REDSHIFT_DATETIME_FORMAT),
         )
-        access_events_iterable: Iterable[
-
-
+        access_events_iterable: Iterable[RedshiftAccessEvent] = (
+            self._gen_access_events_from_history_query(
+                query, connection, all_tables=all_tables
+            )
         )

         # Generate operation aspect work units from the access events

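Several of these hunks also switch elapsed-time reporting to timer.elapsed_seconds(digits=2), moving the rounding into the timer instead of a round(...) call at every call site. A minimal stand-in for such a timer, under the assumption that it is a plain context manager (illustrative only, not the actual datahub.utilities.perf_timer.PerfTimer):

    import time
    from typing import Optional


    class SimplePerfTimer:
        """Illustrative timer: measures a with-block and rounds on request."""

        def __enter__(self) -> "SimplePerfTimer":
            self._start = time.perf_counter()
            self._end: Optional[float] = None
            return self

        def __exit__(self, *exc) -> bool:
            self._end = time.perf_counter()
            return False  # never swallow exceptions

        def elapsed_seconds(self, digits: int = 2) -> float:
            # if called inside the with-block, report the running total
            end = self._end if self._end is not None else time.perf_counter()
            return round(end - self._start, digits)


    with SimplePerfTimer() as timer:
        sum(range(1_000_000))  # stand-in for the real extraction work
    print(timer.elapsed_seconds(digits=2))
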
@@ -85,8 +85,8 @@ class DataLakeProfilerConfig(ConfigModel):
             if field_level_metric.startswith("include_field_"):
                 values.setdefault(field_level_metric, False)

-        assert (
-
-        )
+        assert max_num_fields_to_profile is None, (
+            f"{max_num_fields_to_profile_key} should be set to None"
+        )

         return values

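This hunk, and many of the hunks below, only reflow assert statements so that the condition stays on the assert line and the message sits in its own parenthesized block, which is the layout newer formatters prefer. Behaviour is unchanged; the two layouts below illustrate the style difference (the variable values are made up for this sketch):

    max_num_fields_to_profile = None
    max_num_fields_to_profile_key = "max_number_of_fields_to_profile"  # placeholder value

    # old-style layout: condition and message wrapped together
    assert (
        max_num_fields_to_profile is None
    ), f"{max_num_fields_to_profile_key} should be set to None"

    # new-style layout: condition first, message in its own parentheses
    assert max_num_fields_to_profile is None, (
        f"{max_num_fields_to_profile_key} should be set to None"
    )
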
@@ -6,9 +6,8 @@ import pathlib
 import re
 import time
 from datetime import datetime
-from itertools import groupby
 from pathlib import PurePath
-from typing import
+from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple
 from urllib.parse import urlparse

 import smart_open.compression as so_compression

@@ -41,6 +40,7 @@ from datahub.ingestion.source.aws.s3_util import (
     get_bucket_name,
     get_bucket_relative_path,
     get_key_prefix,
+    group_s3_objects_by_dirname,
     strip_s3_prefix,
 )
 from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator

@@ -75,6 +75,9 @@ from datahub.metadata.schema_classes import (
 from datahub.telemetry import stats, telemetry
 from datahub.utilities.perf_timer import PerfTimer

+if TYPE_CHECKING:
+    from mypy_boto3_s3.service_resource import Bucket
+
 # hide annoying debug errors from py4j
 logging.getLogger("py4j").setLevel(logging.ERROR)
 logger: logging.Logger = logging.getLogger(__name__)

@@ -842,7 +845,7 @@ class S3Source(StatefulIngestionSourceBase):
     def get_folder_info(
         self,
         path_spec: PathSpec,
-        bucket:
+        bucket: "Bucket",
         prefix: str,
     ) -> List[Folder]:
         """

@@ -857,22 +860,15 @@ class S3Source(StatefulIngestionSourceBase):

         Parameters:
             path_spec (PathSpec): The path specification used to determine partitioning.
-            bucket (
+            bucket (Bucket): The S3 bucket object.
             prefix (str): The prefix path in the S3 bucket to list objects from.

         Returns:
             List[Folder]: A list of Folder objects representing the partitions found.
         """
-
-        prefix_to_list = prefix
-        files = list(
-            bucket.objects.filter(Prefix=f"{prefix_to_list}").page_size(PAGE_SIZE)
-        )
-        files = sorted(files, key=lambda a: a.last_modified)
-        grouped_files = groupby(files, lambda x: x.key.rsplit("/", 1)[0])
-
         partitions: List[Folder] = []
-
+        s3_objects = bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
+        for key, group in group_s3_objects_by_dirname(s3_objects).items():
             file_size = 0
             creation_time = None
             modification_time = None

@@ -904,7 +900,7 @@ class S3Source(StatefulIngestionSourceBase):
                 Folder(
                     partition_id=id,
                     is_partition=bool(id),
-                    creation_time=creation_time if creation_time else None,
+                    creation_time=creation_time if creation_time else None,  # type: ignore[arg-type]
                     modification_time=modification_time,
                     sample_file=self.create_s3_path(max_file.bucket_name, max_file.key),
                     size=file_size,

@@ -1128,7 +1124,7 @@ class S3Source(StatefulIngestionSourceBase):
                     table_data.table_path
                 ].timestamp = table_data.timestamp

-        for
+        for _, table_data in table_dict.items():
             yield from self.ingest_table(table_data, path_spec)

         if not self.source_config.is_profiling_enabled():

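get_folder_info now delegates its sort-and-group logic to a shared group_s3_objects_by_dirname helper from s3_util (imported in the hunk above) instead of an inline sorted(...) plus itertools.groupby. The helper's implementation is not shown in this diff; a plausible sketch that groups objects by the directory portion of their key might look like this (illustrative only, with a stand-in object type):

    from collections import defaultdict
    from dataclasses import dataclass
    from typing import Dict, Iterable, List


    @dataclass
    class _Obj:
        """Stand-in for an S3 ObjectSummary: only the .key attribute is used here."""
        key: str


    def group_s3_objects_by_dirname(s3_objects: Iterable[_Obj]) -> Dict[str, List[_Obj]]:
        """Group objects by the 'directory' part of their key (everything before the last '/')."""
        grouped: Dict[str, List[_Obj]] = defaultdict(list)
        for obj in s3_objects:
            dirname = obj.key.rsplit("/", 1)[0] if "/" in obj.key else "/"
            grouped[dirname].append(obj)
        return grouped


    objs = [
        _Obj("data/year=2024/part-0.parquet"),
        _Obj("data/year=2024/part-1.parquet"),
        _Obj("data/year=2025/part-0.parquet"),
    ]
    print({k: len(v) for k, v in group_s3_objects_by_dirname(objs).items()})
    # {'data/year=2024': 2, 'data/year=2025': 1}

Unlike groupby, a dict-based grouping does not require the listing to be pre-sorted by key, which is consistent with the removal of the sorted(...) call in the hunk above.
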
@@ -236,12 +236,12 @@ class SalesforceSource(Source):
         try:
             if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN:
                 logger.debug("Access Token Provided in Config")
-                assert (
-
-                )
-                assert (
-
-                )
+                assert self.config.access_token is not None, (
+                    "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
+                )
+                assert self.config.instance_url is not None, (
+                    "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
+                )

                 self.sf = Salesforce(
                     instance_url=self.config.instance_url,

@@ -250,15 +250,15 @@ class SalesforceSource(Source):
                 )
             elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD:
                 logger.debug("Username/Password Provided in Config")
-                assert (
-
-                )
-                assert (
-
-                )
-                assert (
-
-                )
+                assert self.config.username is not None, (
+                    "Config username is required for USERNAME_PASSWORD auth"
+                )
+                assert self.config.password is not None, (
+                    "Config password is required for USERNAME_PASSWORD auth"
+                )
+                assert self.config.security_token is not None, (
+                    "Config security_token is required for USERNAME_PASSWORD auth"
+                )

                 self.sf = Salesforce(
                     username=self.config.username,

@@ -269,15 +269,15 @@ class SalesforceSource(Source):

             elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
                 logger.debug("Json Web Token provided in the config")
-                assert (
-
-                )
-                assert (
-
-                )
-                assert (
-
-                )
+                assert self.config.username is not None, (
+                    "Config username is required for JSON_WEB_TOKEN auth"
+                )
+                assert self.config.consumer_key is not None, (
+                    "Config consumer_key is required for JSON_WEB_TOKEN auth"
+                )
+                assert self.config.private_key is not None, (
+                    "Config private_key is required for JSON_WEB_TOKEN auth"
+                )

                 self.sf = Salesforce(
                     username=self.config.username,

@@ -439,7 +439,8 @@ class SalesforceSource(Source):
         dataPlatformInstance = DataPlatformInstanceClass(
             builder.make_data_platform_urn(self.platform),
             instance=builder.make_dataplatform_instance_urn(
-                self.platform,
+                self.platform,
+                self.config.platform_instance,  # type:ignore
             ),
         )

@@ -354,7 +354,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
         browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"

         if os.path.isdir(self.config.path):
-            for root,
+            for root, _, files in os.walk(self.config.path, topdown=False):
                 for file_name in [f for f in files if f.endswith(".json")]:
                     try:
                         yield from self._load_one_file(

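The reconstructed loop above walks a directory tree and loads every .json file it finds. For reference, the pattern is plain os.walk, which yields a (dirpath, dirnames, filenames) triple per directory; a small stand-alone helper using the same shape (the directory name is hypothetical):

    import os
    from typing import Iterator


    def iter_json_files(path: str) -> Iterator[str]:
        """Yield the full path of every .json file under `path` (illustrative helper)."""
        for root, _, files in os.walk(path, topdown=False):
            for file_name in (f for f in files if f.endswith(".json")):
                yield os.path.join(root, file_name)


    for schema_file in iter_json_files("./schemas"):  # hypothetical directory
        print(schema_file)
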
@@ -477,9 +477,9 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
                 upstream_dataset_urns
                 and dataset_urn not in self.dataset_upstream_urn_mapping
             ):
-                self.dataset_upstream_urn_mapping[
-
-
+                self.dataset_upstream_urn_mapping[dataset_urn] = (
+                    upstream_dataset_urns
+                )

             element_input_fields = [
                 InputFieldClass(

@@ -126,9 +126,9 @@ class SigmaAPI:
             response.raise_for_status()
             response_dict = response.json()
             for workspace_dict in response_dict[Constant.ENTRIES]:
-                self.workspaces[
-                    workspace_dict
-
+                self.workspaces[workspace_dict[Constant.WORKSPACEID]] = (
+                    Workspace.parse_obj(workspace_dict)
+                )
             if response_dict[Constant.NEXTPAGE]:
                 url = f"{workspace_url}&page={response_dict[Constant.NEXTPAGE]}"
             else:

@@ -147,9 +147,9 @@ class SigmaAPI:
             response.raise_for_status()
             response_dict = response.json()
             for user_dict in response_dict[Constant.ENTRIES]:
-                users[
-                    user_dict[Constant.
-
+                users[user_dict[Constant.MEMBERID]] = (
+                    f"{user_dict[Constant.FIRSTNAME]}_{user_dict[Constant.LASTNAME]}"
+                )
             if response_dict[Constant.NEXTPAGE]:
                 url = f"{members_url}&page={response_dict[Constant.NEXTPAGE]}"
             else:

@@ -327,10 +327,12 @@ class SigmaAPI:
             response.raise_for_status()
             for i, element_dict in enumerate(response.json()[Constant.ENTRIES]):
                 if not element_dict.get(Constant.NAME):
-                    element_dict[Constant.NAME] =
-
-
-
+                    element_dict[Constant.NAME] = (
+                        f"Element {i + 1} of Page '{page.name}'"
+                    )
+                element_dict[Constant.URL] = (
+                    f"{workbook.url}?:nodeId={element_dict[Constant.ELEMENTID]}&:fullScreen=true"
+                )
                 element = Element.parse_obj(element_dict)
                 if (
                     self.config.extract_lineage

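The SigmaAPI hunks above all follow the same cursor-style pagination contract: each response carries a list of entries plus a next-page token, and the client keeps requesting the same endpoint with &page=<token> until the token is empty. A generic sketch of that loop, with a hypothetical endpoint and assumed response keys (not the actual SigmaAPI client):

    from typing import Any, Dict, Iterator

    import requests


    def iter_paginated_entries(session: requests.Session, base_url: str) -> Iterator[Dict[str, Any]]:
        """Yield entries from a cursor-paginated endpoint; 'entries'/'nextPage' keys are assumed."""
        url = f"{base_url}?limit=50"
        while True:
            response = session.get(url)
            response.raise_for_status()
            payload = response.json()
            yield from payload["entries"]
            next_page = payload.get("nextPage")
            if not next_page:
                break
            url = f"{base_url}?limit=50&page={next_page}"
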
@@ -221,6 +221,14 @@ class SnowflakeV2Config(
         default=False,
         description="If enabled, uses the new queries extractor to extract queries from snowflake.",
     )
+    include_queries: bool = Field(
+        default=True,
+        description="If enabled, generate query entities associated with lineage edges. Only applicable if `use_queries_v2` is enabled.",
+    )
+    include_query_usage_statistics: bool = Field(
+        default=True,
+        description="If enabled, generate query popularity statistics. Only applicable if `use_queries_v2` is enabled.",
+    )

     lazy_schema_resolver: bool = Field(
         default=True,

@@ -236,6 +244,11 @@ class SnowflakeV2Config(
         description="""Optional. Allowed values are `without_lineage`, `with_lineage`, and `skip` (default). `without_lineage` only extracts tags that have been applied directly to the given entity. `with_lineage` extracts both directly applied and propagated tags, but will be significantly slower. See the [Snowflake documentation](https://docs.snowflake.com/en/user-guide/object-tagging.html#tag-lineage) for information about tag lineage/propagation. """,
     )

+    extract_tags_as_structured_properties: bool = Field(
+        default=False,
+        description="If enabled along with `extract_tags`, extracts snowflake's key-value tags as DataHub structured properties instead of DataHub tags.",
+    )
+
     include_external_url: bool = Field(
         default=True,
         description="Whether to populate Snowsight url for Snowflake Objects",

@@ -255,6 +268,14 @@ class SnowflakeV2Config(
         description="List of regex patterns for tags to include in ingestion. Only used if `extract_tags` is enabled.",
     )

+    structured_property_pattern: AllowDenyPattern = Field(
+        default=AllowDenyPattern.allow_all(),
+        description=(
+            "List of regex patterns for structured properties to include in ingestion."
+            " Only used if `extract_tags` and `extract_tags_as_structured_properties` are enabled."
+        ),
+    )
+
     # This is required since access_history table does not capture whether the table was temporary table.
     temporary_tables_pattern: List[str] = Field(
         default=DEFAULT_TEMP_TABLES_PATTERNS,

@@ -363,18 +384,20 @@ class SnowflakeV2Config(
             assert all(
                 consumer.platform_instance != share_details.platform_instance
                 for consumer in share_details.consumers
-            ),
+            ), (
+                "Share's platform_instance can not be same as consumer's platform instance. Self-sharing not supported in Snowflake."
+            )

             databases_included_in_share.append(shared_db)
             databases_created_from_share.extend(share_details.consumers)

         for db_from_share in databases_created_from_share:
-            assert (
-
-            )
-            assert (
-
-            )
+            assert db_from_share not in databases_included_in_share, (
+                "Database included in a share can not be present as consumer in any share."
+            )
+            assert databases_created_from_share.count(db_from_share) == 1, (
+                "Same database can not be present as consumer in more than one share."
+            )

         return shares

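The new Snowflake options (include_queries, include_query_usage_statistics, extract_tags_as_structured_properties, structured_property_pattern) are declared with the same pydantic Field(default=..., description=...) pattern used throughout the config class. A minimal self-contained sketch of that pattern (a toy model for illustration, not the real SnowflakeV2Config, which has many more fields):

    from pydantic import BaseModel, Field


    class ToySnowflakeConfig(BaseModel):
        """Toy config illustrating the flag declarations added in this release."""

        use_queries_v2: bool = Field(
            default=False,
            description="If enabled, uses the new queries extractor.",
        )
        include_queries: bool = Field(
            default=True,
            description="Generate query entities for lineage edges (only with use_queries_v2).",
        )
        include_query_usage_statistics: bool = Field(
            default=True,
            description="Generate query popularity statistics (only with use_queries_v2).",
        )
        extract_tags_as_structured_properties: bool = Field(
            default=False,
            description="Emit Snowflake key-value tags as structured properties instead of tags.",
        )


    config = ToySnowflakeConfig(use_queries_v2=True, include_query_usage_statistics=False)
    print(config.dict())
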
@@ -250,9 +250,9 @@ class SnowflakeConnectionConfig(ConfigModel):
         if self.private_key is not None:
             pkey_bytes = self.private_key.replace("\\n", "\n").encode()
         else:
-            assert (
-
-            )
+            assert self.private_key_path, (
+                "missing required private key path to read key from"
+            )
             with open(self.private_key_path, "rb") as key:
                 pkey_bytes = key.read()

@@ -284,9 +284,9 @@ class SnowflakeConnectionConfig(ConfigModel):
         return self.options

     def get_oauth_connection(self) -> NativeSnowflakeConnection:
-        assert (
-
-        )
+        assert self.oauth_config, (
+            "oauth_config should be provided if using oauth based authentication"
+        )
         generator = OAuthTokenGenerator(
             client_id=self.oauth_config.client_id,
             authority_url=self.oauth_config.authority_url,

@@ -623,7 +623,7 @@ fingerprinted_queries as (
         query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3)
         AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3)
         AND execution_status = 'SUCCESS'
-        AND {users_filter or
+        AND {users_filter or "TRUE"}
     )
 , deduplicated_queries as (
     SELECT

@@ -651,7 +651,7 @@ fingerprinted_queries as (
     WHERE
         query_start_time >= to_timestamp_ltz({start_time_millis}, 3)
         AND query_start_time < to_timestamp_ltz({end_time_millis}, 3)
-        AND {users_filter or
+        AND {users_filter or "TRUE"}
         AND query_id IN (
             SELECT query_id FROM deduplicated_queries
         )

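The two SQL hunks insert {users_filter or "TRUE"} into the query template, so an empty or missing user filter degrades to a predicate that matches every row instead of producing an incomplete WHERE clause. A tiny illustration of that substitution (the surrounding clause and filter value are made up for the sketch):

    def build_where_clause(users_filter: str = "") -> str:
        # falls back to the always-true predicate when no filter is configured
        return f"execution_status = 'SUCCESS' AND {users_filter or 'TRUE'}"


    print(build_where_clause())
    # execution_status = 'SUCCESS' AND TRUE
    print(build_where_clause("user_name NOT IN ('SYSTEM')"))
    # execution_status = 'SUCCESS' AND user_name NOT IN ('SYSTEM')
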
@@ -166,6 +166,3 @@ class SnowflakeV2Report(

     def report_tag_processed(self, tag_name: str) -> None:
         self._processed_tags.add(tag_name)
-
-    def set_ingestion_stage(self, database: str, stage: str) -> None:
-        self.report_ingestion_stage_start(f"{database}: {stage}")

@@ -45,15 +45,18 @@ class SnowflakeTag:
     name: str
     value: str

-    def
+    def tag_display_name(self) -> str:
         return f"{self.name}: {self.value}"

-    def
+    def tag_identifier(self) -> str:
         return f"{self._id_prefix_as_str()}:{self.value}"

     def _id_prefix_as_str(self) -> str:
         return f"{self.database}.{self.schema}.{self.name}"

+    def structured_property_identifier(self) -> str:
+        return f"snowflake.{self.database}.{self.schema}.{self.name}"
+

 @dataclass
 class SnowflakeColumn(BaseColumn):

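The SnowflakeTag hunk adds a structured_property_identifier alongside the existing display-name and identifier helpers, matching the new extract_tags_as_structured_properties option. A self-contained rendering of the three formats, using a minimal dataclass copied from the shapes shown above (the real class carries more fields):

    from dataclasses import dataclass


    @dataclass
    class SnowflakeTagExample:
        database: str
        schema: str
        name: str
        value: str

        def tag_display_name(self) -> str:
            return f"{self.name}: {self.value}"

        def _id_prefix_as_str(self) -> str:
            return f"{self.database}.{self.schema}.{self.name}"

        def tag_identifier(self) -> str:
            return f"{self._id_prefix_as_str()}:{self.value}"

        def structured_property_identifier(self) -> str:
            return f"snowflake.{self.database}.{self.schema}.{self.name}"


    tag = SnowflakeTagExample("ANALYTICS", "PUBLIC", "pii_level", "high")
    print(tag.tag_display_name())                # pii_level: high
    print(tag.tag_identifier())                  # ANALYTICS.PUBLIC.pii_level:high
    print(tag.structured_property_identifier())  # snowflake.ANALYTICS.PUBLIC.pii_level
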
@@ -139,9 +142,9 @@ class _SnowflakeTagCache:
         )

         # self._table_tags[<database_name>][<schema_name>][<table_name>] = list of tags applied to table
-        self._table_tags: Dict[
-
-
+        self._table_tags: Dict[str, Dict[str, Dict[str, List[SnowflakeTag]]]] = (
+            defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
+        )

         # self._column_tags[<database_name>][<schema_name>][<table_name>][<column_name>] = list of tags applied to column
         self._column_tags: Dict[