acryl-datahub 0.15.0.1rc17__py3-none-any.whl → 0.15.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/METADATA +2440 -2438
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/RECORD +211 -207
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/WHEEL +1 -1
- datahub/__init__.py +1 -1
- datahub/api/entities/assertion/assertion_operator.py +3 -5
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/datacontract/assertion_operator.py +3 -5
- datahub/api/entities/dataproduct/dataproduct.py +4 -4
- datahub/api/entities/dataset/dataset.py +2 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +18 -7
- datahub/cli/cli_utils.py +13 -2
- datahub/cli/delete_cli.py +3 -3
- datahub/cli/docker_cli.py +6 -6
- datahub/cli/ingest_cli.py +25 -15
- datahub/cli/lite_cli.py +2 -2
- datahub/cli/migrate.py +5 -5
- datahub/cli/specific/assertions_cli.py +3 -3
- datahub/cli/specific/structuredproperties_cli.py +84 -0
- datahub/cli/timeline_cli.py +1 -1
- datahub/configuration/common.py +1 -2
- datahub/configuration/config_loader.py +73 -50
- datahub/configuration/git.py +2 -2
- datahub/configuration/time_window_config.py +10 -5
- datahub/emitter/mce_builder.py +4 -8
- datahub/emitter/mcp_builder.py +27 -0
- datahub/emitter/mcp_patch_builder.py +1 -2
- datahub/emitter/rest_emitter.py +126 -85
- datahub/entrypoints.py +6 -0
- datahub/ingestion/api/incremental_lineage_helper.py +2 -8
- datahub/ingestion/api/report.py +1 -2
- datahub/ingestion/api/source.py +4 -2
- datahub/ingestion/api/source_helpers.py +1 -1
- datahub/ingestion/extractor/json_schema_util.py +3 -3
- datahub/ingestion/extractor/schema_util.py +3 -5
- datahub/ingestion/fs/s3_fs.py +3 -3
- datahub/ingestion/glossary/datahub_classifier.py +6 -4
- datahub/ingestion/graph/client.py +22 -19
- datahub/ingestion/graph/config.py +1 -1
- datahub/ingestion/run/pipeline.py +8 -7
- datahub/ingestion/run/pipeline_config.py +3 -3
- datahub/ingestion/source/abs/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/abs/source.py +19 -8
- datahub/ingestion/source/aws/glue.py +77 -47
- datahub/ingestion/source/aws/s3_boto_utils.py +3 -3
- datahub/ingestion/source/aws/s3_util.py +24 -1
- datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
- datahub/ingestion/source/aws/sagemaker_processors/models.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +34 -34
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +3 -3
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +14 -6
- datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py +8 -4
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -3
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +22 -16
- datahub/ingestion/source/bigquery_v2/lineage.py +16 -16
- datahub/ingestion/source/bigquery_v2/queries.py +1 -3
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +3 -3
- datahub/ingestion/source/bigquery_v2/usage.py +60 -60
- datahub/ingestion/source/cassandra/cassandra.py +0 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +24 -24
- datahub/ingestion/source/cassandra/cassandra_utils.py +4 -7
- datahub/ingestion/source/confluent_schema_registry.py +6 -6
- datahub/ingestion/source/csv_enricher.py +29 -29
- datahub/ingestion/source/datahub/config.py +10 -0
- datahub/ingestion/source/datahub/datahub_database_reader.py +4 -2
- datahub/ingestion/source/datahub/datahub_source.py +12 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +13 -13
- datahub/ingestion/source/dbt/dbt_common.py +9 -7
- datahub/ingestion/source/delta_lake/source.py +0 -5
- datahub/ingestion/source/demo_data.py +1 -1
- datahub/ingestion/source/dremio/dremio_api.py +4 -4
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +3 -3
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -3
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/elastic_search.py +4 -4
- datahub/ingestion/source/fivetran/fivetran.py +1 -6
- datahub/ingestion/source/gc/datahub_gc.py +11 -14
- datahub/ingestion/source/gc/execution_request_cleanup.py +31 -6
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +48 -15
- datahub/ingestion/source/gcs/gcs_source.py +3 -2
- datahub/ingestion/source/ge_data_profiler.py +2 -5
- datahub/ingestion/source/ge_profiling_config.py +3 -3
- datahub/ingestion/source/iceberg/iceberg.py +13 -6
- datahub/ingestion/source/iceberg/iceberg_common.py +49 -9
- datahub/ingestion/source/iceberg/iceberg_profiler.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +3 -3
- datahub/ingestion/source/identity/okta.py +3 -3
- datahub/ingestion/source/kafka/kafka.py +11 -9
- datahub/ingestion/source/kafka_connect/kafka_connect.py +3 -9
- datahub/ingestion/source/kafka_connect/sink_connectors.py +3 -3
- datahub/ingestion/source/kafka_connect/source_connectors.py +3 -3
- datahub/ingestion/source/looker/looker_common.py +19 -19
- datahub/ingestion/source/looker/looker_config.py +11 -6
- datahub/ingestion/source/looker/looker_source.py +25 -25
- datahub/ingestion/source/looker/looker_template_language.py +3 -3
- datahub/ingestion/source/looker/looker_usage.py +5 -7
- datahub/ingestion/source/looker/lookml_concept_context.py +6 -6
- datahub/ingestion/source/looker/lookml_source.py +13 -15
- datahub/ingestion/source/looker/view_upstream.py +5 -5
- datahub/ingestion/source/metabase.py +1 -6
- datahub/ingestion/source/mlflow.py +4 -9
- datahub/ingestion/source/mode.py +5 -5
- datahub/ingestion/source/mongodb.py +6 -4
- datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
- datahub/ingestion/source/nifi.py +24 -31
- datahub/ingestion/source/openapi.py +9 -9
- datahub/ingestion/source/powerbi/config.py +12 -12
- datahub/ingestion/source/powerbi/m_query/parser.py +11 -11
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +26 -24
- datahub/ingestion/source/powerbi/m_query/resolver.py +13 -13
- datahub/ingestion/source/powerbi/powerbi.py +6 -6
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +9 -9
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +7 -7
- datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
- datahub/ingestion/source/qlik_sense/qlik_api.py +1 -1
- datahub/ingestion/source/redash.py +0 -5
- datahub/ingestion/source/redshift/config.py +3 -3
- datahub/ingestion/source/redshift/redshift.py +45 -46
- datahub/ingestion/source/redshift/usage.py +33 -33
- datahub/ingestion/source/s3/datalake_profiler_config.py +3 -3
- datahub/ingestion/source/s3/source.py +11 -15
- datahub/ingestion/source/salesforce.py +26 -25
- datahub/ingestion/source/schema/json_schema.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +3 -3
- datahub/ingestion/source/sigma/sigma_api.py +12 -10
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_connection.py +6 -6
- datahub/ingestion/source/snowflake/snowflake_queries.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -3
- datahub/ingestion/source/snowflake/snowflake_schema.py +8 -5
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +136 -42
- datahub/ingestion/source/snowflake/snowflake_tag.py +21 -11
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +49 -50
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +51 -47
- datahub/ingestion/source/sql/athena.py +1 -3
- datahub/ingestion/source/sql/clickhouse.py +8 -14
- datahub/ingestion/source/sql/oracle.py +1 -3
- datahub/ingestion/source/sql/sql_generic_profiler.py +1 -2
- datahub/ingestion/source/sql/sql_types.py +1 -2
- datahub/ingestion/source/sql/sql_utils.py +5 -0
- datahub/ingestion/source/sql/teradata.py +18 -5
- datahub/ingestion/source/state/profiling_state_handler.py +3 -3
- datahub/ingestion/source/state/redundant_run_skip_handler.py +5 -7
- datahub/ingestion/source/state/stale_entity_removal_handler.py +3 -3
- datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +9 -9
- datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py +1 -1
- datahub/ingestion/source/superset.py +1 -6
- datahub/ingestion/source/tableau/tableau.py +343 -117
- datahub/ingestion/source/tableau/tableau_common.py +5 -2
- datahub/ingestion/source/unity/config.py +3 -1
- datahub/ingestion/source/unity/proxy.py +1 -1
- datahub/ingestion/source/unity/source.py +74 -74
- datahub/ingestion/source/unity/usage.py +3 -1
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -4
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -3
- datahub/ingestion/source/usage/usage_common.py +1 -1
- datahub/ingestion/source_report/ingestion_stage.py +24 -20
- datahub/ingestion/transformer/add_dataset_dataproduct.py +4 -4
- datahub/ingestion/transformer/add_dataset_properties.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_tags.py +3 -3
- datahub/ingestion/transformer/add_dataset_schema_terms.py +3 -3
- datahub/ingestion/transformer/dataset_domain_based_on_tags.py +4 -4
- datahub/ingestion/transformer/extract_ownership_from_tags.py +3 -3
- datahub/ingestion/transformer/tags_to_terms.py +7 -7
- datahub/integrations/assertion/snowflake/compiler.py +10 -10
- datahub/lite/duckdb_lite.py +12 -10
- datahub/metadata/_schema_classes.py +317 -44
- datahub/metadata/_urns/urn_defs.py +69 -15
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/versionset/__init__.py +17 -0
- datahub/metadata/schema.avsc +302 -89
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +4 -2
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +51 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +96 -23
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/metadata/schemas/MLModelProperties.avsc +96 -48
- datahub/metadata/schemas/MLPrimaryKeyProperties.avsc +51 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +98 -71
- datahub/metadata/schemas/VersionProperties.avsc +216 -0
- datahub/metadata/schemas/VersionSetKey.avsc +26 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +49 -0
- datahub/secret/datahub_secrets_client.py +12 -21
- datahub/secret/secret_common.py +14 -8
- datahub/specific/aspect_helpers/custom_properties.py +1 -2
- datahub/sql_parsing/schema_resolver.py +5 -10
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -16
- datahub/sql_parsing/sqlglot_lineage.py +3 -3
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/telemetry/stats.py +1 -2
- datahub/testing/mcp_diff.py +1 -1
- datahub/utilities/file_backed_collections.py +11 -11
- datahub/utilities/hive_schema_to_avro.py +2 -2
- datahub/utilities/logging_manager.py +2 -2
- datahub/utilities/lossy_collections.py +3 -3
- datahub/utilities/mapping.py +3 -3
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/perf_timer.py +11 -6
- datahub/utilities/serialized_lru_cache.py +3 -1
- datahub/utilities/sqlalchemy_query_combiner.py +6 -6
- datahub/utilities/sqllineage_patch.py +1 -1
- datahub/utilities/stats_collections.py +3 -1
- datahub/utilities/urns/_urn_base.py +28 -5
- datahub/utilities/urns/urn_iter.py +2 -2
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc17.dist-info → acryl_datahub-0.15.0.2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/snowflake/snowflake_schema_gen.py

@@ -4,12 +4,14 @@ from typing import Dict, Iterable, List, Optional, Union

 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
+    get_sys_time,
     make_data_platform_urn,
     make_dataset_urn_with_platform_instance,
     make_schema_field_urn,
     make_tag_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.mcp_builder import add_structured_properties_to_entity_wu
 from datahub.ingestion.api.source import SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.glossary.classification_mixin import (
@@ -72,6 +74,7 @@ from datahub.ingestion.source_report.ingestion_stage import (
     PROFILING,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
+    AuditStamp,
     GlobalTags,
     Status,
     SubTypes,
@@ -98,7 +101,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     StringType,
     TimeType,
 )
+from datahub.metadata.com.linkedin.pegasus2avro.structured import (
+    StructuredPropertyDefinition,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.tag import TagProperties
+from datahub.metadata.urns import (
+    ContainerUrn,
+    DatasetUrn,
+    DataTypeUrn,
+    EntityTypeUrn,
+    SchemaFieldUrn,
+    StructuredPropertyUrn,
+)
 from datahub.sql_parsing.sql_parsing_aggregator import (
     KnownLineageMapping,
     SqlParsingAggregator,
@@ -180,9 +194,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             config, self.data_dictionary, self.report
         )
         self.profiler: Optional[SnowflakeProfiler] = profiler
-        self.snowsight_url_builder: Optional[
+        self.snowsight_url_builder: Optional[SnowsightUrlBuilder] = (
+            snowsight_url_builder
+        )

         # These are populated as side-effects of get_workunits_internal.
         self.databases: List[SnowflakeDatabase] = []
@@ -216,21 +230,23 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):

         try:
             for snowflake_db in self.databases:
-                self.report.
+                with self.report.new_stage(
+                    f"{snowflake_db.name}: {METADATA_EXTRACTION}"
+                ):
+                    yield from self._process_database(snowflake_db)

-            self.report.
+            with self.report.new_stage(f"*: {EXTERNAL_TABLE_DDL_LINEAGE}"):
+                discovered_tables: List[str] = [
+                    self.identifiers.get_dataset_identifier(
+                        table_name, schema.name, db.name
+                    )
+                    for db in self.databases
+                    for schema in db.schemas
+                    for table_name in schema.tables
+                ]
+                if self.aggregator:
+                    for entry in self._external_tables_ddl_lineage(discovered_tables):
+                        self.aggregator.add(entry)

         except SnowflakePermissionError as e:
             self.structured_reporter.failure(
@@ -251,9 +267,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             )
             return None
         else:
-            ischema_databases: List[
+            ischema_databases: List[SnowflakeDatabase] = (
+                self.get_databases_from_ischema(databases)
+            )

             if len(ischema_databases) == 0:
                 self.structured_reporter.failure(
@@ -332,8 +348,8 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         yield from self._process_db_schemas(snowflake_db, db_tables)

         if self.profiler and db_tables:
-            self.report.
+            with self.report.new_stage(f"{snowflake_db.name}: {PROFILING}"):
+                yield from self.profiler.get_workunits(snowflake_db, db_tables)

     def _process_db_schemas(
         self,
@@ -671,14 +687,31 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             yield from self.gen_dataset_workunits(view, schema_name, db_name)

     def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
+        use_sp = self.config.extract_tags_as_structured_properties
+        identifier = (
+            self.snowflake_identifier(tag.structured_property_identifier())
+            if use_sp
+            else tag.tag_identifier()
+        )

-        if self.report.is_tag_processed(
+        if self.report.is_tag_processed(identifier):
             return

-        self.report.report_tag_processed(
+        self.report.report_tag_processed(identifier)
+        if use_sp:
+            yield from self.gen_tag_as_structured_property_workunits(tag)
+        else:
+            yield from self.gen_tag_workunits(tag)
+
+    def _format_tags_as_structured_properties(
+        self, tags: List[SnowflakeTag]
+    ) -> Dict[StructuredPropertyUrn, str]:
+        return {
+            StructuredPropertyUrn(
+                self.snowflake_identifier(tag.structured_property_identifier())
+            ): tag.value
+            for tag in tags
+        }

     def gen_dataset_workunits(
         self,
@@ -723,6 +756,9 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             env=self.config.env,
         )

+        if self.config.extract_tags_as_structured_properties:
+            yield from self.gen_column_tags_as_structured_properties(dataset_urn, table)
+
         yield from add_table_to_schema_container(
             dataset_urn=dataset_urn,
             parent_container_key=schema_container_key,
@@ -756,16 +792,24 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         )

         if table.tags:
+            if self.config.extract_tags_as_structured_properties:
+                yield from add_structured_properties_to_entity_wu(
+                    dataset_urn,
+                    self._format_tags_as_structured_properties(table.tags),
                 )
+            else:
+                tag_associations = [
+                    TagAssociation(
+                        tag=make_tag_urn(
+                            self.snowflake_identifier(tag.tag_identifier())
+                        )
+                    )
+                    for tag in table.tags
+                ]
+                global_tags = GlobalTags(tag_associations)
+                yield MetadataChangeProposalWrapper(
+                    entityUrn=dataset_urn, aspect=global_tags
+                ).as_workunit()

         if isinstance(table, SnowflakeView) and table.view_definition is not None:
             view_properties_aspect = ViewProperties(
@@ -838,10 +882,10 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
         )

     def gen_tag_workunits(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]:
-        tag_urn = make_tag_urn(self.snowflake_identifier(tag.
+        tag_urn = make_tag_urn(self.snowflake_identifier(tag.tag_identifier()))

         tag_properties_aspect = TagProperties(
-            name=tag.
+            name=tag.tag_display_name(),
             description=f"Represents the Snowflake tag `{tag._id_prefix_as_str()}` with value `{tag.value}`.",
         )

@@ -849,6 +893,41 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
             entityUrn=tag_urn, aspect=tag_properties_aspect
         ).as_workunit()

+    def gen_tag_as_structured_property_workunits(
+        self, tag: SnowflakeTag
+    ) -> Iterable[MetadataWorkUnit]:
+        identifier = self.snowflake_identifier(tag.structured_property_identifier())
+        urn = StructuredPropertyUrn(identifier).urn()
+        aspect = StructuredPropertyDefinition(
+            qualifiedName=identifier,
+            displayName=tag.name,
+            valueType=DataTypeUrn("datahub.string").urn(),
+            entityTypes=[
+                EntityTypeUrn(f"datahub.{ContainerUrn.ENTITY_TYPE}").urn(),
+                EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn(),
+                EntityTypeUrn(f"datahub.{SchemaFieldUrn.ENTITY_TYPE}").urn(),
+            ],
+            lastModified=AuditStamp(
+                time=get_sys_time(), actor="urn:li:corpuser:datahub"
+            ),
+        )
+        yield MetadataChangeProposalWrapper(
+            entityUrn=urn,
+            aspect=aspect,
+        ).as_workunit()
+
+    def gen_column_tags_as_structured_properties(
+        self, dataset_urn: str, table: Union[SnowflakeTable, SnowflakeView]
+    ) -> Iterable[MetadataWorkUnit]:
+        for column_name in table.column_tags:
+            schema_field_urn = SchemaFieldUrn(dataset_urn, column_name).urn()
+            yield from add_structured_properties_to_entity_wu(
+                schema_field_urn,
+                self._format_tags_as_structured_properties(
+                    table.column_tags[column_name]
+                ),
+            )
+
     def gen_schema_metadata(
         self,
         table: Union[SnowflakeTable, SnowflakeView],
@@ -890,13 +969,14 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                         [
                             TagAssociation(
                                 make_tag_urn(
-                                    self.snowflake_identifier(tag.
+                                    self.snowflake_identifier(tag.tag_identifier())
                                 )
                             )
                             for tag in table.column_tags[col.name]
                         ]
                     )
                     if col.name in table.column_tags
+                    and not self.config.extract_tags_as_structured_properties
                     else None
                 ),
             )
@@ -983,8 +1063,17 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 )
             ),
             tags=(
-                [
+                [
+                    self.snowflake_identifier(tag.tag_identifier())
+                    for tag in database.tags
+                ]
                 if database.tags
+                and not self.config.extract_tags_as_structured_properties
+                else None
+            ),
+            structured_properties=(
+                self._format_tags_as_structured_properties(database.tags)
+                if database.tags and self.config.extract_tags_as_structured_properties
                 else None
             ),
         )
@@ -1036,8 +1125,13 @@ class SnowflakeSchemaGenerator(SnowflakeStructuredReportMixin):
                 else None
             ),
             tags=(
-                [self.snowflake_identifier(tag.
-                if schema.tags
+                [self.snowflake_identifier(tag.tag_identifier()) for tag in schema.tags]
+                if schema.tags and not self.config.extract_tags_as_structured_properties
+                else None
+            ),
+            structured_properties=(
+                self._format_tags_as_structured_properties(schema.tags)
+                if schema.tags and self.config.extract_tags_as_structured_properties
                 else None
             ),
         )
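The hunks above register each Snowflake tag as a DataHub structured property definition (when extract_tags_as_structured_properties is enabled) and attach tag values to datasets, schemas, and columns through the new add_structured_properties_to_entity_wu helper. The following is a minimal standalone sketch of the same emission pattern, built only from APIs imported in these hunks; the tag identifier, display name, and GMS address are illustrative assumptions, not values taken from the diff:

from datahub.emitter.mce_builder import get_sys_time
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.metadata.com.linkedin.pegasus2avro.common import AuditStamp
from datahub.metadata.com.linkedin.pegasus2avro.structured import (
    StructuredPropertyDefinition,
)
from datahub.metadata.urns import (
    DatasetUrn,
    DataTypeUrn,
    EntityTypeUrn,
    StructuredPropertyUrn,
)

# Hypothetical Snowflake tag "GOVERNANCE.PII" modeled as a structured property,
# mirroring gen_tag_as_structured_property_workunits() in the hunk above.
identifier = "governance.pii"  # assumed; the connector derives this from tag.structured_property_identifier()
mcp = MetadataChangeProposalWrapper(
    entityUrn=StructuredPropertyUrn(identifier).urn(),
    aspect=StructuredPropertyDefinition(
        qualifiedName=identifier,
        displayName="PII",
        valueType=DataTypeUrn("datahub.string").urn(),
        entityTypes=[EntityTypeUrn(f"datahub.{DatasetUrn.ENTITY_TYPE}").urn()],
        lastModified=AuditStamp(time=get_sys_time(), actor="urn:li:corpuser:datahub"),
    ),
)

# Assumes a locally running DataHub GMS; replace with your own server URL and token.
DatahubRestEmitter("http://localhost:8080").emit(mcp)

Column-level values follow the same shape, keyed by SchemaFieldUrn, as in gen_column_tags_as_structured_properties above.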
datahub/ingestion/source/snowflake/snowflake_tag.py

@@ -38,9 +38,9 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         table_name: Optional[str],
     ) -> List[SnowflakeTag]:
         if db_name not in self.tag_cache:
-            self.tag_cache[
-                db_name
+            self.tag_cache[db_name] = (
+                self.data_dictionary.get_tags_for_database_without_propagation(db_name)
+            )

         if domain == SnowflakeObjectDomain.DATABASE:
             return self.tag_cache[db_name].get_database_tags(db_name)
@@ -130,10 +130,10 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):
         temp_column_tags: Dict[str, List[SnowflakeTag]] = {}
         if self.config.extract_tags == TagOption.without_lineage:
             if db_name not in self.tag_cache:
-                self.tag_cache[
+                self.tag_cache[db_name] = (
+                    self.data_dictionary.get_tags_for_database_without_propagation(
+                        db_name
+                    )
                 )
             temp_column_tags = self.tag_cache[db_name].get_column_tags_for_table(
                 table_name, schema_name, db_name
@@ -165,10 +165,20 @@ class SnowflakeTagExtractor(SnowflakeCommonMixin):

         allowed_tags = []
         for tag in tags:
+            identifier = (
+                tag._id_prefix_as_str()
+                if self.config.extract_tags_as_structured_properties
+                else tag.tag_identifier()
+            )
+            self.report.report_entity_scanned(identifier, "tag")
+
+            pattern = (
+                self.config.structured_property_pattern
+                if self.config.extract_tags_as_structured_properties
+                else self.config.tag_pattern
+            )
+            if not pattern.allowed(identifier):
+                self.report.report_dropped(identifier)
             else:
                 allowed_tags.append(tag)
         return allowed_tags
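Both branches above key off a single new connector option. A hedged sketch of enabling it through SnowflakeV2Config follows, assuming parse_obj accepts the fields shown here; the account and credential values are placeholders, and only extract_tags, extract_tags_as_structured_properties, and structured_property_pattern come from this diff:

from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config

# Placeholder connection details; the tag-related fields are the ones this release adds or uses.
config = SnowflakeV2Config.parse_obj(
    {
        "account_id": "my_account",
        "username": "datahub_user",
        "password": "placeholder",
        "extract_tags": "without_lineage",
        # New behavior: ingest Snowflake tags as structured properties instead of DataHub tags.
        "extract_tags_as_structured_properties": True,
        # Filtering then consults structured_property_pattern rather than tag_pattern.
        "structured_property_pattern": {"deny": ["internal\\..*"]},
    }
)
print(config.extract_tags_as_structured_properties)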
datahub/ingestion/source/snowflake/snowflake_usage_v2.py

@@ -146,59 +146,58 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
         if not self._should_ingest_usage():
             return

-        self.report.
+        with self.report.new_stage(f"*: {USAGE_EXTRACTION_USAGE_AGGREGATION}"):
+            if self.report.edition == SnowflakeEdition.STANDARD.value:
+                logger.info(
+                    "Snowflake Account is Standard Edition. Usage and Operation History Feature is not supported."
+                )
+                return

+            logger.info("Checking usage date ranges")

+            self._check_usage_date_ranges()

+            # If permission error, execution returns from here
+            if (
+                self.report.min_access_history_time is None
+                or self.report.max_access_history_time is None
+            ):
+                return

+            # NOTE: In earlier `snowflake-usage` connector, users with no email were not considered in usage counts as well as in operation
+            # Now, we report the usage as well as operation metadata even if user email is absent

+            if self.config.include_usage_stats:
+                yield from auto_empty_dataset_usage_statistics(
+                    self._get_workunits_internal(discovered_datasets),
+                    config=BaseTimeWindowConfig(
+                        start_time=self.start_time,
+                        end_time=self.end_time,
+                        bucket_duration=self.config.bucket_duration,
+                    ),
+                    dataset_urns={
+                        self.identifiers.gen_dataset_urn(dataset_identifier)
+                        for dataset_identifier in discovered_datasets
+                    },
+                )

-        self.report.
+        with self.report.new_stage(f"*: {USAGE_EXTRACTION_OPERATIONAL_STATS}"):
+            if self.config.include_operational_stats:
+                # Generate the operation workunits.
+                access_events = self._get_snowflake_history()
+                for event in access_events:
+                    yield from self._get_operation_aspect_work_unit(
+                        event, discovered_datasets
+                    )

+            if self.redundant_run_skip_handler:
+                # Update the checkpoint state for this run.
+                self.redundant_run_skip_handler.update_state(
+                    self.config.start_time,
+                    self.config.end_time,
+                    self.config.bucket_duration,
                 )

-        if self.redundant_run_skip_handler:
-            # Update the checkpoint state for this run.
-            self.redundant_run_skip_handler.update_state(
-                self.config.start_time,
-                self.config.end_time,
-                self.config.bucket_duration,
-            )
-
     def _get_workunits_internal(
         self, discovered_datasets: List[str]
     ) -> Iterable[MetadataWorkUnit]:
@@ -386,7 +385,7 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
             )
             self.report_status(USAGE_EXTRACTION_OPERATIONAL_STATS, False)
             return
-        self.report.access_history_query_secs =
+        self.report.access_history_query_secs = timer.elapsed_seconds(digits=2)

         for row in results:
             yield from self._process_snowflake_history_row(row)
@@ -434,8 +433,8 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
             self.report.max_access_history_time = db_row["MAX_TIME"].astimezone(
                 tz=timezone.utc
             )
-            self.report.access_history_range_query_secs =
+            self.report.access_history_range_query_secs = timer.elapsed_seconds(
+                digits=2
             )

     def _get_operation_aspect_work_unit(
@@ -550,9 +549,9 @@ class SnowflakeUsageExtractor(SnowflakeCommonMixin, Closeable):
         ):
             # NOTE: Generated emails may be incorrect, as email may be different than
             # username@email_domain
-            event_dict[
-                "
+            event_dict["EMAIL"] = (
+                f"{event_dict['USER_NAME']}@{self.config.email_domain}".lower()
+            )

         if not event_dict["EMAIL"]:
             self.report.rows_missing_email += 1
datahub/ingestion/source/snowflake/snowflake_utils.py

@@ -21,8 +21,7 @@ from datahub.ingestion.source.snowflake.snowflake_report import SnowflakeV2Report
 class SnowflakeStructuredReportMixin(abc.ABC):
     @property
     @abc.abstractmethod
-    def structured_reporter(self) -> SourceReport:
-        ...
+    def structured_reporter(self) -> SourceReport: ...


 class SnowsightUrlBuilder:
datahub/ingestion/source/snowflake/snowflake_v2.py

@@ -23,7 +23,6 @@ from datahub.ingestion.api.incremental_properties_helper import (
 from datahub.ingestion.api.source import (
     CapabilityReport,
     MetadataWorkUnitProcessor,
-    Source,
     SourceCapability,
     SourceReport,
     TestableSource,
@@ -212,9 +211,9 @@ class SnowflakeV2Source(

         self.usage_extractor: Optional[SnowflakeUsageExtractor] = None
         if self.config.include_usage_stats or self.config.include_operational_stats:
-            redundant_usage_run_skip_handler: Optional[
+            redundant_usage_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = (
+                None
+            )
             if self.config.enable_stateful_usage_ingestion:
                 redundant_usage_run_skip_handler = RedundantUsageRunSkipHandler(
                     source=self,
@@ -251,11 +250,6 @@ class SnowflakeV2Source(

         self.add_config_to_report()

-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source":
-        config = SnowflakeV2Config.parse_obj(config_dict)
-        return cls(ctx, config)
-
     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
         test_report = TestConnectionReport()
@@ -302,7 +296,16 @@ class SnowflakeV2Source(

         _report: Dict[Union[SourceCapability, str], CapabilityReport] = dict()
         privileges: List[SnowflakePrivilege] = []
-        capabilities: List[SourceCapability] = [
+        capabilities: List[SourceCapability] = [
+            c.capability
+            for c in SnowflakeV2Source.get_capabilities()  # type: ignore
+            if c.capability
+            not in (
+                SourceCapability.PLATFORM_INSTANCE,
+                SourceCapability.DOMAINS,
+                SourceCapability.DELETION_DETECTION,
+            )
+        ]

         cur = conn.query("select current_role()")
         current_role = [row["CURRENT_ROLE()"] for row in cur][0]
@@ -480,8 +483,8 @@ class SnowflakeV2Source(
             identifiers=self.identifiers,
         )

-        self.report.
+        with self.report.new_stage(f"*: {METADATA_EXTRACTION}"):
+            yield from schema_extractor.get_workunits_internal()

         databases = schema_extractor.databases

@@ -513,45 +516,46 @@ class SnowflakeV2Source(
         discovered_datasets = discovered_tables + discovered_views

         if self.config.use_queries_v2:
-            self.report.
-            self.report.set_ingestion_stage("*", QUERIES_EXTRACTION)
-            schema_resolver = self.aggregator._schema_resolver
-            queries_extractor = SnowflakeQueriesExtractor(
-                connection=self.connection,
-                config=SnowflakeQueriesExtractorConfig(
-                    window=self.config,
-                    temporary_tables_pattern=self.config.temporary_tables_pattern,
-                    include_lineage=self.config.include_table_lineage,
-                    include_usage_statistics=self.config.include_usage_stats,
-                    include_operations=self.config.include_operational_stats,
-                    user_email_pattern=self.config.user_email_pattern,
-                ),
-                structured_report=self.report,
-                filters=self.filters,
-                identifiers=self.identifiers,
-                schema_resolver=schema_resolver,
-                discovered_tables=discovered_datasets,
-                graph=self.ctx.graph,
-            )
+            with self.report.new_stage(f"*: {VIEW_PARSING}"):
+                yield from auto_workunit(self.aggregator.gen_metadata())

+            with self.report.new_stage(f"*: {QUERIES_EXTRACTION}"):
+                schema_resolver = self.aggregator._schema_resolver
+
+                queries_extractor = SnowflakeQueriesExtractor(
+                    connection=self.connection,
+                    config=SnowflakeQueriesExtractorConfig(
+                        window=self.config,
+                        temporary_tables_pattern=self.config.temporary_tables_pattern,
+                        include_lineage=self.config.include_table_lineage,
+                        include_usage_statistics=self.config.include_usage_stats,
+                        include_operations=self.config.include_operational_stats,
+                        include_queries=self.config.include_queries,
+                        include_query_usage_statistics=self.config.include_query_usage_statistics,
+                        user_email_pattern=self.config.user_email_pattern,
+                    ),
+                    structured_report=self.report,
+                    filters=self.filters,
+                    identifiers=self.identifiers,
+                    schema_resolver=schema_resolver,
+                    discovered_tables=discovered_datasets,
+                    graph=self.ctx.graph,
+                )
+
+                # TODO: This is slightly suboptimal because we create two SqlParsingAggregator instances with different configs
+                # but a shared schema resolver. That's fine for now though - once we remove the old lineage/usage extractors,
+                # it should be pretty straightforward to refactor this and only initialize the aggregator once.
+                self.report.queries_extractor = queries_extractor.report
+                yield from queries_extractor.get_workunits_internal()
+                queries_extractor.close()

         else:
             if self.lineage_extractor:
-                self.report.
+                with self.report.new_stage(f"*: {LINEAGE_EXTRACTION}"):
+                    self.lineage_extractor.add_time_based_lineage_to_aggregator(
+                        discovered_tables=discovered_tables,
+                        discovered_views=discovered_views,
+                    )

             # This would emit view and external table ddl lineage
             # as well as query lineage via lineage_extractor
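Across the Snowflake hunks, stage tracking moves from imperative self.report.set_ingestion_stage(...) calls to a with self.report.new_stage(...) context manager, so a stage is always closed and timed even when the wrapped block raises or returns early. A minimal sketch of the new usage pattern, assuming the IngestionStageReport mixin in datahub/ingestion/source_report/ingestion_stage.py (changed +24 -20 above) exposes new_stage() exactly as these call sites use it:

from datahub.ingestion.source_report.ingestion_stage import (
    METADATA_EXTRACTION,
    PROFILING,
    IngestionStageReport,
)

report = IngestionStageReport()

# Each stage is entered and exited via a context manager instead of paired
# set_ingestion_stage() calls, mirroring the call sites in the diff above.
with report.new_stage(f"my_db: {METADATA_EXTRACTION}"):
    pass  # extract schemas and emit work units here

with report.new_stage(f"my_db: {PROFILING}"):
    pass  # run profiling for the same database here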
datahub/ingestion/source/sql/athena.py

@@ -104,9 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
         return "\n".join([r for r in res])

     @typing.no_type_check
-    def _get_column_type(
-        self, type_: Union[str, Dict[str, Any]]
-    ) -> TypeEngine:  # noqa: C901
+    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:  # noqa: C901
         """Derives the data type of the Athena column.

         This method is overwritten to extend the behavior of PyAthena.