acryl-datahub 1.1.0.5rc7__py3-none-any.whl → 1.1.0.5rc9__py3-none-any.whl
This diff shows the changes between two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/METADATA +2620 -2622
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/RECORD +59 -59
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +0 -7
- datahub/cli/cli_utils.py +73 -0
- datahub/cli/delete_cli.py +0 -6
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +148 -228
- datahub/cli/exists_cli.py +0 -4
- datahub/cli/get_cli.py +0 -4
- datahub/cli/ingest_cli.py +1 -20
- datahub/cli/put_cli.py +0 -6
- datahub/cli/quickstart_versioning.py +50 -5
- datahub/cli/specific/assertions_cli.py +0 -6
- datahub/cli/specific/datacontract_cli.py +0 -6
- datahub/cli/specific/dataproduct_cli.py +0 -22
- datahub/cli/specific/dataset_cli.py +0 -11
- datahub/cli/specific/forms_cli.py +0 -6
- datahub/cli/specific/group_cli.py +0 -4
- datahub/cli/specific/structuredproperties_cli.py +0 -7
- datahub/cli/specific/user_cli.py +0 -4
- datahub/cli/state_cli.py +0 -4
- datahub/cli/timeline_cli.py +0 -4
- datahub/entrypoints.py +4 -3
- datahub/ingestion/autogenerated/capability_summary.json +88 -23
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/graph/client.py +2 -2
- datahub/ingestion/run/pipeline.py +43 -0
- datahub/ingestion/source/bigquery_v2/bigquery.py +9 -1
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dremio/dremio_source.py +1 -4
- datahub/ingestion/source/gcs/gcs_source.py +9 -1
- datahub/ingestion/source/identity/okta.py +0 -13
- datahub/ingestion/source/powerbi/powerbi.py +0 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/sigma/sigma.py +6 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +11 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +100 -58
- datahub/ingestion/source/snowflake/snowflake_v2.py +11 -1
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +1 -1
- datahub/ingestion/source/sql/hive_metastore.py +0 -10
- datahub/ingestion/source/sql/sql_common.py +8 -0
- datahub/ingestion/source/sql/teradata.py +993 -234
- datahub/ingestion/source/sql/vertica.py +0 -4
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/superset.py +56 -1
- datahub/ingestion/source/tableau/tableau.py +40 -34
- datahub/ingestion/source/tableau/tableau_constant.py +0 -2
- datahub/ingestion/source/unity/source.py +9 -1
- datahub/sdk/lineage_client.py +2 -2
- datahub/sql_parsing/sql_parsing_aggregator.py +21 -12
- datahub/sql_parsing/sqlglot_lineage.py +40 -15
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc7.dist-info → acryl_datahub-1.1.0.5rc9.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/snowflake/snowflake_config.py

```diff
@@ -1,6 +1,7 @@
 import logging
 from collections import defaultdict
 from dataclasses import dataclass
+from enum import Enum
 from typing import Dict, List, Optional, Set
 
 import pydantic
@@ -53,6 +54,11 @@ DEFAULT_TEMP_TABLES_PATTERNS = [
 ]
 
 
+class QueryDedupStrategyType(Enum):
+    STANDARD = "STANDARD"
+    NONE = "NONE"
+
+
 class TagOption(StrEnum):
     with_lineage = "with_lineage"
     without_lineage = "without_lineage"
@@ -248,6 +254,11 @@ class SnowflakeV2Config(
         "This is useful if you have a large number of schemas and want to avoid bulk fetching the schema for each table/view.",
     )
 
+    query_dedup_strategy: QueryDedupStrategyType = Field(
+        default=QueryDedupStrategyType.STANDARD,
+        description=f"Experimental: Choose the strategy for query deduplication (default value is appropriate for most use-cases; make sure you understand performance implications before changing it). Allowed values are: {', '.join([s.name for s in QueryDedupStrategyType])}",
+    )
+
     _check_role_grants_removed = pydantic_removed_field("check_role_grants")
     _provision_role_removed = pydantic_removed_field("provision_role")
```
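The new `query_dedup_strategy` field is enum-typed, so pydantic accepts either the enum member or its string value from a recipe. A minimal sketch of that coercion, using a standalone model rather than the real `SnowflakeV2Config`:

```python
from enum import Enum

from pydantic import BaseModel


class QueryDedupStrategyType(Enum):
    STANDARD = "STANDARD"
    NONE = "NONE"


class DedupConfig(BaseModel):
    # Mirrors the new field: enum-typed, defaulting to STANDARD.
    query_dedup_strategy: QueryDedupStrategyType = QueryDedupStrategyType.STANDARD


# Pydantic coerces the raw recipe string into the enum member.
assert DedupConfig(query_dedup_strategy="NONE").query_dedup_strategy is QueryDedupStrategyType.NONE
assert DedupConfig().query_dedup_strategy is QueryDedupStrategyType.STANDARD
```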
datahub/ingestion/source/snowflake/snowflake_queries.py

```diff
@@ -28,6 +28,7 @@ from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
 from datahub.ingestion.source.snowflake.snowflake_config import (
     DEFAULT_TEMP_TABLES_PATTERNS,
+    QueryDedupStrategyType,
     SnowflakeFilterConfig,
     SnowflakeIdentifierConfig,
 )
@@ -118,6 +119,8 @@ class SnowflakeQueriesExtractorConfig(ConfigModel):
     include_query_usage_statistics: bool = True
     include_operations: bool = True
 
+    query_dedup_strategy: QueryDedupStrategyType = QueryDedupStrategyType.STANDARD
+
 
 class SnowflakeQueriesSourceConfig(
     SnowflakeQueriesExtractorConfig, SnowflakeIdentifierConfig, SnowflakeFilterConfig
@@ -374,12 +377,13 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
     ) -> Iterable[
         Union[PreparsedQuery, TableRename, TableSwap, ObservedQuery, StoredProcCall]
     ]:
-        query_log_query = _build_enriched_query_log_query(
+        query_log_query = QueryLogQueryBuilder(
             start_time=self.config.window.start_time,
             end_time=self.config.window.end_time,
             bucket_duration=self.config.window.bucket_duration,
             deny_usernames=self.config.pushdown_deny_usernames,
-        )
+            dedup_strategy=self.config.query_dedup_strategy,
+        ).build_enriched_query_log_query()
 
         with self.structured_reporter.report_exc(
             "Error fetching query log from Snowflake"
```
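The call site now renders the query log SQL through the new `QueryLogQueryBuilder` (defined in the next hunk). A hedged usage sketch; the class name, parameters, and snowflake import paths come from this diff, while `BucketDuration`'s module is an assumption:

```python
from datetime import datetime, timezone

from datahub.configuration.time_window_config import BucketDuration  # assumed module
from datahub.ingestion.source.snowflake.snowflake_config import QueryDedupStrategyType
from datahub.ingestion.source.snowflake.snowflake_queries import QueryLogQueryBuilder

# Render the enriched query log SQL for one day, with dedup disabled.
sql = QueryLogQueryBuilder(
    start_time=datetime(2025, 6, 1, tzinfo=timezone.utc),
    end_time=datetime(2025, 6, 2, tzinfo=timezone.utc),
    bucket_duration=BucketDuration.DAY,
    deny_usernames=None,
    dedup_strategy=QueryDedupStrategyType.NONE,
).build_enriched_query_log_query()
print(sql)  # WITH fingerprinted_queries as (...) ...
```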
datahub/ingestion/source/snowflake/snowflake_queries.py (continued)

```diff
@@ -710,63 +714,101 @@ class SnowflakeQueriesSource(Source):
         super().close()
 
 
-def _build_enriched_query_log_query(
-    start_time: datetime,
-    end_time: datetime,
-    bucket_duration: BucketDuration,
-    deny_usernames: Optional[List[str]],
-    max_tables_per_query: int = 20,
-) -> str:
-    start_time_millis = int(start_time.timestamp() * 1000)
-    end_time_millis = int(end_time.timestamp() * 1000)
-
-    users_filter = "TRUE"
-    if deny_usernames:
-        user_not_in = ",".join(f"'{user.upper()}'" for user in deny_usernames)
-        users_filter = f"user_name NOT IN ({user_not_in})"
-
-    time_bucket_size = bucket_duration.value
-    assert time_bucket_size in ("HOUR", "DAY", "MONTH")
+class QueryLogQueryBuilder:
+    def __init__(
+        self,
+        start_time: datetime,
+        end_time: datetime,
+        bucket_duration: BucketDuration,
+        deny_usernames: Optional[List[str]],
+        max_tables_per_query: int = 20,
+        dedup_strategy: QueryDedupStrategyType = QueryDedupStrategyType.STANDARD,
+    ):
+        self.start_time = start_time
+        self.end_time = end_time
+        self.start_time_millis = int(start_time.timestamp() * 1000)
+        self.end_time_millis = int(end_time.timestamp() * 1000)
+        self.max_tables_per_query = max_tables_per_query
+        self.dedup_strategy = dedup_strategy
+
+        self.users_filter = "TRUE"
+        if deny_usernames:
+            user_not_in = ",".join(f"'{user.upper()}'" for user in deny_usernames)
+            self.users_filter = f"user_name NOT IN ({user_not_in})"
+
+        self.time_bucket_size = bucket_duration.value
+        assert self.time_bucket_size in ("HOUR", "DAY", "MONTH")
+
+    def _query_fingerprinted_queries(self):
+        if self.dedup_strategy == QueryDedupStrategyType.STANDARD:
+            secondary_fingerprint_sql = """
+        CASE
+            WHEN CONTAINS(query_history.query_text, '-- Hex query metadata:')
+            -- Extract project id and hash it
+            THEN CAST(HASH(
+                REGEXP_SUBSTR(query_history.query_text, '"project_id"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1),
+                REGEXP_SUBSTR(query_history.query_text, '"context"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1)
+            ) AS VARCHAR)
+            ELSE NULL
+        END"""
+        elif self.dedup_strategy == QueryDedupStrategyType.NONE:
+            secondary_fingerprint_sql = "NULL"
+        else:
+            raise NotImplementedError(
+                f"Strategy {self.dedup_strategy} is not implemented by the QueryLogQueryBuilder"
+            )
+        return f"""
+    SELECT *,
+        -- TODO: Generate better fingerprints for each query by pushing down regex logic.
+        query_history.query_parameterized_hash as query_fingerprint,
+        -- Optional and additional hash to be used for query deduplication and final query identity
+        {secondary_fingerprint_sql} as query_secondary_fingerprint
+    FROM
+        snowflake.account_usage.query_history
+    WHERE
+        query_history.start_time >= to_timestamp_ltz({self.start_time_millis}, 3) -- {self.start_time.isoformat()}
+        AND query_history.start_time < to_timestamp_ltz({self.end_time_millis}, 3) -- {self.end_time.isoformat()}
+        AND execution_status = 'SUCCESS'
+        AND {self.users_filter}"""
+
+    def _query_deduplicated_queries(self):
+        if self.dedup_strategy == QueryDedupStrategyType.STANDARD:
+            return f"""
+    SELECT
+        *,
+        DATE_TRUNC(
+            {self.time_bucket_size},
+            CONVERT_TIMEZONE('UTC', start_time)
+        ) AS bucket_start_time,
+        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint) AS query_count,
+    FROM
+        fingerprinted_queries
+    QUALIFY
+        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint ORDER BY start_time DESC) = 1"""
+        elif self.dedup_strategy == QueryDedupStrategyType.NONE:
+            return f"""
+    SELECT
+        *,
+        DATE_TRUNC(
+            {self.time_bucket_size},
+            CONVERT_TIMEZONE('UTC', start_time)
+        ) AS bucket_start_time,
+        1 AS query_count,
+    FROM
+        fingerprinted_queries"""
+        else:
+            raise NotImplementedError(
+                f"Strategy {self.dedup_strategy} is not implemented by the QueryLogQueryBuilder"
+            )
 
-    return f"""\
+    def build_enriched_query_log_query(self) -> str:
+        return f"""\
 WITH
 fingerprinted_queries as (
-    SELECT *,
-        -- TODO: Generate better fingerprints for each query by pushing down regex logic.
-        query_history.query_parameterized_hash as query_fingerprint,
-        -- Optional and additional hash to be used for query deduplication and final query identity
-        CASE
-            WHEN CONTAINS(query_history.query_text, '-- Hex query metadata:')
-            -- Extract project id and hash it
-            THEN CAST(HASH(
-                REGEXP_SUBSTR(query_history.query_text, '"project_id"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1),
-                REGEXP_SUBSTR(query_history.query_text, '"context"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1)
-            ) AS VARCHAR)
-            ELSE NULL
-        END as query_secondary_fingerprint
-    FROM
-        snowflake.account_usage.query_history
-    WHERE
-        query_history.start_time >= to_timestamp_ltz({start_time_millis}, 3) -- {start_time.isoformat()}
-        AND query_history.start_time < to_timestamp_ltz({end_time_millis}, 3) -- {end_time.isoformat()}
-        AND execution_status = 'SUCCESS'
-        AND {users_filter}
+{self._query_fingerprinted_queries()}
 )
 , deduplicated_queries as (
-    SELECT
-        *,
-        DATE_TRUNC(
-            {time_bucket_size},
-            CONVERT_TIMEZONE('UTC', start_time)
-        ) AS bucket_start_time,
-        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint) AS query_count,
-    FROM
-        fingerprinted_queries
-    QUALIFY
-        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint ORDER BY start_time DESC) = 1
+{self._query_deduplicated_queries()}
 )
 , raw_access_history AS (
     SELECT
@@ -780,9 +822,9 @@ fingerprinted_queries as (
     FROM
         snowflake.account_usage.access_history
     WHERE
-        query_start_time >= to_timestamp_ltz({start_time_millis}, 3) -- {start_time.isoformat()}
-        AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) -- {end_time.isoformat()}
-        AND {users_filter}
+        query_start_time >= to_timestamp_ltz({self.start_time_millis}, 3) -- {self.start_time.isoformat()}
+        AND query_start_time < to_timestamp_ltz({self.end_time_millis}, 3) -- {self.end_time.isoformat()}
+        AND {self.users_filter}
         AND query_id IN (
             SELECT query_id FROM deduplicated_queries
         )
@@ -795,7 +837,7 @@ fingerprinted_queries as (
         query_start_time,
         ARRAY_SLICE(
             FILTER(direct_objects_accessed, o -> o:objectDomain IN {SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER}),
-            0, {max_tables_per_query}
+            0, {self.max_tables_per_query}
         ) as direct_objects_accessed,
         -- TODO: Drop the columns.baseSources subfield.
         FILTER(objects_modified, o -> o:objectDomain IN {SnowflakeQuery.ACCESS_HISTORY_TABLE_VIEW_DOMAINS_FILTER}) as objects_modified,
```
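Under the STANDARD strategy, the secondary fingerprint keys Hex-tagged queries on the `project_id` and `context` fields embedded in the `-- Hex query metadata:` comment. A rough Python equivalent of what the `REGEXP_SUBSTR` calls above extract (the query text below is illustrative):

```python
import re

# Hypothetical Hex-tagged query, shaped like the metadata comment the SQL matches.
query_text = 'SELECT 1 -- Hex query metadata: {"project_id": "p-123", "context": "SCHEDULED_RUN"}'

project_id = re.search(r'"project_id"\s*:\s*"([^"]+)"', query_text)
context = re.search(r'"context"\s*:\s*"([^"]+)"', query_text)

# Snowflake HASHes the two captures; a tuple stands in for that hash here.
secondary_fingerprint = (
    (project_id.group(1), context.group(1)) if project_id and context else None
)
print(secondary_fingerprint)  # ('p-123', 'SCHEDULED_RUN')
```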
datahub/ingestion/source/snowflake/snowflake_v2.py

```diff
@@ -32,6 +32,7 @@ from datahub.ingestion.api.source import (
 )
 from datahub.ingestion.api.source_helpers import auto_workunit
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
 from datahub.ingestion.source.snowflake.constants import (
     GENERIC_PERMISSION_ERROR_KEY,
     SnowflakeEdition,
@@ -97,7 +98,14 @@ logger: logging.Logger = logging.getLogger(__name__)
 @support_status(SupportStatus.CERTIFIED)
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
-@capability(SourceCapability.CONTAINERS, "Enabled by default")
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+        SourceCapabilityModifier.SCHEMA,
+    ],
+)
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
 @capability(
     SourceCapability.DATA_PROFILING,
@@ -577,6 +585,7 @@ class SnowflakeV2Source(
 
         queries_extractor = SnowflakeQueriesExtractor(
             connection=self.connection,
+            # TODO: this should be its own section in main recipe
             config=SnowflakeQueriesExtractorConfig(
                 window=BaseTimeWindowConfig(
                     start_time=self.config.start_time,
@@ -591,6 +600,7 @@ class SnowflakeV2Source(
                 include_query_usage_statistics=self.config.include_query_usage_statistics,
                 user_email_pattern=self.config.user_email_pattern,
                 pushdown_deny_usernames=self.config.pushdown_deny_usernames,
+                query_dedup_strategy=self.config.query_dedup_strategy,
             ),
             structured_report=self.report,
             filters=self.filters,
```
datahub/ingestion/source/snowflake/stored_proc_lineage.py

```diff
@@ -65,7 +65,7 @@ class StoredProcLineageTracker(Closeable):
         # { root_query_id -> StoredProcExecutionLineage }
         self._stored_proc_execution_lineage: FileBackedDict[
             StoredProcExecutionLineage
-        ] = FileBackedDict(shared_connection)
+        ] = FileBackedDict(shared_connection, tablename="stored_proc_lineage")
 
     def add_stored_proc_call(self, call: StoredProcCall) -> None:
         """Add a stored procedure call to track."""
```
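Passing an explicit `tablename` matters because `FileBackedDict` instances that share one SQLite connection each need their own backing table. A small sketch of the pattern, assuming `datahub.utilities.file_backed_collections` (the module behind `FileBackedDict`) and that its `ConnectionWrapper` defaults to a temporary database file:

```python
from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict

# One shared SQLite connection backing two logically separate dicts.
shared = ConnectionWrapper()

queries: FileBackedDict[str] = FileBackedDict(shared, tablename="queries")
lineage: FileBackedDict[str] = FileBackedDict(shared, tablename="stored_proc_lineage")

# Distinct tables, so identical keys don't collide across the two dicts.
queries["q1"] = "SELECT 1"
lineage["q1"] = "CALL my_proc()"
assert queries["q1"] == "SELECT 1" and lineage["q1"] == "CALL my_proc()"
```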
datahub/ingestion/source/sql/hive_metastore.py

```diff
@@ -52,7 +52,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import Dataset
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
 from datahub.metadata.schema_classes import (
-    ChangeTypeClass,
     DatasetPropertiesClass,
     SubTypesClass,
     ViewPropertiesClass,
@@ -601,10 +600,7 @@ class HiveMetastoreSource(SQLAlchemySource):
             yield dpi_aspect
 
         yield MetadataChangeProposalWrapper(
-            entityType="dataset",
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=dataset_urn,
-            aspectName="subTypes",
             aspect=SubTypesClass(typeNames=[self.table_subtype]),
         ).as_workunit()
 
@@ -810,10 +806,7 @@ class HiveMetastoreSource(SQLAlchemySource):
 
         # Add views subtype
         yield MetadataChangeProposalWrapper(
-            entityType="dataset",
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=dataset_urn,
-            aspectName="subTypes",
             aspect=SubTypesClass(typeNames=[self.view_subtype]),
         ).as_workunit()
 
@@ -824,10 +817,7 @@ class HiveMetastoreSource(SQLAlchemySource):
             viewLogic=dataset.view_definition if dataset.view_definition else "",
         )
         yield MetadataChangeProposalWrapper(
-            entityType="dataset",
-            changeType=ChangeTypeClass.UPSERT,
             entityUrn=dataset_urn,
-            aspectName="viewProperties",
             aspect=view_properties_aspect,
         ).as_workunit()
 
```
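The dropped keyword arguments are redundant: `MetadataChangeProposalWrapper` infers `entityType` from the URN, `aspectName` from the aspect class, and defaults `changeType` to `UPSERT`. A small sketch of that inference (real datahub classes; the URN is illustrative):

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import SubTypesClass

mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.table,PROD)",
    aspect=SubTypesClass(typeNames=["Table"]),
)

# The wrapper fills in what the old code spelled out by hand.
assert mcp.entityType == "dataset"
assert mcp.aspectName == "subTypes"
assert mcp.changeType == "UPSERT"
```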
datahub/ingestion/source/sql/sql_common.py

```diff
@@ -292,6 +292,10 @@ class ProfileMetadata:
     SourceCapability.CONTAINERS,
     "Enabled by default",
     supported=True,
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+        SourceCapabilityModifier.SCHEMA,
+    ],
 )
 @capability(
     SourceCapability.DESCRIPTIONS,
@@ -589,6 +593,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )
 
         # Generate workunit for aggregated SQL parsing results
+        yield from self._generate_aggregator_workunits()
+
+    def _generate_aggregator_workunits(self) -> Iterable[MetadataWorkUnit]:
+        """Generate work units from SQL parsing aggregator. Can be overridden by subclasses."""
         for mcp in self.aggregator.gen_metadata():
             yield mcp.as_workunit()
 
```
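Extracting the emission loop into `_generate_aggregator_workunits` turns it into an override point for dialect sources. A hedged sketch of a hypothetical subclass using it (the base class and hook are real per the diff; the subclass and its filtering idea are illustrative):

```python
from typing import Iterable

from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.sql.sql_common import SQLAlchemySource


class MyDialectSource(SQLAlchemySource):  # hypothetical subclass
    def _generate_aggregator_workunits(self) -> Iterable[MetadataWorkUnit]:
        # Post-process aggregated SQL-parsing output in one place,
        # e.g. drop or annotate work units before they are emitted.
        for wu in super()._generate_aggregator_workunits():
            yield wu
```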