acryl-datahub 1.1.0.3rc2__py3-none-any.whl → 1.1.0.4rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub has been flagged as potentially problematic by the registry.
- {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/METADATA +2470 -2470
- {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/RECORD +57 -57
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +27 -0
- datahub/cli/delete_cli.py +117 -19
- datahub/ingestion/api/source.py +2 -0
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +42 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +1 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/dbt/dbt_cloud.py +3 -0
- datahub/ingestion/source/dbt/dbt_common.py +3 -1
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/powerbi/powerbi.py +1 -0
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +4 -1
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/sigma/sigma.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +3 -6
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -1
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +8 -1
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +12 -0
- datahub/ingestion/source/tableau/tableau.py +1 -0
- datahub/ingestion/source/unity/source.py +1 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +3 -1
- datahub/metadata/_internal_schema_classes.py +25 -0
- datahub/metadata/schema.avsc +18 -1
- datahub/metadata/schemas/ContainerProperties.avsc +6 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +6 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +6 -0
- datahub/metadata/schemas/DataProcessKey.avsc +6 -0
- datahub/metadata/schemas/DatasetKey.avsc +6 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +6 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +6 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +6 -0
- datahub/metadata/schemas/MLModelKey.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -1
- datahub/sql_parsing/sqlglot_lineage.py +21 -6
- {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/top_level.txt +0 -0
datahub/ingestion/graph/client.py
CHANGED
@@ -906,6 +906,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
         batch_size: int = 5000,
         extraFilters: Optional[List[RawSearchFilterRule]] = None,
         extra_or_filters: Optional[RawSearchFilter] = None,
+        skip_cache: bool = False,
     ) -> Iterable[str]:
         """Fetch all urns that match all of the given filters.

@@ -924,6 +925,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
         Note that this requires browsePathV2 aspects (added in 0.10.4+).
         :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
         :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
+        :param skip_cache: Whether to bypass caching. Defaults to False.

         :return: An iterable of urns that match the filters.
         """
@@ -951,7 +953,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
             $query: String!,
             $orFilters: [AndFilterInput!],
             $batchSize: Int!,
-            $scrollId: String) {
+            $scrollId: String,
+            $skipCache: Boolean!) {

           scrollAcrossEntities(input: {
             query: $query,
@@ -962,6 +965,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
             searchFlags: {
               skipHighlighting: true
               skipAggregates: true
+              skipCache: $skipCache
             }
           }) {
             nextScrollId
@@ -980,6 +984,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
             "query": query,
             "orFilters": orFilters,
             "batchSize": batch_size,
+            "skipCache": skip_cache,
         }

         for entity in self._scroll_across_entities(graphql_query, variables):
@@ -1085,7 +1090,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
             "query": query,
             "orFilters": or_filters_final,
             "batchSize": batch_size,
-            "skipCache":
+            "skipCache": skip_cache,
             "fetchExtraFields": extra_source_fields,
         }

@@ -1429,6 +1434,41 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
         related_aspects = response.get("relatedAspects", [])
         return reference_count, related_aspects

+    def restore_indices(
+        self,
+        urn_pattern: str,
+        aspect: Optional[str] = None,
+        start: Optional[int] = None,
+        batch_size: Optional[int] = None,
+    ) -> str:
+        """Restore the indices for a given urn or urn-like pattern.
+
+        Args:
+            urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs.
+            aspect: Optional aspect string to restore indices for a specific aspect.
+            start: Optional integer to decide which row number of sql store to restore from. Default: 0.
+            batch_size: Optional integer to decide how many rows to restore. Default: 10.
+
+        Returns:
+            A string containing the result of the restore indices operation. This format is subject to change.
+        """
+        if "%" in urn_pattern:
+            payload_obj: dict = {"urnLike": urn_pattern}
+        else:
+            payload_obj = {"urn": urn_pattern}
+        if aspect is not None:
+            payload_obj["aspect"] = aspect
+        if start is not None:
+            payload_obj["start"] = start
+        if batch_size is not None:
+            payload_obj["batchSize"] = batch_size
+        raw_result = self._post_generic(
+            f"{self._gms_server}/operations?action=restoreIndices", payload_obj
+        )
+        result = raw_result["value"]
+        logger.debug(f"Restore indices result: {result}")
+        return result
+
     @functools.lru_cache
     def _make_schema_resolver(
         self,
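Taken together, the client.py hunks above add a `skip_cache` flag to `get_urns_by_filter` and a new `restore_indices` helper on `DataHubGraph`. A minimal usage sketch; the server URL and filter values are illustrative placeholders, not taken from the diff:

# Sketch: exercising the two DataHubGraph additions above.
# Assumes a reachable DataHub GMS; URL and filter values are placeholders.
from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

# skip_cache=True bypasses the server-side search cache for fresh results.
for urn in graph.get_urns_by_filter(platform="snowflake", skip_cache=True):
    print(urn)

# restore_indices: a "%" in the pattern switches to urnLike matching.
print(graph.restore_indices("urn:li:dataset:%", batch_size=10))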
datahub/ingestion/source/bigquery_v2/bigquery.py
CHANGED
@@ -99,6 +99,7 @@ def cleanup(config: BigQueryV2Config) -> None:
     SourceCapability.PARTITION_SUPPORT,
     "Enabled by default, partition keys and clustering keys are supported.",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
     def __init__(self, ctx: PipelineContext, config: BigQueryV2Config):
         super().__init__(config, ctx)
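This release adds `@capability(SourceCapability.TEST_CONNECTION, ...)` and similar decorators to many sources (see the hunks below). They all rely on the same mechanism: stacked class decorators that record capability metadata on the source class. A toy sketch of that mechanism, not DataHub's actual implementation (the real one lives in `datahub.ingestion.api.decorators`):

# Toy sketch of the stacked @capability decorator pattern; illustration
# only, NOT DataHub's implementation.
from typing import Callable, Dict, Tuple, Type


def capability(
    name: str, description: str, supported: bool = True
) -> Callable[[Type], Type]:
    def wrapper(cls: Type) -> Type:
        # Copy the inherited registry so subclasses don't mutate the parent's.
        caps: Dict[str, Tuple[str, bool]] = dict(getattr(cls, "_capabilities", {}))
        caps[name] = (description, supported)
        cls._capabilities = caps
        return cls

    return wrapper


@capability("TEST_CONNECTION", "Enabled by default")
@capability("LINEAGE_FINE", "Not supported", supported=False)
class ExampleSource:
    pass


# Decorators apply bottom-up; both entries land in the registry.
print(ExampleSource._capabilities)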
datahub/ingestion/source/bigquery_v2/common.py
CHANGED
@@ -63,7 +63,7 @@ class BigQueryIdentifierBuilder:
     )

     def gen_user_urn(self, user_email: str) -> str:
-        return make_user_urn(user_email.split("@")[0])
+        return make_user_urn(user_email)

     def make_data_platform_urn(self) -> str:
         return make_data_platform_urn(self.platform)
datahub/ingestion/source/dbt/dbt_cloud.py
CHANGED
@@ -9,7 +9,9 @@ import requests
 from pydantic import Field, root_validator

 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -261,6 +263,7 @@ query DatahubMetadataQuery_{type}($jobId: BigInt!, $runId: BigInt) {{
 @platform_name("dbt")
 @config_class(DBTCloudConfig)
 @support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class DBTCloudSource(DBTSourceBase, TestableSource):
     config: DBTCloudConfig

@@ -823,7 +823,9 @@ def get_column_type(
|
|
|
823
823
|
@platform_name("dbt")
|
|
824
824
|
@config_class(DBTCommonConfig)
|
|
825
825
|
@support_status(SupportStatus.CERTIFIED)
|
|
826
|
-
@capability(
|
|
826
|
+
@capability(
|
|
827
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
828
|
+
)
|
|
827
829
|
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
|
|
828
830
|
@capability(
|
|
829
831
|
SourceCapability.LINEAGE_FINE,
|
|
datahub/ingestion/source/dbt/dbt_core.py
CHANGED
@@ -15,7 +15,9 @@ from datahub.configuration.git import GitReference
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -464,6 +466,7 @@ def load_run_results(
 @platform_name("dbt")
 @config_class(DBTCoreConfig)
 @support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class DBTCoreSource(DBTSourceBase, TestableSource):
     config: DBTCoreConfig
     report: DBTCoreReport
datahub/ingestion/source/file.py
CHANGED
@@ -18,7 +18,9 @@ from datahub.configuration.validate_field_rename import pydantic_renamed_field
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -187,6 +189,7 @@ class FileSourceReport(StaleEntityRemovalSourceReport):
 @platform_name("Metadata File")
 @config_class(FileSourceConfig)
 @support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class GenericFileSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin pulls metadata from a previously generated file.
datahub/ingestion/source/ge_profiling_config.py
CHANGED
@@ -125,6 +125,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Profile table only if it has been updated since these many number of days. "
         "If set to `null`, no constraint of last modified time for tables to profile. "
         "Supported only in `snowflake` and `BigQuery`.",
+        schema_extra={"supported_sources": ["snowflake", "bigquery"]},
     )

     profile_table_size_limit: Optional[int] = Field(
@@ -132,6 +133,9 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Profile tables only if their size is less than specified GBs. If set to `null`, "
         "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
         "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
+        schema_extra={
+            "supported_sources": ["snowflake", "bigquery", "unity-catalog", "oracle"]
+        },
     )

     profile_table_row_limit: Optional[int] = Field(
@@ -139,12 +143,14 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         description="Profile tables only if their row count is less than specified count. "
         "If set to `null`, no limit on the row count of tables to profile. Supported only in "
         "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
+        schema_extra={"supported_sources": ["snowflake", "bigquery", "oracle"]},
     )

     profile_table_row_count_estimate_only: bool = Field(
         default=False,
         description="Use an approximate query for row count. This will be much faster but slightly "
         "less accurate. Only supported for Postgres and MySQL. ",
+        schema_extra={"supported_sources": ["postgres", "mysql"]},
     )

     # The query combiner enables us to combine multiple queries into a single query,
@@ -161,27 +167,32 @@ class GEProfilingConfig(GEProfilingBaseConfig):
         default=True,
         description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
         "If enabled, latest partition data is used for profiling.",
+        schema_extra={"supported_sources": ["athena", "bigquery"]},
     )
     partition_datetime: Optional[datetime.datetime] = Field(
         default=None,
         description="If specified, profile only the partition which matches this datetime. "
         "If not specified, profile the latest partition. Only Bigquery supports this.",
+        schema_extra={"supported_sources": ["bigquery"]},
     )
     use_sampling: bool = Field(
         default=True,
         description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. "
         "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ",
+        schema_extra={"supported_sources": ["bigquery", "snowflake"]},
     )

     sample_size: int = Field(
         default=10000,
         description="Number of rows to be sampled from table for column level profiling."
         "Applicable only if `use_sampling` is set to True.",
+        schema_extra={"supported_sources": ["bigquery", "snowflake"]},
     )

     profile_external_tables: bool = Field(
         default=False,
         description="Whether to profile external tables. Only Snowflake and Redshift supports this.",
+        schema_extra={"supported_sources": ["redshift", "snowflake"]},
     )

     tags_to_ignore_sampling: Optional[List[str]] = pydantic.Field(
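The new `schema_extra={"supported_sources": [...]}` annotations piggyback on pydantic v1's behavior of copying unknown `Field()` keyword arguments into the generated JSON schema, which lets documentation tooling read back which sources support each option. A minimal sketch, assuming pydantic v1 semantics (`ExampleProfilingConfig` is hypothetical):

# Minimal sketch of the schema_extra pattern above, assuming pydantic v1
# (unknown Field kwargs are copied into the field's JSON schema).
from pydantic import BaseModel, Field


class ExampleProfilingConfig(BaseModel):
    use_sampling: bool = Field(
        default=True,
        description="Whether to profile column level stats on a sample of the table.",
        schema_extra={"supported_sources": ["bigquery", "snowflake"]},
    )


# Downstream tooling (e.g. doc generators) can read the annotation back:
field_schema = ExampleProfilingConfig.schema()["properties"]["use_sampling"]
print(field_schema["schema_extra"]["supported_sources"])  # ['bigquery', 'snowflake']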
datahub/ingestion/source/iceberg/iceberg.py
CHANGED
@@ -134,7 +134,9 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
     SourceCapability.OWNERSHIP,
     "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class IcebergSource(StatefulIngestionSourceBase):
     """
     ## Integration Details
datahub/ingestion/source/kafka/kafka.py
CHANGED
@@ -189,6 +189,22 @@ class KafkaConnectionTest:
     SourceCapability.SCHEMA_METADATA,
     "Schemas associated with each topic are extracted from the schema registry. Avro and Protobuf (certified), JSON (incubating). Schema references are supported.",
 )
+@capability(
+    SourceCapability.DATA_PROFILING,
+    "Not supported",
+    supported=False,
+)
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Not supported. If you use Kafka Connect, the kafka-connect source can generate lineage.",
+    supported=False,
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Not supported",
+    supported=False,
+)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/looker/looker_source.py
CHANGED
@@ -126,6 +126,7 @@ logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, configured using `extract_usage_history`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
datahub/ingestion/source/powerbi/powerbi.py
CHANGED
@@ -1253,6 +1253,7 @@ class Mapper:
     SourceCapability.DATA_PROFILING,
     "Optionally enabled via configuration profiling.enabled",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/qlik_sense/qlik_sense.py
CHANGED
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
     "Enabled by default, configured using `ingest_owner`",
 )
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/redshift/redshift.py
CHANGED
@@ -140,12 +140,15 @@ logger: logging.Logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, can be disabled via configuration `include_usage_statistics`",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.CLASSIFICATION,
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sac/sac.py
CHANGED
@@ -178,7 +178,9 @@ class SACSourceReport(StaleEntityRemovalSourceReport):
     SourceCapability.LINEAGE_COARSE,
     "Enabled by default (only for Live Data Models)",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.SCHEMA_METADATA,
     "Enabled by default (only for Import Data Models)",
datahub/ingestion/source/sigma/sigma.py
CHANGED
@@ -105,6 +105,7 @@ logger = logging.getLogger(__name__)
     SourceCapability.OWNERSHIP,
     "Enabled by default, configured using `ingest_owner`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class SigmaSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
datahub/ingestion/source/snowflake/snowflake_config.py
CHANGED
@@ -154,14 +154,11 @@ class SnowflakeIdentifierConfig(

     email_domain: Optional[str] = pydantic.Field(
         default=None,
-        description="Email domain of your organization so users can be displayed on UI appropriately.",
+        description="Email domain of your organization so users can be displayed on UI appropriately. This is used only if we cannot infer email ID.",
     )

-    email_as_user_identifier: bool = pydantic.Field(
-        default=True,
-        description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
-        "provided, generates email addresses for snowflake users with unset emails, based on their "
-        "username.",
+    _email_as_user_identifier = pydantic_removed_field(
+        "email_as_user_identifier",
     )

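The `pydantic_removed_field` helper (from `datahub.configuration.validate_field_removal`) keeps old recipes parseable after a config option is deleted. A simplified sketch of the pattern, not DataHub's actual helper (`ExampleIdentifierConfig` is hypothetical):

# Simplified sketch of the removed-field pattern: accept and discard a
# retired config key so old recipes don't break. Not DataHub's actual helper.
import warnings

import pydantic


class ExampleIdentifierConfig(pydantic.BaseModel):
    email_domain: str = "example.com"

    @pydantic.root_validator(pre=True, allow_reuse=True)
    def _drop_removed_fields(cls, values: dict) -> dict:
        if "email_as_user_identifier" in values:
            warnings.warn(
                "email_as_user_identifier was removed and is ignored.",
                stacklevel=2,
            )
            values.pop("email_as_user_identifier")
        return values


# Old configs that still set the field parse cleanly, with a warning.
cfg = ExampleIdentifierConfig.parse_obj({"email_as_user_identifier": True})
print(cfg.email_domain)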
datahub/ingestion/source/snowflake/snowflake_summary.py
CHANGED
@@ -20,6 +20,7 @@ from datahub.ingestion.source.snowflake.snowflake_schema_gen import (
     SnowflakeSchemaGenerator,
 )
 from datahub.ingestion.source.snowflake.snowflake_utils import (
+    SnowflakeFilter,
     SnowflakeIdentifierBuilder,
 )
 from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
@@ -81,6 +82,10 @@ class SnowflakeSummarySource(Source):
             profiler=None,
             aggregator=None,
             snowsight_url_builder=None,
+            filters=SnowflakeFilter(
+                filter_config=self.config,
+                structured_reporter=self.report,
+            ),
         )

         # Databases.
datahub/ingestion/source/snowflake/snowflake_utils.py
CHANGED
@@ -325,15 +325,10 @@ class SnowflakeIdentifierBuilder:
         user_email: Optional[str],
     ) -> str:
         if user_email:
-            return self.snowflake_identifier(
-                user_email
-                if self.identifier_config.email_as_user_identifier is True
-                else user_email.split("@")[0]
-            )
+            return self.snowflake_identifier(user_email)
         return self.snowflake_identifier(
             f"{user_name}@{self.identifier_config.email_domain}"
-            if self.identifier_config.email_as_user_identifier
-            and self.identifier_config.email_domain is not None
+            if self.identifier_config.email_domain is not None
             else user_name
         )

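The snowflake_utils hunk simplifies user-identifier resolution: an explicit email is now always used as-is, and `email_domain` only matters when no email is available. A standalone restatement of the new logic (the `user_identifier` helper is hypothetical; `snowflake_identifier` normalization is elided):

# Standalone restatement of the new get_user_identifier behavior above.
from typing import Optional


def user_identifier(
    user_name: str, user_email: Optional[str], email_domain: Optional[str]
) -> str:
    if user_email:
        # An explicit email always wins; the old email_as_user_identifier
        # toggle, removed in this release, no longer gates this.
        return user_email
    # Otherwise synthesize an address from the configured domain, if any.
    return f"{user_name}@{email_domain}" if email_domain is not None else user_name


assert user_identifier("alice", "alice@corp.com", "corp.com") == "alice@corp.com"
assert user_identifier("bob", None, "corp.com") == "bob@corp.com"
assert user_identifier("carol", None, None) == "carol"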
datahub/ingestion/source/snowflake/snowflake_v2.py
CHANGED
@@ -131,6 +131,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class SnowflakeV2Source(
     SnowflakeCommonMixin,
     StatefulIngestionSourceBase,
@@ -311,6 +312,7 @@ class SnowflakeV2Source(
             SourceCapability.PLATFORM_INSTANCE,
             SourceCapability.DOMAINS,
             SourceCapability.DELETION_DETECTION,
+            SourceCapability.TEST_CONNECTION,
         )
     ]

datahub/ingestion/source/sql/clickhouse.py
CHANGED
@@ -379,7 +379,9 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
 @platform_name("ClickHouse")
 @config_class(ClickHouseConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 class ClickHouseSource(TwoTierSQLAlchemySource):
     """
datahub/ingestion/source/sql/cockroachdb.py
CHANGED
@@ -26,7 +26,6 @@ class CockroachDBConfig(PostgresConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class CockroachDBSource(PostgresSource):
     config: CockroachDBConfig

datahub/ingestion/source/sql/hana.py
CHANGED
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class HanaSource(SQLAlchemySource):
     def __init__(self, config: HanaConfig, ctx: PipelineContext):
         super().__init__(config, ctx, "hana")
datahub/ingestion/source/sql/hive_metastore.py
CHANGED
@@ -161,7 +161,9 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @platform_name("Hive Metastore")
 @config_class(HiveMetastore)
 @support_status(SupportStatus.CERTIFIED)
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
 @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
 @capability(
datahub/ingestion/source/sql/mariadb.py
CHANGED
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class MariaDBSource(MySQLSource):
     def get_platform(self):
         return "mariadb"
datahub/ingestion/source/sql/mssql/source.py
CHANGED
@@ -174,7 +174,14 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
-@capability(
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
+)
 class SQLServerSource(SQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/mysql.py
CHANGED
@@ -65,7 +65,6 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class MySQLSource(TwoTierSQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/postgres.py
CHANGED
@@ -131,7 +131,6 @@ class PostgresConfig(BasePostgresConfig):
 @capability(SourceCapability.DOMAINS, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
 class PostgresSource(SQLAlchemySource):
     """
     This plugin extracts the following:
datahub/ingestion/source/sql/sql_common.py
CHANGED
@@ -302,6 +302,18 @@ class ProfileMetadata:
     "Enabled by default",
     supported=True,
 )
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for views via `include_view_lineage`",
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for views via `include_view_column_lineage`",
+)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     """A Base class for all SQL Sources that use SQLAlchemy to extend"""

datahub/ingestion/source/tableau/tableau.py
CHANGED
@@ -879,6 +879,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
     SourceCapability.LINEAGE_FINE,
     "Enabled by default, configure using `extract_column_level_lineage`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class TableauSource(StatefulIngestionSourceBase, TestableSource):
     platform = "tableau"

datahub/ingestion/source/unity/source.py
CHANGED
@@ -162,6 +162,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 @support_status(SupportStatus.INCUBATING)
 class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     """
datahub/ingestion/source/usage/clickhouse_usage.py
CHANGED
@@ -85,7 +85,9 @@ class ClickHouseUsageConfig(ClickHouseConfig, BaseUsageConfig, EnvConfigMixin):
 @platform_name("ClickHouse")
 @config_class(ClickHouseUsageConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 @dataclasses.dataclass
 class ClickHouseUsageSource(Source):
datahub/metadata/_internal_schema_classes.py
CHANGED
@@ -4590,6 +4590,15 @@ class FabricTypeClass(object):
     RVW = "RVW"
     """Designates review fabrics"""

+    PRD = "PRD"
+    """Alternative Prod spelling"""
+
+    TST = "TST"
+    """Alternative Test spelling"""
+
+    SIT = "SIT"
+    """System Integration Testing"""
+
     SANDBOX = "SANDBOX"
     """Designates sandbox fabrics"""

@@ -21504,6 +21513,7 @@ class DataHubResourceFilterClass(DictWrapper):
         resources: Union[None, List[str]]=None,
         allResources: Optional[bool]=None,
         filter: Union[None, "PolicyMatchFilterClass"]=None,
+        privilegeConstraints: Union[None, "PolicyMatchFilterClass"]=None,
     ):
         super().__init__()

@@ -21515,12 +21525,14 @@ class DataHubResourceFilterClass(DictWrapper):
         else:
             self.allResources = allResources
         self.filter = filter
+        self.privilegeConstraints = privilegeConstraints

     def _restore_defaults(self) -> None:
         self.type = self.RECORD_SCHEMA.fields_dict["type"].default
         self.resources = self.RECORD_SCHEMA.fields_dict["resources"].default
         self.allResources = self.RECORD_SCHEMA.fields_dict["allResources"].default
         self.filter = self.RECORD_SCHEMA.fields_dict["filter"].default
+        self.privilegeConstraints = self.RECORD_SCHEMA.fields_dict["privilegeConstraints"].default


@@ -21565,6 +21577,16 @@ class DataHubResourceFilterClass(DictWrapper):
         self._inner_dict['filter'] = value


+    @property
+    def privilegeConstraints(self) -> Union[None, "PolicyMatchFilterClass"]:
+        """Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"""
+        return self._inner_dict.get('privilegeConstraints')  # type: ignore
+
+    @privilegeConstraints.setter
+    def privilegeConstraints(self, value: Union[None, "PolicyMatchFilterClass"]) -> None:
+        self._inner_dict['privilegeConstraints'] = value
+
+
 class DataHubRoleInfoClass(_Aspect):
     """Information about a DataHub Role."""

@@ -21633,6 +21655,9 @@ class PolicyMatchConditionClass(object):
     STARTS_WITH = "STARTS_WITH"
     """Whether the field value starts with the value"""

+    NOT_EQUALS = "NOT_EQUALS"
+    """Whether the field does not match the value"""
+

 class PolicyMatchCriterionClass(DictWrapper):