acryl-datahub 1.1.0.4rc1__py3-none-any.whl → 1.1.0.4rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/METADATA +2581 -2581
- {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/RECORD +58 -58
- datahub/_version.py +1 -1
- datahub/emitter/rest_emitter.py +18 -1
- datahub/ingestion/api/source.py +2 -0
- datahub/ingestion/source/bigquery_v2/bigquery.py +18 -0
- datahub/ingestion/source/dbt/dbt_cloud.py +3 -0
- datahub/ingestion/source/dbt/dbt_common.py +3 -1
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/dremio/dremio_api.py +98 -68
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +90 -77
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/ge_data_profiler.py +48 -8
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/kafka/kafka.py +1 -0
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/powerbi/powerbi.py +1 -0
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/sigma/sigma.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +3 -6
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -1
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +8 -1
- datahub/ingestion/source/sql/mysql.py +0 -9
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +12 -0
- datahub/ingestion/source/tableau/tableau.py +1 -0
- datahub/ingestion/source/unity/source.py +1 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/metadata/_internal_schema_classes.py +25 -0
- datahub/metadata/schema.avsc +18 -1
- datahub/metadata/schemas/ContainerProperties.avsc +6 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +6 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +6 -0
- datahub/metadata/schemas/DataProcessKey.avsc +6 -0
- datahub/metadata/schemas/DatasetKey.avsc +6 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +6 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +6 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +6 -0
- datahub/metadata/schemas/MLModelKey.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -1
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/top_level.txt +0 -0
@@ -120,7 +120,6 @@ SNOWFLAKE = "snowflake"
 BIGQUERY = "bigquery"
 REDSHIFT = "redshift"
 DATABRICKS = "databricks"
-TRINO = "trino"
 
 # Type names for Databricks, to match Title Case types in sqlalchemy
 ProfilerTypeMapping.INT_TYPE_NAMES.append("Integer")
@@ -206,6 +205,17 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> int
             )
         )
         return convert_to_json_serializable(element_values.fetchone()[0])
+    elif (
+        self.engine.dialect.name.lower() == GXSqlDialect.AWSATHENA
+        or self.engine.dialect.name.lower() == GXSqlDialect.TRINO
+    ):
+        return convert_to_json_serializable(
+            self.engine.execute(
+                sa.select(sa.func.approx_distinct(sa.column(column))).select_from(
+                    self._table
+                )
+            ).scalar()
+        )
     return convert_to_json_serializable(
         self.engine.execute(
             sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(
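Note: the new branch uses Trino/Athena's approx_distinct() instead of an exact COUNT(DISTINCT ...). A standalone sketch of the two strategies, with hypothetical engine/table/dialect arguments:

import sqlalchemy as sa

def unique_count(engine, table, column_name, dialect_name):
    # approx_distinct() is a Trino/Athena built-in that trades exactness for speed;
    # every other dialect keeps the exact COUNT(DISTINCT ...) path.
    col = sa.column(column_name)
    if dialect_name in ("trino", "awsathena"):
        query = sa.select(sa.func.approx_distinct(col)).select_from(table)
    else:
        query = sa.select([sa.func.count(sa.func.distinct(col))]).select_from(table)
    return engine.execute(query).scalar()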
@@ -734,11 +744,41 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
     def _get_dataset_column_distinct_value_frequencies(
         self, column_profile: DatasetFieldProfileClass, column: str
     ) -> None:
-        if self.config.include_field_distinct_value_frequencies:
+        if not self.config.include_field_distinct_value_frequencies:
+            return
+        try:
+            results = self.dataset.engine.execute(
+                sa.select(
+                    [
+                        sa.column(column),
+                        sa.func.count(sa.column(column)),
+                    ]
+                )
+                .select_from(self.dataset._table)
+                .where(sa.column(column).is_not(None))
+                .group_by(sa.column(column))
+            ).fetchall()
+
             column_profile.distinctValueFrequencies = [
-                ValueFrequencyClass(value=str(value), frequency=count)
-                for value, count in
+                ValueFrequencyClass(value=str(value), frequency=int(count))
+                for value, count in results
             ]
+            # sort so output is deterministic. don't do it in SQL because not all column
+            # types are sortable in SQL (such as JSON data types on Athena/Trino).
+            column_profile.distinctValueFrequencies = sorted(
+                column_profile.distinctValueFrequencies, key=lambda x: x.value
+            )
+        except Exception as e:
+            logger.debug(
+                f"Caught exception while attempting to get distinct value frequencies for column {column}. {e}"
+            )
+
+            self.report.report_warning(
+                title="Profiling: Unable to Calculate Distinct Value Frequencies",
+                message="Distinct value frequencies for the column will not be accessible",
+                context=f"{self.dataset_name}.{column}",
+                exc=e,
+            )
 
     @_run_with_query_combiner
     def _get_dataset_column_histogram(
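Note: the rewritten method computes frequencies in one GROUP BY and sorts client-side, since some column types (JSON on Athena/Trino) are not sortable in SQL. A standalone sketch of the query shape, with hypothetical table and column names:

import sqlalchemy as sa

t = sa.table("events", sa.column("status"))
col = sa.column("status")
query = (
    sa.select([col, sa.func.count(col)])
    .select_from(t)
    .where(col.is_not(None))
    .group_by(col)
)
print(query)
# roughly: SELECT status, count(status) FROM events WHERE status IS NOT NULL GROUP BY status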
@@ -1395,12 +1435,12 @@ class DatahubGEProfiler:
             )
             return None
         finally:
-            if batch is not None and self.base_engine.engine.name.
-
-
+            if batch is not None and self.base_engine.engine.name.lower() in [
+                GXSqlDialect.TRINO,
+                GXSqlDialect.AWSATHENA,
             ]:
                 if (
-                    self.base_engine.engine.name.
+                    self.base_engine.engine.name.lower() == GXSqlDialect.TRINO
                     or temp_view is not None
                 ):
                     self._drop_temp_table(batch)
@@ -134,7 +134,9 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
     SourceCapability.OWNERSHIP,
     "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class IcebergSource(StatefulIngestionSourceBase):
     """
     ## Integration Details
@@ -204,6 +204,7 @@ class KafkaConnectionTest:
     "Not supported",
     supported=False,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
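Note: this and most of the following hunks apply the same pattern, stacking @capability decorators on a source class so the capability shows up in the generated source docs. A minimal sketch with a hypothetical source class:

from datahub.ingestion.api.decorators import SourceCapability, capability
from datahub.ingestion.api.source import Source

@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
class MyExampleSource(Source):  # hypothetical; real sources also carry platform/config decorators
    ...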
@@ -126,6 +126,7 @@ logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, configured using `extract_usage_history`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
@@ -1253,6 +1253,7 @@ class Mapper:
     SourceCapability.DATA_PROFILING,
     "Optionally enabled via configuration profiling.enabled",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
     "Enabled by default, configured using `ingest_owner`",
 )
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -10,6 +10,7 @@ import humanfriendly
 import pydantic
 import redshift_connector
 
+from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
@@ -140,12 +141,15 @@ logger: logging.Logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, can be disabled via configuration `include_usage_statistics`",
 )
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.CLASSIFICATION,
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -354,7 +358,23 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             ).workunit_processor,
         ]
 
+    def _warn_deprecated_configs(self):
+        if (
+            self.config.match_fully_qualified_names is not None
+            and not self.config.match_fully_qualified_names
+            and self.config.schema_pattern is not None
+            and self.config.schema_pattern != AllowDenyPattern.allow_all()
+        ):
+            self.report.report_warning(
+                message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
+                "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
+                "The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
+                context="Config option deprecation warning",
+                title="Config option deprecation warning",
+            )
+
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
+        self._warn_deprecated_configs()
         connection = self._try_get_redshift_connection(self.config)
 
         if connection is None:
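Note: the warning fires only for the legacy combination of options. A minimal sketch of a recipe fragment (hypothetical pattern value) that would trigger it:

# schema_pattern is restricted while match_fully_qualified_names is explicitly False,
# so _warn_deprecated_configs() would emit the deprecation warning above.
source_config = {
    "match_fully_qualified_names": False,
    "schema_pattern": {"allow": ["^analytics$"]},  # anything other than allow-all
}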
@@ -178,7 +178,9 @@ class SACSourceReport(StaleEntityRemovalSourceReport):
     SourceCapability.LINEAGE_COARSE,
     "Enabled by default (only for Live Data Models)",
 )
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.SCHEMA_METADATA,
     "Enabled by default (only for Import Data Models)",
|
|
|
105
105
|
SourceCapability.OWNERSHIP,
|
|
106
106
|
"Enabled by default, configured using `ingest_owner`",
|
|
107
107
|
)
|
|
108
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
108
109
|
class SigmaSource(StatefulIngestionSourceBase, TestableSource):
|
|
109
110
|
"""
|
|
110
111
|
This plugin extracts the following:
|
|
@@ -154,14 +154,11 @@ class SnowflakeIdentifierConfig(
 
     email_domain: Optional[str] = pydantic.Field(
         default=None,
-        description="Email domain of your organization so users can be displayed on UI appropriately.",
+        description="Email domain of your organization so users can be displayed on UI appropriately. This is used only if we cannot infer email ID.",
     )
 
-
-
-        description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
-        "provided, generates email addresses for snowflake users with unset emails, based on their "
-        "username.",
+    _email_as_user_identifier = pydantic_removed_field(
+        "email_as_user_identifier",
     )
 
 
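Note: `email_as_user_identifier` is retired via `pydantic_removed_field`. A minimal sketch of that pattern, assuming the helper lives in datahub.configuration.validate_field_removal and (as its name suggests) registers a validator that flags the removed option when a recipe still supplies it:

from datahub.configuration.common import ConfigModel
from datahub.configuration.validate_field_removal import pydantic_removed_field

class ExampleConfig(ConfigModel):  # hypothetical config class
    # Validator that reports `old_flag` as removed if a user still sets it.
    _old_flag_removed = pydantic_removed_field("old_flag")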
@@ -325,15 +325,10 @@ class SnowflakeIdentifierBuilder:
         user_email: Optional[str],
     ) -> str:
         if user_email:
-            return self.snowflake_identifier(
-                user_email
-                if self.identifier_config.email_as_user_identifier is True
-                else user_email.split("@")[0]
-            )
+            return self.snowflake_identifier(user_email)
         return self.snowflake_identifier(
             f"{user_name}@{self.identifier_config.email_domain}"
-            if self.identifier_config.
-            and self.identifier_config.email_domain is not None
+            if self.identifier_config.email_domain is not None
             else user_name
         )
 
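Note: with `email_as_user_identifier` removed, user identifier resolution reduces to the logic below (standalone sketch, hypothetical function name): a known email always wins, then a synthesized user@email_domain, then the bare username.

from typing import Optional

def resolve_user_identifier(
    user_name: str, user_email: Optional[str], email_domain: Optional[str]
) -> str:
    # Mirrors the simplified logic above.
    if user_email:
        return user_email
    return f"{user_name}@{email_domain}" if email_domain is not None else user_name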
@@ -131,6 +131,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class SnowflakeV2Source(
     SnowflakeCommonMixin,
     StatefulIngestionSourceBase,
@@ -311,6 +312,7 @@ class SnowflakeV2Source(
                 SourceCapability.PLATFORM_INSTANCE,
                 SourceCapability.DOMAINS,
                 SourceCapability.DELETION_DETECTION,
+                SourceCapability.TEST_CONNECTION,
             )
         ]
 
@@ -379,7 +379,9 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
 @platform_name("ClickHouse")
 @config_class(ClickHouseConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 class ClickHouseSource(TwoTierSQLAlchemySource):
     """
@@ -26,7 +26,6 @@ class CockroachDBConfig(PostgresConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class CockroachDBSource(PostgresSource):
     config: CockroachDBConfig
 
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class HanaSource(SQLAlchemySource):
     def __init__(self, config: HanaConfig, ctx: PipelineContext):
         super().__init__(config, ctx, "hana")
@@ -161,7 +161,9 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @platform_name("Hive Metastore")
 @config_class(HiveMetastore)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
 @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
 @capability(
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class MariaDBSource(MySQLSource):
     def get_platform(self):
         return "mariadb"
@@ -174,7 +174,14 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
-@capability(
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
+)
 class SQLServerSource(SQLAlchemySource):
     """
     This plugin extracts the following:
@@ -65,15 +65,6 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
-@capability(
-    SourceCapability.LINEAGE_COARSE,
-    "Supported for views if `include_view_column_lineage` is enabled.",
-)
-@capability(
-    SourceCapability.LINEAGE_FINE,
-    "Supported for views if `include_view_column_lineage` is enabled.",
-)
 class MySQLSource(TwoTierSQLAlchemySource):
     """
     This plugin extracts the following:
@@ -131,7 +131,6 @@ class PostgresConfig(BasePostgresConfig):
 @capability(SourceCapability.DOMAINS, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
 class PostgresSource(SQLAlchemySource):
     """
     This plugin extracts the following:
@@ -302,6 +302,18 @@ class ProfileMetadata:
     "Enabled by default",
     supported=True,
 )
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for views via `include_view_lineage`",
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for views via `include_view_column_lineage`",
+)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     """A Base class for all SQL Sources that use SQLAlchemy to extend"""
 
@@ -879,6 +879,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
     SourceCapability.LINEAGE_FINE,
     "Enabled by default, configure using `extract_column_level_lineage`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class TableauSource(StatefulIngestionSourceBase, TestableSource):
     platform = "tableau"
 
@@ -162,6 +162,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 @support_status(SupportStatus.INCUBATING)
 class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     """
@@ -85,8 +85,11 @@ class ClickHouseUsageConfig(ClickHouseConfig, BaseUsageConfig, EnvConfigMixin):
 @platform_name("ClickHouse")
 @config_class(ClickHouseUsageConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
+@capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
 @dataclasses.dataclass
 class ClickHouseUsageSource(Source):
     """
@@ -15,7 +15,9 @@ from sqlalchemy.engine import Engine
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.time_window_config import get_time_bucket
 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -112,6 +114,7 @@ class TrinoUsageReport(SourceReport):
 @platform_name("Trino")
 @config_class(TrinoUsageConfig)
 @support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
 @dataclasses.dataclass
 class TrinoUsageSource(Source):
     """
@@ -4590,6 +4590,15 @@ class FabricTypeClass(object):
     RVW = "RVW"
     """Designates review fabrics"""
 
+    PRD = "PRD"
+    """Alternative Prod spelling"""
+
+    TST = "TST"
+    """Alternative Test spelling"""
+
+    SIT = "SIT"
+    """System Integration Testing"""
+
     SANDBOX = "SANDBOX"
     """Designates sandbox fabrics"""
 
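Note: the new symbols are usable anywhere a fabric/env string is accepted, for example when building a dataset URN with the existing make_dataset_urn helper (platform and name below are illustrative):

from datahub.emitter.mce_builder import make_dataset_urn

urn = make_dataset_urn(platform="snowflake", name="db.schema.table", env="PRD")
# -> urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PRD)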
@@ -21504,6 +21513,7 @@ class DataHubResourceFilterClass(DictWrapper):
         resources: Union[None, List[str]]=None,
         allResources: Optional[bool]=None,
         filter: Union[None, "PolicyMatchFilterClass"]=None,
+        privilegeConstraints: Union[None, "PolicyMatchFilterClass"]=None,
     ):
         super().__init__()
 
@@ -21515,12 +21525,14 @@ class DataHubResourceFilterClass(DictWrapper):
         else:
             self.allResources = allResources
         self.filter = filter
+        self.privilegeConstraints = privilegeConstraints
 
     def _restore_defaults(self) -> None:
         self.type = self.RECORD_SCHEMA.fields_dict["type"].default
         self.resources = self.RECORD_SCHEMA.fields_dict["resources"].default
         self.allResources = self.RECORD_SCHEMA.fields_dict["allResources"].default
         self.filter = self.RECORD_SCHEMA.fields_dict["filter"].default
+        self.privilegeConstraints = self.RECORD_SCHEMA.fields_dict["privilegeConstraints"].default
 
 
     @property
@@ -21565,6 +21577,16 @@ class DataHubResourceFilterClass(DictWrapper):
         self._inner_dict['filter'] = value
 
 
+    @property
+    def privilegeConstraints(self) -> Union[None, "PolicyMatchFilterClass"]:
+        """Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"""
+        return self._inner_dict.get('privilegeConstraints')  # type: ignore
+
+    @privilegeConstraints.setter
+    def privilegeConstraints(self, value: Union[None, "PolicyMatchFilterClass"]) -> None:
+        self._inner_dict['privilegeConstraints'] = value
+
+
 class DataHubRoleInfoClass(_Aspect):
     """Information about a DataHub Role."""
 
@@ -21633,6 +21655,9 @@ class PolicyMatchConditionClass(object):
     STARTS_WITH = "STARTS_WITH"
     """Whether the field value starts with the value"""
 
+    NOT_EQUALS = "NOT_EQUALS"
+    """Whether the field does not match the value"""
+
 
 
 class PolicyMatchCriterionClass(DictWrapper):
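Note: together these hunks let a policy carry constraints in the same PolicyMatchFilter shape as `filter`, with the new NOT_EQUALS condition excluding a specific value. A minimal construction sketch, assuming the classes re-exported from datahub.metadata.schema_classes; the `field` name and tag URN are illustrative:

from datahub.metadata.schema_classes import (
    DataHubResourceFilterClass,
    PolicyMatchConditionClass,
    PolicyMatchCriterionClass,
    PolicyMatchFilterClass,
)

constraints = PolicyMatchFilterClass(
    criteria=[
        PolicyMatchCriterionClass(
            field="tag",  # illustrative field name
            values=["urn:li:tag:pii"],
            condition=PolicyMatchConditionClass.NOT_EQUALS,  # new symbol in this release
        )
    ]
)
resource_filter = DataHubResourceFilterClass(privilegeConstraints=constraints)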
datahub/metadata/schema.avsc (CHANGED)

@@ -9502,13 +9502,16 @@
   "DEV": "Designates development fabrics",
   "EI": "Designates early-integration fabrics",
   "NON_PROD": "Designates non-production fabrics",
+  "PRD": "Alternative Prod spelling",
   "PRE": "Designates pre-production fabrics",
   "PROD": "Designates production fabrics",
   "QA": "Designates quality assurance fabrics",
   "RVW": "Designates review fabrics",
   "SANDBOX": "Designates sandbox fabrics",
+  "SIT": "System Integration Testing",
   "STG": "Designates staging fabrics",
   "TEST": "Designates testing fabrics",
+  "TST": "Alternative Test spelling",
   "UAT": "Designates user acceptance testing fabrics"
 },
 "name": "FabricType",
@@ -9525,6 +9528,9 @@
   "PROD",
   "CORP",
   "RVW",
+  "PRD",
+  "TST",
+  "SIT",
   "SANDBOX"
 ],
 "doc": "Fabric group type"
@@ -16441,13 +16447,15 @@
 "type": "enum",
 "symbolDocs": {
   "EQUALS": "Whether the field matches the value",
+  "NOT_EQUALS": "Whether the field does not match the value",
   "STARTS_WITH": "Whether the field value starts with the value"
 },
 "name": "PolicyMatchCondition",
 "namespace": "com.linkedin.pegasus2avro.policy",
 "symbols": [
   "EQUALS",
-  "STARTS_WITH"
+  "STARTS_WITH",
+  "NOT_EQUALS"
 ],
 "doc": "The matching condition in a filter criterion"
 },
@@ -16469,6 +16477,15 @@
       "name": "filter",
       "default": null,
       "doc": "Filter to apply privileges to"
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.policy.PolicyMatchFilter"
+      ],
+      "name": "privilegeConstraints",
+      "default": null,
+      "doc": "Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"
     }
   ],
   "doc": "Information used to filter DataHub resource."
@@ -93,13 +93,16 @@
   "DEV": "Designates development fabrics",
   "EI": "Designates early-integration fabrics",
   "NON_PROD": "Designates non-production fabrics",
+  "PRD": "Alternative Prod spelling",
   "PRE": "Designates pre-production fabrics",
   "PROD": "Designates production fabrics",
   "QA": "Designates quality assurance fabrics",
   "RVW": "Designates review fabrics",
   "SANDBOX": "Designates sandbox fabrics",
+  "SIT": "System Integration Testing",
   "STG": "Designates staging fabrics",
   "TEST": "Designates testing fabrics",
+  "TST": "Alternative Test spelling",
   "UAT": "Designates user acceptance testing fabrics"
 },
 "name": "FabricType",
@@ -116,6 +119,9 @@
   "PROD",
   "CORP",
   "RVW",
+  "PRD",
+  "TST",
+  "SIT",
   "SANDBOX"
 ],
 "doc": "Fabric group type"
@@ -147,13 +147,16 @@
   "DEV": "Designates development fabrics",
   "EI": "Designates early-integration fabrics",
   "NON_PROD": "Designates non-production fabrics",
+  "PRD": "Alternative Prod spelling",
   "PRE": "Designates pre-production fabrics",
   "PROD": "Designates production fabrics",
   "QA": "Designates quality assurance fabrics",
   "RVW": "Designates review fabrics",
   "SANDBOX": "Designates sandbox fabrics",
+  "SIT": "System Integration Testing",
   "STG": "Designates staging fabrics",
   "TEST": "Designates testing fabrics",
+  "TST": "Alternative Test spelling",
   "UAT": "Designates user acceptance testing fabrics"
 },
 "name": "FabricType",
@@ -170,6 +173,9 @@
   "PROD",
   "CORP",
   "RVW",
+  "PRD",
+  "TST",
+  "SIT",
   "SANDBOX"
 ],
 "doc": "Fabric group type"
@@ -110,13 +110,15 @@
 "type": "enum",
 "symbolDocs": {
   "EQUALS": "Whether the field matches the value",
+  "NOT_EQUALS": "Whether the field does not match the value",
   "STARTS_WITH": "Whether the field value starts with the value"
 },
 "name": "PolicyMatchCondition",
 "namespace": "com.linkedin.pegasus2avro.policy",
 "symbols": [
   "EQUALS",
-  "STARTS_WITH"
+  "STARTS_WITH",
+  "NOT_EQUALS"
 ],
 "doc": "The matching condition in a filter criterion"
 },
@@ -138,6 +140,15 @@
       "name": "filter",
       "default": null,
       "doc": "Filter to apply privileges to"
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.policy.PolicyMatchFilter"
+      ],
+      "name": "privilegeConstraints",
+      "default": null,
+      "doc": "Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"
     }
   ],
   "doc": "Information used to filter DataHub resource."