acryl-datahub 1.1.0.3rc2__py3-none-any.whl → 1.1.0.4rc2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.

Note: this release has been flagged as potentially problematic.
Files changed (57)
  1. {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/METADATA +2470 -2470
  2. {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/RECORD +57 -57
  3. datahub/_version.py +1 -1
  4. datahub/cli/check_cli.py +27 -0
  5. datahub/cli/delete_cli.py +117 -19
  6. datahub/ingestion/api/source.py +2 -0
  7. datahub/ingestion/glossary/classification_mixin.py +5 -0
  8. datahub/ingestion/graph/client.py +42 -2
  9. datahub/ingestion/source/bigquery_v2/bigquery.py +1 -0
  10. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  11. datahub/ingestion/source/dbt/dbt_cloud.py +3 -0
  12. datahub/ingestion/source/dbt/dbt_common.py +3 -1
  13. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  14. datahub/ingestion/source/file.py +3 -0
  15. datahub/ingestion/source/ge_profiling_config.py +11 -0
  16. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  17. datahub/ingestion/source/kafka/kafka.py +16 -0
  18. datahub/ingestion/source/looker/looker_source.py +1 -0
  19. datahub/ingestion/source/powerbi/powerbi.py +1 -0
  20. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  21. datahub/ingestion/source/redshift/redshift.py +4 -1
  22. datahub/ingestion/source/sac/sac.py +3 -1
  23. datahub/ingestion/source/sigma/sigma.py +1 -0
  24. datahub/ingestion/source/snowflake/snowflake_config.py +3 -6
  25. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  26. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  27. datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
  28. datahub/ingestion/source/sql/clickhouse.py +3 -1
  29. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  30. datahub/ingestion/source/sql/hana.py +3 -1
  31. datahub/ingestion/source/sql/hive_metastore.py +3 -1
  32. datahub/ingestion/source/sql/mariadb.py +0 -1
  33. datahub/ingestion/source/sql/mssql/source.py +8 -1
  34. datahub/ingestion/source/sql/mysql.py +0 -1
  35. datahub/ingestion/source/sql/postgres.py +0 -1
  36. datahub/ingestion/source/sql/sql_common.py +12 -0
  37. datahub/ingestion/source/tableau/tableau.py +1 -0
  38. datahub/ingestion/source/unity/source.py +1 -0
  39. datahub/ingestion/source/usage/clickhouse_usage.py +3 -1
  40. datahub/metadata/_internal_schema_classes.py +25 -0
  41. datahub/metadata/schema.avsc +18 -1
  42. datahub/metadata/schemas/ContainerProperties.avsc +6 -0
  43. datahub/metadata/schemas/DataFlowInfo.avsc +6 -0
  44. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  45. datahub/metadata/schemas/DataJobInfo.avsc +6 -0
  46. datahub/metadata/schemas/DataProcessKey.avsc +6 -0
  47. datahub/metadata/schemas/DatasetKey.avsc +6 -0
  48. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +6 -0
  49. datahub/metadata/schemas/MLModelDeploymentKey.avsc +6 -0
  50. datahub/metadata/schemas/MLModelGroupKey.avsc +6 -0
  51. datahub/metadata/schemas/MLModelKey.avsc +6 -0
  52. datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -1
  53. datahub/sql_parsing/sqlglot_lineage.py +21 -6
  54. {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/WHEEL +0 -0
  55. {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/entry_points.txt +0 -0
  56. {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/licenses/LICENSE +0 -0
  57. {acryl_datahub-1.1.0.3rc2.dist-info → acryl_datahub-1.1.0.4rc2.dist-info}/top_level.txt +0 -0
@@ -906,6 +906,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  batch_size: int = 5000,
  extraFilters: Optional[List[RawSearchFilterRule]] = None,
  extra_or_filters: Optional[RawSearchFilter] = None,
+ skip_cache: bool = False,
  ) -> Iterable[str]:
  """Fetch all urns that match all of the given filters.

@@ -924,6 +925,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  Note that this requires browsePathV2 aspects (added in 0.10.4+).
  :param status: Filter on the deletion status of the entity. The default is only return non-soft-deleted entities.
  :param extraFilters: Additional filters to apply. If specified, the results will match all of the filters.
+ :param skip_cache: Whether to bypass caching. Defaults to False.

  :return: An iterable of urns that match the filters.
  """
@@ -951,7 +953,8 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  $query: String!,
  $orFilters: [AndFilterInput!],
  $batchSize: Int!,
- $scrollId: String) {
+ $scrollId: String,
+ $skipCache: Boolean!) {

  scrollAcrossEntities(input: {
  query: $query,
@@ -962,6 +965,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  searchFlags: {
  skipHighlighting: true
  skipAggregates: true
+ skipCache: $skipCache
  }
  }) {
  nextScrollId
@@ -980,6 +984,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  "query": query,
  "orFilters": orFilters,
  "batchSize": batch_size,
+ "skipCache": skip_cache,
  }

  for entity in self._scroll_across_entities(graphql_query, variables):
@@ -1085,7 +1090,7 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  "query": query,
  "orFilters": or_filters_final,
  "batchSize": batch_size,
- "skipCache": "true" if skip_cache else "false",
+ "skipCache": skip_cache,
  "fetchExtraFields": extra_source_fields,
  }

@@ -1429,6 +1434,41 @@ class DataHubGraph(DatahubRestEmitter, EntityVersioningAPI):
  related_aspects = response.get("relatedAspects", [])
  return reference_count, related_aspects

+ def restore_indices(
+ self,
+ urn_pattern: str,
+ aspect: Optional[str] = None,
+ start: Optional[int] = None,
+ batch_size: Optional[int] = None,
+ ) -> str:
+ """Restore the indices for a given urn or urn-like pattern.
+
+ Args:
+ urn_pattern: The exact URN or a pattern (with % for wildcard) to match URNs.
+ aspect: Optional aspect string to restore indices for a specific aspect.
+ start: Optional integer to decide which row number of sql store to restore from. Default: 0.
+ batch_size: Optional integer to decide how many rows to restore. Default: 10.
+
+ Returns:
+ A string containing the result of the restore indices operation. This format is subject to change.
+ """
+ if "%" in urn_pattern:
+ payload_obj: dict = {"urnLike": urn_pattern}
+ else:
+ payload_obj = {"urn": urn_pattern}
+ if aspect is not None:
+ payload_obj["aspect"] = aspect
+ if start is not None:
+ payload_obj["start"] = start
+ if batch_size is not None:
+ payload_obj["batchSize"] = batch_size
+ raw_result = self._post_generic(
+ f"{self._gms_server}/operations?action=restoreIndices", payload_obj
+ )
+ result = raw_result["value"]
+ logger.debug(f"Restore indices result: {result}")
+ return result
+
  @functools.lru_cache
  def _make_schema_resolver(
  self,
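
The client changes above add a skip_cache flag to the scroll-based urn search and a new restore_indices helper on DataHubGraph. A minimal usage sketch (not part of the diff), assuming a reachable GMS at http://localhost:8080 and that the scroll method shown here is get_urns_by_filter, only part of whose signature appears in this diff:

from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

# skip_cache=True asks GMS to bypass its search cache while scrolling urns.
for urn in graph.get_urns_by_filter(entity_types=["dataset"], skip_cache=True):
    print(urn)

# restore_indices accepts an exact urn or a %-wildcard pattern plus optional
# aspect/start/batch_size, and returns an opaque status string from the
# /operations?action=restoreIndices endpoint.
result = graph.restore_indices(
    urn_pattern="urn:li:dataset:%",  # illustrative pattern, not taken from the diff
    aspect="datasetProperties",
    batch_size=100,
)
print(result)
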
@@ -99,6 +99,7 @@ def cleanup(config: BigQueryV2Config) -> None:
  SourceCapability.PARTITION_SUPPORT,
  "Enabled by default, partition keys and clustering keys are supported.",
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource):
  def __init__(self, ctx: PipelineContext, config: BigQueryV2Config):
  super().__init__(config, ctx)
@@ -63,7 +63,7 @@ class BigQueryIdentifierBuilder:
  )

  def gen_user_urn(self, user_email: str) -> str:
- return make_user_urn(user_email.split("@")[0])
+ return make_user_urn(user_email)

  def make_data_platform_urn(self) -> str:
  return make_data_platform_urn(self.platform)
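
The gen_user_urn change above means BigQuery user urns now keep the full email address rather than only the part before the "@". Roughly, using the shared make_user_urn helper:

from datahub.emitter.mce_builder import make_user_urn

# Old behavior: the domain was stripped before building the corpuser urn.
make_user_urn("jdoe@example.com".split("@")[0])  # -> "urn:li:corpuser:jdoe"

# New behavior: the full email is preserved in the urn.
make_user_urn("jdoe@example.com")  # -> "urn:li:corpuser:jdoe@example.com"
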
@@ -9,7 +9,9 @@ import requests
  from pydantic import Field, root_validator

  from datahub.ingestion.api.decorators import (
+ SourceCapability,
  SupportStatus,
+ capability,
  config_class,
  platform_name,
  support_status,
@@ -261,6 +263,7 @@ query DatahubMetadataQuery_{type}($jobId: BigInt!, $runId: BigInt) {{
  @platform_name("dbt")
  @config_class(DBTCloudConfig)
  @support_status(SupportStatus.CERTIFIED)
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class DBTCloudSource(DBTSourceBase, TestableSource):
  config: DBTCloudConfig

@@ -823,7 +823,9 @@ def get_column_type(
  @platform_name("dbt")
  @config_class(DBTCommonConfig)
  @support_status(SupportStatus.CERTIFIED)
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
  @capability(
  SourceCapability.LINEAGE_FINE,
@@ -15,7 +15,9 @@ from datahub.configuration.git import GitReference
  from datahub.configuration.validate_field_rename import pydantic_renamed_field
  from datahub.ingestion.api.common import PipelineContext
  from datahub.ingestion.api.decorators import (
+ SourceCapability,
  SupportStatus,
+ capability,
  config_class,
  platform_name,
  support_status,
@@ -464,6 +466,7 @@ def load_run_results(
  @platform_name("dbt")
  @config_class(DBTCoreConfig)
  @support_status(SupportStatus.CERTIFIED)
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class DBTCoreSource(DBTSourceBase, TestableSource):
  config: DBTCoreConfig
  report: DBTCoreReport
@@ -18,7 +18,9 @@ from datahub.configuration.validate_field_rename import pydantic_renamed_field
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.ingestion.api.common import PipelineContext
  from datahub.ingestion.api.decorators import (
+ SourceCapability,
  SupportStatus,
+ capability,
  config_class,
  platform_name,
  support_status,
@@ -187,6 +189,7 @@ class FileSourceReport(StaleEntityRemovalSourceReport):
  @platform_name("Metadata File")
  @config_class(FileSourceConfig)
  @support_status(SupportStatus.CERTIFIED)
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class GenericFileSource(StatefulIngestionSourceBase, TestableSource):
  """
  This plugin pulls metadata from a previously generated file.
@@ -125,6 +125,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
  description="Profile table only if it has been updated since these many number of days. "
  "If set to `null`, no constraint of last modified time for tables to profile. "
  "Supported only in `snowflake` and `BigQuery`.",
+ schema_extra={"supported_sources": ["snowflake", "bigquery"]},
  )

  profile_table_size_limit: Optional[int] = Field(
@@ -132,6 +133,9 @@ class GEProfilingConfig(GEProfilingBaseConfig):
  description="Profile tables only if their size is less than specified GBs. If set to `null`, "
  "no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
  "`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
+ schema_extra={
+ "supported_sources": ["snowflake", "bigquery", "unity-catalog", "oracle"]
+ },
  )

  profile_table_row_limit: Optional[int] = Field(
@@ -139,12 +143,14 @@ class GEProfilingConfig(GEProfilingBaseConfig):
  description="Profile tables only if their row count is less than specified count. "
  "If set to `null`, no limit on the row count of tables to profile. Supported only in "
  "`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
+ schema_extra={"supported_sources": ["snowflake", "bigquery", "oracle"]},
  )

  profile_table_row_count_estimate_only: bool = Field(
  default=False,
  description="Use an approximate query for row count. This will be much faster but slightly "
  "less accurate. Only supported for Postgres and MySQL. ",
+ schema_extra={"supported_sources": ["postgres", "mysql"]},
  )

  # The query combiner enables us to combine multiple queries into a single query,
@@ -161,27 +167,32 @@ class GEProfilingConfig(GEProfilingBaseConfig):
  default=True,
  description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
  "If enabled, latest partition data is used for profiling.",
+ schema_extra={"supported_sources": ["athena", "bigquery"]},
  )
  partition_datetime: Optional[datetime.datetime] = Field(
  default=None,
  description="If specified, profile only the partition which matches this datetime. "
  "If not specified, profile the latest partition. Only Bigquery supports this.",
+ schema_extra={"supported_sources": ["bigquery"]},
  )
  use_sampling: bool = Field(
  default=True,
  description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. "
  "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ",
+ schema_extra={"supported_sources": ["bigquery", "snowflake"]},
  )

  sample_size: int = Field(
  default=10000,
  description="Number of rows to be sampled from table for column level profiling."
  "Applicable only if `use_sampling` is set to True.",
+ schema_extra={"supported_sources": ["bigquery", "snowflake"]},
  )

  profile_external_tables: bool = Field(
  default=False,
  description="Whether to profile external tables. Only Snowflake and Redshift supports this.",
+ schema_extra={"supported_sources": ["redshift", "snowflake"]},
  )

  tags_to_ignore_sampling: Optional[List[str]] = pydantic.Field(
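
The schema_extra entries above attach a machine-readable supported_sources list to each profiling option. A rough sketch of how that metadata can be read back, assuming pydantic v1 behavior (which these configs build on), where unrecognized Field() keyword arguments are kept in field_info.extra; the model below is illustrative and not part of acryl-datahub:

import pydantic

class ExampleProfilingConfig(pydantic.BaseModel):
    profile_external_tables: bool = pydantic.Field(
        default=False,
        description="Whether to profile external tables.",
        schema_extra={"supported_sources": ["redshift", "snowflake"]},
    )

# Unrecognized Field() kwargs land in field_info.extra, so docs tooling can
# discover which connectors support a given option.
field = ExampleProfilingConfig.__fields__["profile_external_tables"]
print(field.field_info.extra["schema_extra"]["supported_sources"])
# ['redshift', 'snowflake']
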
@@ -134,7 +134,9 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
  SourceCapability.OWNERSHIP,
  "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
  )
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  class IcebergSource(StatefulIngestionSourceBase):
  """
  ## Integration Details
@@ -189,6 +189,22 @@ class KafkaConnectionTest:
  SourceCapability.SCHEMA_METADATA,
  "Schemas associated with each topic are extracted from the schema registry. Avro and Protobuf (certified), JSON (incubating). Schema references are supported.",
  )
+ @capability(
+ SourceCapability.DATA_PROFILING,
+ "Not supported",
+ supported=False,
+ )
+ @capability(
+ SourceCapability.LINEAGE_COARSE,
+ "Not supported. If you use Kafka Connect, the kafka-connect source can generate lineage.",
+ supported=False,
+ )
+ @capability(
+ SourceCapability.LINEAGE_FINE,
+ "Not supported",
+ supported=False,
+ )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class KafkaSource(StatefulIngestionSourceBase, TestableSource):
  """
  This plugin extracts the following:
@@ -126,6 +126,7 @@ logger = logging.getLogger(__name__)
  SourceCapability.USAGE_STATS,
  "Enabled by default, configured using `extract_usage_history`",
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
  """
  This plugin extracts the following:
@@ -1253,6 +1253,7 @@ class Mapper:
  SourceCapability.DATA_PROFILING,
  "Optionally enabled via configuration profiling.enabled",
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
  """
  This plugin extracts the following:
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
  "Enabled by default, configured using `ingest_owner`",
  )
  @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
  """
  This plugin extracts the following:
@@ -140,12 +140,15 @@ logger: logging.Logger = logging.getLogger(__name__)
  SourceCapability.USAGE_STATS,
  "Enabled by default, can be disabled via configuration `include_usage_statistics`",
  )
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  @capability(
  SourceCapability.CLASSIFICATION,
  "Optionally enabled via `classification.enabled`",
  supported=True,
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
  """
  This plugin extracts the following:
@@ -178,7 +178,9 @@ class SACSourceReport(StaleEntityRemovalSourceReport):
  SourceCapability.LINEAGE_COARSE,
  "Enabled by default (only for Live Data Models)",
  )
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  @capability(
  SourceCapability.SCHEMA_METADATA,
  "Enabled by default (only for Import Data Models)",
@@ -105,6 +105,7 @@ logger = logging.getLogger(__name__)
  SourceCapability.OWNERSHIP,
  "Enabled by default, configured using `ingest_owner`",
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class SigmaSource(StatefulIngestionSourceBase, TestableSource):
  """
  This plugin extracts the following:
@@ -154,14 +154,11 @@ class SnowflakeIdentifierConfig(

  email_domain: Optional[str] = pydantic.Field(
  default=None,
- description="Email domain of your organization so users can be displayed on UI appropriately.",
+ description="Email domain of your organization so users can be displayed on UI appropriately. This is used only if we cannot infer email ID.",
  )

- email_as_user_identifier: bool = Field(
- default=True,
- description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
- "provided, generates email addresses for snowflake users with unset emails, based on their "
- "username.",
+ _email_as_user_identifier = pydantic_removed_field(
+ "email_as_user_identifier",
  )


@@ -20,6 +20,7 @@ from datahub.ingestion.source.snowflake.snowflake_schema_gen import (
  SnowflakeSchemaGenerator,
  )
  from datahub.ingestion.source.snowflake.snowflake_utils import (
+ SnowflakeFilter,
  SnowflakeIdentifierBuilder,
  )
  from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
@@ -81,6 +82,10 @@ class SnowflakeSummarySource(Source):
  profiler=None,
  aggregator=None,
  snowsight_url_builder=None,
+ filters=SnowflakeFilter(
+ filter_config=self.config,
+ structured_reporter=self.report,
+ ),
  )

  # Databases.
@@ -325,15 +325,10 @@ class SnowflakeIdentifierBuilder:
  user_email: Optional[str],
  ) -> str:
  if user_email:
- return self.snowflake_identifier(
- user_email
- if self.identifier_config.email_as_user_identifier is True
- else user_email.split("@")[0]
- )
+ return self.snowflake_identifier(user_email)
  return self.snowflake_identifier(
  f"{user_name}@{self.identifier_config.email_domain}"
- if self.identifier_config.email_as_user_identifier is True
- and self.identifier_config.email_domain is not None
+ if self.identifier_config.email_domain is not None
  else user_name
  )
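
With email_as_user_identifier removed, the Snowflake user identifier shown above resolves in a fixed preference order: the email reported by Snowflake, then username@email_domain when email_domain is configured, then the bare username. A standalone restatement of that logic (the snowflake_identifier() normalization step is omitted):

from typing import Optional

def resolve_user_identifier(
    user_name: str, user_email: Optional[str], email_domain: Optional[str]
) -> str:
    # 1. Prefer the email reported by Snowflake, when present.
    if user_email:
        return user_email
    # 2. Otherwise synthesize one from the configured email_domain.
    if email_domain is not None:
        return f"{user_name}@{email_domain}"
    # 3. Fall back to the bare username.
    return user_name

assert resolve_user_identifier("JDOE", "jdoe@corp.com", "corp.com") == "jdoe@corp.com"
assert resolve_user_identifier("JDOE", None, "corp.com") == "JDOE@corp.com"
assert resolve_user_identifier("JDOE", None, None) == "JDOE"
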
 
@@ -131,6 +131,7 @@ logger: logging.Logger = logging.getLogger(__name__)
  "Optionally enabled via `classification.enabled`",
  supported=True,
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class SnowflakeV2Source(
  SnowflakeCommonMixin,
  StatefulIngestionSourceBase,
@@ -311,6 +312,7 @@ class SnowflakeV2Source(
  SourceCapability.PLATFORM_INSTANCE,
  SourceCapability.DOMAINS,
  SourceCapability.DELETION_DETECTION,
+ SourceCapability.TEST_CONNECTION,
  )
  ]

@@ -379,7 +379,9 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
  @platform_name("ClickHouse")
  @config_class(ClickHouseConfig)
  @support_status(SupportStatus.CERTIFIED)
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
  class ClickHouseSource(TwoTierSQLAlchemySource):
  """
@@ -26,7 +26,6 @@ class CockroachDBConfig(PostgresConfig):
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
  class CockroachDBSource(PostgresSource):
  config: CockroachDBConfig

@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  class HanaSource(SQLAlchemySource):
  def __init__(self, config: HanaConfig, ctx: PipelineContext):
  super().__init__(config, ctx, "hana")
@@ -161,7 +161,9 @@ class HiveMetastore(BasicSQLAlchemyConfig):
  @platform_name("Hive Metastore")
  @config_class(HiveMetastore)
  @support_status(SupportStatus.CERTIFIED)
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
  @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
  @capability(
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
  class MariaDBSource(MySQLSource):
  def get_platform(self):
  return "mariadb"
@@ -174,7 +174,14 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.LINEAGE_COARSE,
+ "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
+ )
+ @capability(
+ SourceCapability.LINEAGE_FINE,
+ "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
+ )
  class SQLServerSource(SQLAlchemySource):
  """
  This plugin extracts the following:
@@ -65,7 +65,6 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
  class MySQLSource(TwoTierSQLAlchemySource):
  """
  This plugin extracts the following:
@@ -131,7 +131,6 @@ class PostgresConfig(BasePostgresConfig):
  @capability(SourceCapability.DOMAINS, "Enabled by default")
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
- @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
  class PostgresSource(SQLAlchemySource):
  """
  This plugin extracts the following:
@@ -302,6 +302,18 @@ class ProfileMetadata:
  "Enabled by default",
  supported=True,
  )
+ @capability(
+ SourceCapability.LINEAGE_COARSE,
+ "Enabled by default to get lineage for views via `include_view_lineage`",
+ )
+ @capability(
+ SourceCapability.LINEAGE_FINE,
+ "Enabled by default to get lineage for views via `include_view_column_lineage`",
+ )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
  """A Base class for all SQL Sources that use SQLAlchemy to extend"""

@@ -879,6 +879,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
  SourceCapability.LINEAGE_FINE,
  "Enabled by default, configure using `extract_column_level_lineage`",
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  class TableauSource(StatefulIngestionSourceBase, TestableSource):
  platform = "tableau"

@@ -162,6 +162,7 @@ logger: logging.Logger = logging.getLogger(__name__)
  "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
  supported=True,
  )
+ @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
  @support_status(SupportStatus.INCUBATING)
  class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
  """
@@ -85,7 +85,9 @@ class ClickHouseUsageConfig(ClickHouseConfig, BaseUsageConfig, EnvConfigMixin):
  @platform_name("ClickHouse")
  @config_class(ClickHouseUsageConfig)
  @support_status(SupportStatus.CERTIFIED)
- @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+ @capability(
+ SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+ )
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
  @dataclasses.dataclass
  class ClickHouseUsageSource(Source):
@@ -4590,6 +4590,15 @@ class FabricTypeClass(object):
  RVW = "RVW"
  """Designates review fabrics"""

+ PRD = "PRD"
+ """Alternative Prod spelling"""
+
+ TST = "TST"
+ """Alternative Test spelling"""
+
+ SIT = "SIT"
+ """System Integration Testing"""
+
  SANDBOX = "SANDBOX"
  """Designates sandbox fabrics"""
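
The new FabricTypeClass constants (PRD, TST, SIT) can be used anywhere an environment string is accepted, for example when building dataset urns. A small sketch; the platform and table name are made up for illustration:

from datahub.emitter.mce_builder import make_dataset_urn
from datahub.metadata.schema_classes import FabricTypeClass

urn = make_dataset_urn(
    platform="snowflake",
    name="analytics.public.orders",
    env=FabricTypeClass.SIT,  # one of the newly added environment values
)
print(urn)  # urn:li:dataset:(urn:li:dataPlatform:snowflake,analytics.public.orders,SIT)
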
 
@@ -21504,6 +21513,7 @@ class DataHubResourceFilterClass(DictWrapper):
  resources: Union[None, List[str]]=None,
  allResources: Optional[bool]=None,
  filter: Union[None, "PolicyMatchFilterClass"]=None,
+ privilegeConstraints: Union[None, "PolicyMatchFilterClass"]=None,
  ):
  super().__init__()

@@ -21515,12 +21525,14 @@ class DataHubResourceFilterClass(DictWrapper):
  else:
  self.allResources = allResources
  self.filter = filter
+ self.privilegeConstraints = privilegeConstraints

  def _restore_defaults(self) -> None:
  self.type = self.RECORD_SCHEMA.fields_dict["type"].default
  self.resources = self.RECORD_SCHEMA.fields_dict["resources"].default
  self.allResources = self.RECORD_SCHEMA.fields_dict["allResources"].default
  self.filter = self.RECORD_SCHEMA.fields_dict["filter"].default
+ self.privilegeConstraints = self.RECORD_SCHEMA.fields_dict["privilegeConstraints"].default


  @property
@@ -21565,6 +21577,16 @@ class DataHubResourceFilterClass(DictWrapper):
  self._inner_dict['filter'] = value


+ @property
+ def privilegeConstraints(self) -> Union[None, "PolicyMatchFilterClass"]:
+ """Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"""
+ return self._inner_dict.get('privilegeConstraints') # type: ignore
+
+ @privilegeConstraints.setter
+ def privilegeConstraints(self, value: Union[None, "PolicyMatchFilterClass"]) -> None:
+ self._inner_dict['privilegeConstraints'] = value
+
+
  class DataHubRoleInfoClass(_Aspect):
  """Information about a DataHub Role."""

@@ -21633,6 +21655,9 @@ class PolicyMatchConditionClass(object):
  STARTS_WITH = "STARTS_WITH"
  """Whether the field value starts with the value"""

+ NOT_EQUALS = "NOT_EQUALS"
+ """Whether the field does not match the value"""
+


  class PolicyMatchCriterionClass(DictWrapper):
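
Taken together, the new privilegeConstraints field and the NOT_EQUALS match condition let a policy's resource filter restrict which sub-resources (for example, a particular tag) an otherwise-allowed operation may modify. A construction sketch using the generated classes; the criterion field name "TAG" and the tag urn are guesses for illustration and are not taken from this diff:

from datahub.metadata.schema_classes import (
    DataHubResourceFilterClass,
    PolicyMatchConditionClass,
    PolicyMatchCriterionClass,
    PolicyMatchFilterClass,
)

resource_filter = DataHubResourceFilterClass(
    allResources=True,
    privilegeConstraints=PolicyMatchFilterClass(
        criteria=[
            PolicyMatchCriterionClass(
                field="TAG",  # hypothetical criterion field
                values=["urn:li:tag:pii"],
                condition=PolicyMatchConditionClass.NOT_EQUALS,
            )
        ]
    ),
)
print(resource_filter.to_obj())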