acryl-datahub 1.1.0.4rc1__py3-none-any.whl → 1.1.0.4rc3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (58)
  1. {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/METADATA +2581 -2581
  2. {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/RECORD +58 -58
  3. datahub/_version.py +1 -1
  4. datahub/emitter/rest_emitter.py +18 -1
  5. datahub/ingestion/api/source.py +2 -0
  6. datahub/ingestion/source/bigquery_v2/bigquery.py +18 -0
  7. datahub/ingestion/source/dbt/dbt_cloud.py +3 -0
  8. datahub/ingestion/source/dbt/dbt_common.py +3 -1
  9. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  10. datahub/ingestion/source/dremio/dremio_api.py +98 -68
  11. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  12. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  13. datahub/ingestion/source/dremio/dremio_source.py +90 -77
  14. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  15. datahub/ingestion/source/file.py +3 -0
  16. datahub/ingestion/source/ge_data_profiler.py +48 -8
  17. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  18. datahub/ingestion/source/kafka/kafka.py +1 -0
  19. datahub/ingestion/source/looker/looker_source.py +1 -0
  20. datahub/ingestion/source/powerbi/powerbi.py +1 -0
  21. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  22. datahub/ingestion/source/redshift/redshift.py +21 -1
  23. datahub/ingestion/source/sac/sac.py +3 -1
  24. datahub/ingestion/source/sigma/sigma.py +1 -0
  25. datahub/ingestion/source/snowflake/snowflake_config.py +3 -6
  26. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  27. datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
  28. datahub/ingestion/source/sql/clickhouse.py +3 -1
  29. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  30. datahub/ingestion/source/sql/hana.py +3 -1
  31. datahub/ingestion/source/sql/hive_metastore.py +3 -1
  32. datahub/ingestion/source/sql/mariadb.py +0 -1
  33. datahub/ingestion/source/sql/mssql/source.py +8 -1
  34. datahub/ingestion/source/sql/mysql.py +0 -9
  35. datahub/ingestion/source/sql/postgres.py +0 -1
  36. datahub/ingestion/source/sql/sql_common.py +12 -0
  37. datahub/ingestion/source/tableau/tableau.py +1 -0
  38. datahub/ingestion/source/unity/source.py +1 -0
  39. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  40. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  41. datahub/metadata/_internal_schema_classes.py +25 -0
  42. datahub/metadata/schema.avsc +18 -1
  43. datahub/metadata/schemas/ContainerProperties.avsc +6 -0
  44. datahub/metadata/schemas/DataFlowInfo.avsc +6 -0
  45. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  46. datahub/metadata/schemas/DataJobInfo.avsc +6 -0
  47. datahub/metadata/schemas/DataProcessKey.avsc +6 -0
  48. datahub/metadata/schemas/DatasetKey.avsc +6 -0
  49. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +6 -0
  50. datahub/metadata/schemas/MLModelDeploymentKey.avsc +6 -0
  51. datahub/metadata/schemas/MLModelGroupKey.avsc +6 -0
  52. datahub/metadata/schemas/MLModelKey.avsc +6 -0
  53. datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -1
  54. datahub/utilities/stats_collections.py +4 -0
  55. {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/WHEEL +0 -0
  56. {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/entry_points.txt +0 -0
  57. {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/licenses/LICENSE +0 -0
  58. {acryl_datahub-1.1.0.4rc1.dist-info → acryl_datahub-1.1.0.4rc3.dist-info}/top_level.txt +0 -0
@@ -120,7 +120,6 @@ SNOWFLAKE = "snowflake"
 BIGQUERY = "bigquery"
 REDSHIFT = "redshift"
 DATABRICKS = "databricks"
-TRINO = "trino"
 
 # Type names for Databricks, to match Title Case types in sqlalchemy
 ProfilerTypeMapping.INT_TYPE_NAMES.append("Integer")
@@ -206,6 +205,17 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
             )
         )
         return convert_to_json_serializable(element_values.fetchone()[0])
+    elif (
+        self.engine.dialect.name.lower() == GXSqlDialect.AWSATHENA
+        or self.engine.dialect.name.lower() == GXSqlDialect.TRINO
+    ):
+        return convert_to_json_serializable(
+            self.engine.execute(
+                sa.select(sa.func.approx_distinct(sa.column(column))).select_from(
+                    self._table
+                )
+            ).scalar()
+        )
     return convert_to_json_serializable(
         self.engine.execute(
             sa.select([sa.func.count(sa.func.distinct(sa.column(column)))]).select_from(
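For context, the new Trino/Athena branch swaps the exact COUNT(DISTINCT ...) for the approximate approx_distinct aggregate. Below is a minimal standalone sketch of how such a query is built with SQLAlchemy; it is not the ge_data_profiler code itself, the table and column names are invented, and SQLAlchemy 1.4+ is assumed.

    import sqlalchemy as sa

    # Hypothetical table/column, for illustration only.
    orders = sa.table("orders", sa.column("customer_id"))

    # Same shape as the new elif branch: approx_distinct instead of count(distinct ...).
    stmt = sa.select(sa.func.approx_distinct(sa.column("customer_id"))).select_from(orders)
    print(stmt)  # SELECT approx_distinct(customer_id) AS approx_distinct_1 FROM orders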
@@ -734,11 +744,41 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
     def _get_dataset_column_distinct_value_frequencies(
         self, column_profile: DatasetFieldProfileClass, column: str
     ) -> None:
-        if self.config.include_field_distinct_value_frequencies:
+        if not self.config.include_field_distinct_value_frequencies:
+            return
+        try:
+            results = self.dataset.engine.execute(
+                sa.select(
+                    [
+                        sa.column(column),
+                        sa.func.count(sa.column(column)),
+                    ]
+                )
+                .select_from(self.dataset._table)
+                .where(sa.column(column).is_not(None))
+                .group_by(sa.column(column))
+            ).fetchall()
+
             column_profile.distinctValueFrequencies = [
-                ValueFrequencyClass(value=str(value), frequency=count)
-                for value, count in self.dataset.get_column_value_counts(column).items()
+                ValueFrequencyClass(value=str(value), frequency=int(count))
+                for value, count in results
             ]
+            # sort so output is deterministic. don't do it in SQL because not all column
+            # types are sortable in SQL (such as JSON data types on Athena/Trino).
+            column_profile.distinctValueFrequencies = sorted(
+                column_profile.distinctValueFrequencies, key=lambda x: x.value
+            )
+        except Exception as e:
+            logger.debug(
+                f"Caught exception while attempting to get distinct value frequencies for column {column}. {e}"
+            )
+
+            self.report.report_warning(
+                title="Profiling: Unable to Calculate Distinct Value Frequencies",
+                message="Distinct value frequencies for the column will not be accessible",
+                context=f"{self.dataset_name}.{column}",
+                exc=e,
+            )
 
     @_run_with_query_combiner
     def _get_dataset_column_histogram(
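A hedged restatement of the new frequency computation, outside the profiler class: group and count in SQL, then sort in Python because, per the comment in the diff, some column types (such as JSON on Athena/Trino) are not sortable in SQL. The table, column, and sample rows below are illustrative only; SQLAlchemy 1.4+ is assumed.

    import sqlalchemy as sa

    status = sa.column("status")
    orders = sa.table("orders", status)

    # GROUP BY query in the same shape as the diff; in the real code it is run
    # with engine.execute(stmt).fetchall().
    stmt = (
        sa.select(status, sa.func.count(status))
        .select_from(orders)
        .where(status.is_not(None))
        .group_by(status)
    )
    print(stmt)

    # Pretend result rows; sorting in Python keeps the output deterministic.
    results = [("shipped", 42), ("new", 17), ("cancelled", 3)]
    frequencies = sorted(
        ((str(value), int(count)) for value, count in results), key=lambda x: x[0]
    )
    print(frequencies)  # [('cancelled', 3), ('new', 17), ('shipped', 42)]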
@@ -1395,12 +1435,12 @@ class DatahubGEProfiler:
             )
             return None
         finally:
-            if batch is not None and self.base_engine.engine.name.upper() in [
-                "TRINO",
-                "AWSATHENA",
+            if batch is not None and self.base_engine.engine.name.lower() in [
+                GXSqlDialect.TRINO,
+                GXSqlDialect.AWSATHENA,
             ]:
                 if (
-                    self.base_engine.engine.name.upper() == "TRINO"
+                    self.base_engine.engine.name.lower() == GXSqlDialect.TRINO
                     or temp_view is not None
                 ):
                     self._drop_temp_table(batch)
@@ -134,7 +134,9 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
     SourceCapability.OWNERSHIP,
     "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class IcebergSource(StatefulIngestionSourceBase):
     """
     ## Integration Details
@@ -204,6 +204,7 @@ class KafkaConnectionTest:
     "Not supported",
     supported=False,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -126,6 +126,7 @@ logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, configured using `extract_usage_history`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
     """
     This plugin extracts the following:
@@ -1253,6 +1253,7 @@ class Mapper:
     SourceCapability.DATA_PROFILING,
     "Optionally enabled via configuration profiling.enabled",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
     "Enabled by default, configured using `ingest_owner`",
 )
 @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -10,6 +10,7 @@ import humanfriendly
 import pydantic
 import redshift_connector
 
+from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import (
     make_data_platform_urn,
@@ -140,12 +141,15 @@ logger: logging.Logger = logging.getLogger(__name__)
     SourceCapability.USAGE_STATS,
     "Enabled by default, can be disabled via configuration `include_usage_statistics`",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.CLASSIFICATION,
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -354,7 +358,23 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
             ).workunit_processor,
         ]
 
+    def _warn_deprecated_configs(self):
+        if (
+            self.config.match_fully_qualified_names is not None
+            and not self.config.match_fully_qualified_names
+            and self.config.schema_pattern is not None
+            and self.config.schema_pattern != AllowDenyPattern.allow_all()
+        ):
+            self.report.report_warning(
+                message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
+                "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
+                "The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
+                context="Config option deprecation warning",
+                title="Config option deprecation warning",
+            )
+
     def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
+        self._warn_deprecated_configs()
         connection = self._try_get_redshift_connection(self.config)
 
         if connection is None:
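The new warning nudges users toward fully qualified schema patterns. A hedged illustration of what that means in practice, using DataHub's AllowDenyPattern directly; the database and schema names are invented.

    from datahub.configuration.common import AllowDenyPattern

    # With `match_fully_qualified_names: true`, schema_pattern entries are matched
    # against "<database_name>.<schema_name>" rather than a bare schema name.
    schema_pattern = AllowDenyPattern(allow=[r"analytics\.public", r"analytics\.sales_.*"])

    print(schema_pattern.allowed("analytics.public"))      # True
    print(schema_pattern.allowed("analytics.sales_emea"))  # True
    print(schema_pattern.allowed("analytics.scratch"))     # False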
@@ -178,7 +178,9 @@ class SACSourceReport(StaleEntityRemovalSourceReport):
     SourceCapability.LINEAGE_COARSE,
     "Enabled by default (only for Live Data Models)",
 )
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(
     SourceCapability.SCHEMA_METADATA,
     "Enabled by default (only for Import Data Models)",
@@ -105,6 +105,7 @@ logger = logging.getLogger(__name__)
     SourceCapability.OWNERSHIP,
     "Enabled by default, configured using `ingest_owner`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class SigmaSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following:
@@ -154,14 +154,11 @@ class SnowflakeIdentifierConfig(
 
     email_domain: Optional[str] = pydantic.Field(
         default=None,
-        description="Email domain of your organization so users can be displayed on UI appropriately.",
+        description="Email domain of your organization so users can be displayed on UI appropriately. This is used only if we cannot infer email ID.",
     )
 
-    email_as_user_identifier: bool = Field(
-        default=True,
-        description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is "
-        "provided, generates email addresses for snowflake users with unset emails, based on their "
-        "username.",
+    _email_as_user_identifier = pydantic_removed_field(
+        "email_as_user_identifier",
     )
 
 
@@ -325,15 +325,10 @@ class SnowflakeIdentifierBuilder:
         user_email: Optional[str],
     ) -> str:
         if user_email:
-            return self.snowflake_identifier(
-                user_email
-                if self.identifier_config.email_as_user_identifier is True
-                else user_email.split("@")[0]
-            )
+            return self.snowflake_identifier(user_email)
         return self.snowflake_identifier(
             f"{user_name}@{self.identifier_config.email_domain}"
-            if self.identifier_config.email_as_user_identifier is True
-            and self.identifier_config.email_domain is not None
+            if self.identifier_config.email_domain is not None
             else user_name
         )
 
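With `email_as_user_identifier` removed, the branching above simplifies to: prefer the user's email when Snowflake reports one, otherwise build `<user_name>@<email_domain>` when an email domain is configured, else fall back to the bare username. A standalone, hedged restatement of just that branching follows; the real method additionally normalizes the result through `snowflake_identifier`, and the names below are invented.

    from typing import Optional

    def resolve_user_identifier(
        user_name: str, user_email: Optional[str], email_domain: Optional[str]
    ) -> str:
        # Email wins whenever it is known.
        if user_email:
            return user_email
        # Otherwise synthesize one from the configured domain, if any.
        if email_domain is not None:
            return f"{user_name}@{email_domain}"
        return user_name

    print(resolve_user_identifier("JDOE", "jane.doe@example.com", "example.com"))  # jane.doe@example.com
    print(resolve_user_identifier("JDOE", None, "example.com"))                    # JDOE@example.com
    print(resolve_user_identifier("JDOE", None, None))                             # JDOE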
@@ -131,6 +131,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Optionally enabled via `classification.enabled`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class SnowflakeV2Source(
     SnowflakeCommonMixin,
     StatefulIngestionSourceBase,
@@ -311,6 +312,7 @@ class SnowflakeV2Source(
                 SourceCapability.PLATFORM_INSTANCE,
                 SourceCapability.DOMAINS,
                 SourceCapability.DELETION_DETECTION,
+                SourceCapability.TEST_CONNECTION,
             )
         ]
 
@@ -379,7 +379,9 @@ clickhouse_datetime_format = "%Y-%m-%d %H:%M:%S"
 @platform_name("ClickHouse")
 @config_class(ClickHouseConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 class ClickHouseSource(TwoTierSQLAlchemySource):
     """
@@ -26,7 +26,6 @@ class CockroachDBConfig(PostgresConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class CockroachDBSource(PostgresSource):
     config: CockroachDBConfig
 
@@ -27,7 +27,9 @@ class HanaConfig(BasicSQLAlchemyConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class HanaSource(SQLAlchemySource):
     def __init__(self, config: HanaConfig, ctx: PipelineContext):
         super().__init__(config, ctx, "hana")
@@ -161,7 +161,9 @@ class HiveMetastore(BasicSQLAlchemyConfig):
 @platform_name("Hive Metastore")
 @config_class(HiveMetastore)
 @support_status(SupportStatus.CERTIFIED)
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Not Supported", False)
 @capability(SourceCapability.CLASSIFICATION, "Not Supported", False)
 @capability(
@@ -15,7 +15,6 @@ from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class MariaDBSource(MySQLSource):
     def get_platform(self):
         return "mariadb"
@@ -174,7 +174,14 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
+)
 class SQLServerSource(SQLAlchemySource):
     """
     This plugin extracts the following:
@@ -65,15 +65,6 @@ class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
-@capability(
-    SourceCapability.LINEAGE_COARSE,
-    "Supported for views if `include_view_column_lineage` is enabled.",
-)
-@capability(
-    SourceCapability.LINEAGE_FINE,
-    "Supported for views if `include_view_column_lineage` is enabled.",
-)
 class MySQLSource(TwoTierSQLAlchemySource):
     """
     This plugin extracts the following:
@@ -131,7 +131,6 @@ class PostgresConfig(BasePostgresConfig):
 @capability(SourceCapability.DOMAINS, "Enabled by default")
 @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
-@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
 class PostgresSource(SQLAlchemySource):
     """
     This plugin extracts the following:
@@ -302,6 +302,18 @@ class ProfileMetadata:
     "Enabled by default",
     supported=True,
 )
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for views via `include_view_lineage`",
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for views via `include_view_column_lineage`",
+)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     """A Base class for all SQL Sources that use SQLAlchemy to extend"""
 
@@ -879,6 +879,7 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
     SourceCapability.LINEAGE_FINE,
     "Enabled by default, configure using `extract_column_level_lineage`",
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 class TableauSource(StatefulIngestionSourceBase, TestableSource):
     platform = "tableau"
 
@@ -162,6 +162,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     "Optionally enabled via `stateful_ingestion.remove_stale_metadata`",
     supported=True,
 )
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
 @support_status(SupportStatus.INCUBATING)
 class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     """
@@ -85,8 +85,11 @@ class ClickHouseUsageConfig(ClickHouseConfig, BaseUsageConfig, EnvConfigMixin):
 @platform_name("ClickHouse")
 @config_class(ClickHouseUsageConfig)
 @support_status(SupportStatus.CERTIFIED)
-@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
+@capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
 @dataclasses.dataclass
 class ClickHouseUsageSource(Source):
     """
@@ -15,7 +15,9 @@ from sqlalchemy.engine import Engine
 import datahub.emitter.mce_builder as builder
 from datahub.configuration.time_window_config import get_time_bucket
 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -112,6 +114,7 @@ class TrinoUsageReport(SourceReport):
 @platform_name("Trino")
 @config_class(TrinoUsageConfig)
 @support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
 @dataclasses.dataclass
 class TrinoUsageSource(Source):
     """
@@ -4590,6 +4590,15 @@ class FabricTypeClass(object):
     RVW = "RVW"
     """Designates review fabrics"""
 
+    PRD = "PRD"
+    """Alternative Prod spelling"""
+
+    TST = "TST"
+    """Alternative Test spelling"""
+
+    SIT = "SIT"
+    """System Integration Testing"""
+
     SANDBOX = "SANDBOX"
     """Designates sandbox fabrics"""
 
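The three new fabric symbols are plain string constants on FabricTypeClass, so they can be used anywhere an environment string is accepted, for example as the `env` of a dataset URN. A hedged sketch; the platform and dataset name are invented, and the usual make_dataset_urn helper is assumed to accept any declared fabric value.

    from datahub.emitter.mce_builder import make_dataset_urn
    from datahub.metadata.schema_classes import FabricTypeClass

    print(FabricTypeClass.PRD, FabricTypeClass.TST, FabricTypeClass.SIT)  # PRD TST SIT

    # e.g. urn:li:dataset:(urn:li:dataPlatform:snowflake,analytics.orders,SIT)
    print(make_dataset_urn(platform="snowflake", name="analytics.orders", env=FabricTypeClass.SIT))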
@@ -21504,6 +21513,7 @@ class DataHubResourceFilterClass(DictWrapper):
         resources: Union[None, List[str]]=None,
         allResources: Optional[bool]=None,
         filter: Union[None, "PolicyMatchFilterClass"]=None,
+        privilegeConstraints: Union[None, "PolicyMatchFilterClass"]=None,
     ):
         super().__init__()
 
@@ -21515,12 +21525,14 @@ class DataHubResourceFilterClass(DictWrapper):
         else:
             self.allResources = allResources
         self.filter = filter
+        self.privilegeConstraints = privilegeConstraints
 
     def _restore_defaults(self) -> None:
         self.type = self.RECORD_SCHEMA.fields_dict["type"].default
         self.resources = self.RECORD_SCHEMA.fields_dict["resources"].default
         self.allResources = self.RECORD_SCHEMA.fields_dict["allResources"].default
         self.filter = self.RECORD_SCHEMA.fields_dict["filter"].default
+        self.privilegeConstraints = self.RECORD_SCHEMA.fields_dict["privilegeConstraints"].default
 
 
     @property
@@ -21565,6 +21577,16 @@ class DataHubResourceFilterClass(DictWrapper):
         self._inner_dict['filter'] = value
 
 
+    @property
+    def privilegeConstraints(self) -> Union[None, "PolicyMatchFilterClass"]:
+        """Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"""
+        return self._inner_dict.get('privilegeConstraints')  # type: ignore
+
+    @privilegeConstraints.setter
+    def privilegeConstraints(self, value: Union[None, "PolicyMatchFilterClass"]) -> None:
+        self._inner_dict['privilegeConstraints'] = value
+
+
 
 class DataHubRoleInfoClass(_Aspect):
     """Information about a DataHub Role."""
@@ -21633,6 +21655,9 @@ class PolicyMatchConditionClass(object):
     STARTS_WITH = "STARTS_WITH"
     """Whether the field value starts with the value"""
 
+    NOT_EQUALS = "NOT_EQUALS"
+    """Whether the field does not match the value"""
+
 
 
 class PolicyMatchCriterionClass(DictWrapper):
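Together, the new NOT_EQUALS condition and the privilegeConstraints field let a policy constrain which sub-resources (for example, a specific tag) an otherwise-allowed operation may touch. A hedged construction example; the field name "TAG" and the tag URN are illustrative, not taken from the diff.

    from datahub.metadata.schema_classes import (
        PolicyMatchConditionClass,
        PolicyMatchCriterionClass,
        PolicyMatchFilterClass,
    )

    # "Anything except the PII tag" as a constraint; this is the kind of object that
    # DataHubResourceFilterClass.privilegeConstraints now accepts.
    constraint = PolicyMatchFilterClass(
        criteria=[
            PolicyMatchCriterionClass(
                field="TAG",
                values=["urn:li:tag:PII"],
                condition=PolicyMatchConditionClass.NOT_EQUALS,
            )
        ]
    )
    print(constraint)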
@@ -9502,13 +9502,16 @@
       "DEV": "Designates development fabrics",
       "EI": "Designates early-integration fabrics",
       "NON_PROD": "Designates non-production fabrics",
+      "PRD": "Alternative Prod spelling",
       "PRE": "Designates pre-production fabrics",
       "PROD": "Designates production fabrics",
       "QA": "Designates quality assurance fabrics",
       "RVW": "Designates review fabrics",
       "SANDBOX": "Designates sandbox fabrics",
+      "SIT": "System Integration Testing",
       "STG": "Designates staging fabrics",
       "TEST": "Designates testing fabrics",
+      "TST": "Alternative Test spelling",
       "UAT": "Designates user acceptance testing fabrics"
     },
     "name": "FabricType",
@@ -9525,6 +9528,9 @@
       "PROD",
       "CORP",
       "RVW",
+      "PRD",
+      "TST",
+      "SIT",
       "SANDBOX"
     ],
     "doc": "Fabric group type"
@@ -16441,13 +16447,15 @@
     "type": "enum",
     "symbolDocs": {
       "EQUALS": "Whether the field matches the value",
+      "NOT_EQUALS": "Whether the field does not match the value",
      "STARTS_WITH": "Whether the field value starts with the value"
     },
     "name": "PolicyMatchCondition",
     "namespace": "com.linkedin.pegasus2avro.policy",
     "symbols": [
       "EQUALS",
-      "STARTS_WITH"
+      "STARTS_WITH",
+      "NOT_EQUALS"
     ],
     "doc": "The matching condition in a filter criterion"
   },
@@ -16469,6 +16477,15 @@
       "name": "filter",
       "default": null,
       "doc": "Filter to apply privileges to"
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.policy.PolicyMatchFilter"
+      ],
+      "name": "privilegeConstraints",
+      "default": null,
+      "doc": "Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"
     }
   ],
   "doc": "Information used to filter DataHub resource."
@@ -93,13 +93,16 @@
       "DEV": "Designates development fabrics",
       "EI": "Designates early-integration fabrics",
       "NON_PROD": "Designates non-production fabrics",
+      "PRD": "Alternative Prod spelling",
       "PRE": "Designates pre-production fabrics",
       "PROD": "Designates production fabrics",
       "QA": "Designates quality assurance fabrics",
       "RVW": "Designates review fabrics",
       "SANDBOX": "Designates sandbox fabrics",
+      "SIT": "System Integration Testing",
       "STG": "Designates staging fabrics",
       "TEST": "Designates testing fabrics",
+      "TST": "Alternative Test spelling",
       "UAT": "Designates user acceptance testing fabrics"
     },
     "name": "FabricType",
116
119
  "PROD",
117
120
  "CORP",
118
121
  "RVW",
122
+ "PRD",
123
+ "TST",
124
+ "SIT",
119
125
  "SANDBOX"
120
126
  ],
121
127
  "doc": "Fabric group type"
@@ -147,13 +147,16 @@
       "DEV": "Designates development fabrics",
       "EI": "Designates early-integration fabrics",
       "NON_PROD": "Designates non-production fabrics",
+      "PRD": "Alternative Prod spelling",
       "PRE": "Designates pre-production fabrics",
       "PROD": "Designates production fabrics",
       "QA": "Designates quality assurance fabrics",
       "RVW": "Designates review fabrics",
       "SANDBOX": "Designates sandbox fabrics",
+      "SIT": "System Integration Testing",
       "STG": "Designates staging fabrics",
       "TEST": "Designates testing fabrics",
+      "TST": "Alternative Test spelling",
       "UAT": "Designates user acceptance testing fabrics"
     },
     "name": "FabricType",
170
173
  "PROD",
171
174
  "CORP",
172
175
  "RVW",
176
+ "PRD",
177
+ "TST",
178
+ "SIT",
173
179
  "SANDBOX"
174
180
  ],
175
181
  "doc": "Fabric group type"
@@ -110,13 +110,15 @@
     "type": "enum",
     "symbolDocs": {
       "EQUALS": "Whether the field matches the value",
+      "NOT_EQUALS": "Whether the field does not match the value",
       "STARTS_WITH": "Whether the field value starts with the value"
     },
     "name": "PolicyMatchCondition",
     "namespace": "com.linkedin.pegasus2avro.policy",
     "symbols": [
       "EQUALS",
-      "STARTS_WITH"
+      "STARTS_WITH",
+      "NOT_EQUALS"
     ],
     "doc": "The matching condition in a filter criterion"
   },
@@ -138,6 +140,15 @@
       "name": "filter",
       "default": null,
       "doc": "Filter to apply privileges to"
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.policy.PolicyMatchFilter"
+      ],
+      "name": "privilegeConstraints",
+      "default": null,
+      "doc": "Constraints around what sub-resources operations are allowed to modify, i.e. NOT_EQUALS - cannot modify a particular defined tag, EQUALS - can only modify a particular defined tag, STARTS_WITH - can only modify a tag starting with xyz"
     }
   ],
   "doc": "Information used to filter DataHub resource."