acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2558 -2531
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +221 -187
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/rest_emitter.py +70 -12
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1433 -546
- datahub/metadata/_urns/urn_defs.py +1826 -1658
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +17736 -17112
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +116 -0
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +82 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
|
|
53
53
|
from datahub.metadata.schema_classes import (
|
|
54
54
|
BrowsePathsClass,
|
|
55
|
-
ChangeTypeClass,
|
|
56
55
|
CorpUserInfoClass,
|
|
57
56
|
CorpUserKeyClass,
|
|
58
57
|
DashboardInfoClass,
|
|
@@ -243,20 +242,14 @@ class Mapper:
|
|
|
243
242
|
|
|
244
243
|
@staticmethod
|
|
245
244
|
def new_mcp(
|
|
246
|
-
entity_type,
|
|
247
245
|
entity_urn,
|
|
248
|
-
aspect_name,
|
|
249
246
|
aspect,
|
|
250
|
-
change_type=ChangeTypeClass.UPSERT,
|
|
251
247
|
):
|
|
252
248
|
"""
|
|
253
249
|
Create MCP
|
|
254
250
|
"""
|
|
255
251
|
return MetadataChangeProposalWrapper(
|
|
256
|
-
entityType=entity_type,
|
|
257
|
-
changeType=change_type,
|
|
258
252
|
entityUrn=entity_urn,
|
|
259
|
-
aspectName=aspect_name,
|
|
260
253
|
aspect=aspect,
|
|
261
254
|
)
|
|
262
255
|
|
|
@@ -343,17 +336,13 @@ class Mapper:
|
|
|
343
336
|
)
|
|
344
337
|
|
|
345
338
|
info_mcp = self.new_mcp(
|
|
346
|
-
entity_type=Constant.DASHBOARD,
|
|
347
339
|
entity_urn=dashboard_urn,
|
|
348
|
-
aspect_name=Constant.DASHBOARD_INFO,
|
|
349
340
|
aspect=dashboard_info_cls,
|
|
350
341
|
)
|
|
351
342
|
|
|
352
343
|
# removed status mcp
|
|
353
344
|
removed_status_mcp = self.new_mcp(
|
|
354
|
-
entity_type=Constant.DASHBOARD,
|
|
355
345
|
entity_urn=dashboard_urn,
|
|
356
|
-
aspect_name=Constant.STATUS,
|
|
357
346
|
aspect=StatusClass(removed=False),
|
|
358
347
|
)
|
|
359
348
|
|
|
@@ -365,9 +354,7 @@ class Mapper:
|
|
|
365
354
|
|
|
366
355
|
# Dashboard key
|
|
367
356
|
dashboard_key_mcp = self.new_mcp(
|
|
368
|
-
entity_type=Constant.DASHBOARD,
|
|
369
357
|
entity_urn=dashboard_urn,
|
|
370
|
-
aspect_name=Constant.DASHBOARD_KEY,
|
|
371
358
|
aspect=dashboard_key_cls,
|
|
372
359
|
)
|
|
373
360
|
|
|
@@ -378,9 +365,7 @@ class Mapper:
|
|
|
378
365
|
ownership = OwnershipClass(owners=owners)
|
|
379
366
|
# Dashboard owner MCP
|
|
380
367
|
owner_mcp = self.new_mcp(
|
|
381
|
-
entity_type=Constant.DASHBOARD,
|
|
382
368
|
entity_urn=dashboard_urn,
|
|
383
|
-
aspect_name=Constant.OWNERSHIP,
|
|
384
369
|
aspect=ownership,
|
|
385
370
|
)
|
|
386
371
|
|
|
@@ -396,9 +381,7 @@ class Mapper:
|
|
|
396
381
|
]
|
|
397
382
|
)
|
|
398
383
|
browse_path_mcp = self.new_mcp(
|
|
399
|
-
entity_type=Constant.DASHBOARD,
|
|
400
384
|
entity_urn=dashboard_urn,
|
|
401
|
-
aspect_name=Constant.BROWSERPATH,
|
|
402
385
|
aspect=browse_path,
|
|
403
386
|
)
|
|
404
387
|
|
|
@@ -429,27 +412,21 @@ class Mapper:
|
|
|
429
412
|
)
|
|
430
413
|
|
|
431
414
|
info_mcp = self.new_mcp(
|
|
432
|
-
entity_type=Constant.CORP_USER,
|
|
433
415
|
entity_urn=user_urn,
|
|
434
|
-
aspect_name=Constant.CORP_USER_INFO,
|
|
435
416
|
aspect=user_info_instance,
|
|
436
417
|
)
|
|
437
418
|
user_mcps.append(info_mcp)
|
|
438
419
|
|
|
439
420
|
# removed status mcp
|
|
440
421
|
status_mcp = self.new_mcp(
|
|
441
|
-
entity_type=Constant.CORP_USER,
|
|
442
422
|
entity_urn=user_urn,
|
|
443
|
-
aspect_name=Constant.STATUS,
|
|
444
423
|
aspect=StatusClass(removed=False),
|
|
445
424
|
)
|
|
446
425
|
user_mcps.append(status_mcp)
|
|
447
426
|
user_key = CorpUserKeyClass(username=user.username)
|
|
448
427
|
|
|
449
428
|
user_key_mcp = self.new_mcp(
|
|
450
|
-
entity_type=Constant.CORP_USER,
|
|
451
429
|
entity_urn=user_urn,
|
|
452
|
-
aspect_name=Constant.CORP_USER_KEY,
|
|
453
430
|
aspect=user_key,
|
|
454
431
|
)
|
|
455
432
|
user_mcps.append(user_key_mcp)
|
|
@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):
|
|
|
69
69
|
|
|
70
70
|
@platform_name("Preset")
|
|
71
71
|
@config_class(PresetConfig)
|
|
72
|
-
@support_status(SupportStatus.
|
|
72
|
+
@support_status(SupportStatus.CERTIFIED)
|
|
73
73
|
@capability(
|
|
74
|
-
SourceCapability.DELETION_DETECTION, "
|
|
74
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
75
75
|
)
|
|
76
76
|
class PresetSource(SupersetSource):
|
|
77
77
|
"""
|
|
@@ -109,6 +109,7 @@ logger = logging.getLogger(__name__)
|
|
|
109
109
|
"Enabled by default, configured using `ingest_owner`",
|
|
110
110
|
)
|
|
111
111
|
@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
|
|
112
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
112
113
|
class QlikSenseSource(StatefulIngestionSourceBase, TestableSource):
|
|
113
114
|
"""
|
|
114
115
|
This plugin extracts the following:
|
|
@@ -10,6 +10,7 @@ import humanfriendly
|
|
|
10
10
|
import pydantic
|
|
11
11
|
import redshift_connector
|
|
12
12
|
|
|
13
|
+
from datahub.configuration.common import AllowDenyPattern
|
|
13
14
|
from datahub.configuration.pattern_utils import is_schema_allowed
|
|
14
15
|
from datahub.emitter.mce_builder import (
|
|
15
16
|
make_data_platform_urn,
|
|
@@ -140,12 +141,15 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
140
141
|
SourceCapability.USAGE_STATS,
|
|
141
142
|
"Enabled by default, can be disabled via configuration `include_usage_statistics`",
|
|
142
143
|
)
|
|
143
|
-
@capability(
|
|
144
|
+
@capability(
|
|
145
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
146
|
+
)
|
|
144
147
|
@capability(
|
|
145
148
|
SourceCapability.CLASSIFICATION,
|
|
146
149
|
"Optionally enabled via `classification.enabled`",
|
|
147
150
|
supported=True,
|
|
148
151
|
)
|
|
152
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
149
153
|
class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
|
|
150
154
|
"""
|
|
151
155
|
This plugin extracts the following:
|
|
@@ -354,7 +358,23 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
354
358
|
).workunit_processor,
|
|
355
359
|
]
|
|
356
360
|
|
|
361
|
+
def _warn_deprecated_configs(self):
|
|
362
|
+
if (
|
|
363
|
+
self.config.match_fully_qualified_names is not None
|
|
364
|
+
and not self.config.match_fully_qualified_names
|
|
365
|
+
and self.config.schema_pattern is not None
|
|
366
|
+
and self.config.schema_pattern != AllowDenyPattern.allow_all()
|
|
367
|
+
):
|
|
368
|
+
self.report.report_warning(
|
|
369
|
+
message="Please update `schema_pattern` to match against fully qualified schema name `<database_name>.<schema_name>` and set config `match_fully_qualified_names : True`."
|
|
370
|
+
"Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. "
|
|
371
|
+
"The config option `match_fully_qualified_names` will be removed in future and the default behavior will be like `match_fully_qualified_names: True`.",
|
|
372
|
+
context="Config option deprecation warning",
|
|
373
|
+
title="Config option deprecation warning",
|
|
374
|
+
)
|
|
375
|
+
|
|
357
376
|
def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
|
|
377
|
+
self._warn_deprecated_configs()
|
|
358
378
|
connection = self._try_get_redshift_connection(self.config)
|
|
359
379
|
|
|
360
380
|
if connection is None:
|
|
@@ -182,9 +182,10 @@ class RedshiftUsageExtractor:
|
|
|
182
182
|
self.report.num_operational_stats_filtered = 0
|
|
183
183
|
|
|
184
184
|
if self.config.include_operational_stats:
|
|
185
|
-
with
|
|
186
|
-
USAGE_EXTRACTION_OPERATIONAL_STATS
|
|
187
|
-
|
|
185
|
+
with (
|
|
186
|
+
self.report.new_stage(USAGE_EXTRACTION_OPERATIONAL_STATS),
|
|
187
|
+
PerfTimer() as timer,
|
|
188
|
+
):
|
|
188
189
|
# Generate operation aspect workunits
|
|
189
190
|
yield from self._gen_operation_aspect_workunits(
|
|
190
191
|
self.connection, all_tables
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
from dataclasses import field as dataclass_field
|
|
3
|
-
from typing import List
|
|
4
3
|
|
|
5
4
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
6
5
|
StaleEntityRemovalSourceReport,
|
|
7
6
|
)
|
|
7
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@dataclasses.dataclass
|
|
11
11
|
class DataLakeSourceReport(StaleEntityRemovalSourceReport):
|
|
12
12
|
files_scanned = 0
|
|
13
|
-
filtered:
|
|
13
|
+
filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
14
|
+
number_of_files_filtered: int = 0
|
|
14
15
|
|
|
15
16
|
def report_file_scanned(self) -> None:
|
|
16
17
|
self.files_scanned += 1
|
|
17
18
|
|
|
18
19
|
def report_file_dropped(self, file: str) -> None:
|
|
19
20
|
self.filtered.append(file)
|
|
21
|
+
self.number_of_files_filtered += 1
|