acryl-datahub 1.1.0.4rc3__py3-none-any.whl → 1.1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5.dist-info}/METADATA +2499 -2501
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5.dist-info}/RECORD +149 -131
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/cli/check_cli.py +65 -11
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +3 -4
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/rest_emitter.py +24 -8
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +47 -45
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/graph/client.py +73 -30
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +12 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/glue.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +32 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +7 -2
- datahub/ingestion/source/dbt/dbt_common.py +3 -1
- datahub/ingestion/source/dremio/dremio_api.py +38 -27
- datahub/ingestion/source/dremio/dremio_source.py +7 -7
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +28 -20
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/powerbi/powerbi.py +0 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +6 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +27 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -12
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/hive_metastore.py +0 -10
- datahub/ingestion/source/sql/mssql/source.py +24 -15
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +11 -0
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +57 -2
- datahub/ingestion/source/tableau/tableau.py +57 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/proxy.py +4 -3
- datahub/ingestion/source/unity/source.py +56 -30
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1253 -536
- datahub/metadata/_urns/urn_defs.py +1797 -1685
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +16614 -16538
- datahub/metadata/schemas/ContainerProperties.avsc +2 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +2 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataJobInfo.avsc +2 -0
- datahub/metadata/schemas/DataProcessKey.avsc +2 -0
- datahub/metadata/schemas/DatasetKey.avsc +4 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +2 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -0
- datahub/metadata/schemas/MLModelKey.avsc +2 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +2 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/sdk/datajob.py +39 -15
- datahub/sdk/lineage_client.py +2 -0
- datahub/sdk/main_client.py +14 -2
- datahub/sdk/search_client.py +4 -3
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +40 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.4rc3.dist-info → acryl_datahub-1.1.0.5.dist-info}/top_level.txt +0 -0
|
@@ -52,7 +52,6 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
52
52
|
from datahub.metadata.com.linkedin.pegasus2avro.common import ChangeAuditStamps
|
|
53
53
|
from datahub.metadata.schema_classes import (
|
|
54
54
|
BrowsePathsClass,
|
|
55
|
-
ChangeTypeClass,
|
|
56
55
|
CorpUserInfoClass,
|
|
57
56
|
CorpUserKeyClass,
|
|
58
57
|
DashboardInfoClass,
|
|
@@ -243,20 +242,14 @@ class Mapper:
|
|
|
243
242
|
|
|
244
243
|
@staticmethod
|
|
245
244
|
def new_mcp(
|
|
246
|
-
entity_type,
|
|
247
245
|
entity_urn,
|
|
248
|
-
aspect_name,
|
|
249
246
|
aspect,
|
|
250
|
-
change_type=ChangeTypeClass.UPSERT,
|
|
251
247
|
):
|
|
252
248
|
"""
|
|
253
249
|
Create MCP
|
|
254
250
|
"""
|
|
255
251
|
return MetadataChangeProposalWrapper(
|
|
256
|
-
entityType=entity_type,
|
|
257
|
-
changeType=change_type,
|
|
258
252
|
entityUrn=entity_urn,
|
|
259
|
-
aspectName=aspect_name,
|
|
260
253
|
aspect=aspect,
|
|
261
254
|
)
|
|
262
255
|
|
|
@@ -343,17 +336,13 @@ class Mapper:
|
|
|
343
336
|
)
|
|
344
337
|
|
|
345
338
|
info_mcp = self.new_mcp(
|
|
346
|
-
entity_type=Constant.DASHBOARD,
|
|
347
339
|
entity_urn=dashboard_urn,
|
|
348
|
-
aspect_name=Constant.DASHBOARD_INFO,
|
|
349
340
|
aspect=dashboard_info_cls,
|
|
350
341
|
)
|
|
351
342
|
|
|
352
343
|
# removed status mcp
|
|
353
344
|
removed_status_mcp = self.new_mcp(
|
|
354
|
-
entity_type=Constant.DASHBOARD,
|
|
355
345
|
entity_urn=dashboard_urn,
|
|
356
|
-
aspect_name=Constant.STATUS,
|
|
357
346
|
aspect=StatusClass(removed=False),
|
|
358
347
|
)
|
|
359
348
|
|
|
@@ -365,9 +354,7 @@ class Mapper:
|
|
|
365
354
|
|
|
366
355
|
# Dashboard key
|
|
367
356
|
dashboard_key_mcp = self.new_mcp(
|
|
368
|
-
entity_type=Constant.DASHBOARD,
|
|
369
357
|
entity_urn=dashboard_urn,
|
|
370
|
-
aspect_name=Constant.DASHBOARD_KEY,
|
|
371
358
|
aspect=dashboard_key_cls,
|
|
372
359
|
)
|
|
373
360
|
|
|
@@ -378,9 +365,7 @@ class Mapper:
|
|
|
378
365
|
ownership = OwnershipClass(owners=owners)
|
|
379
366
|
# Dashboard owner MCP
|
|
380
367
|
owner_mcp = self.new_mcp(
|
|
381
|
-
entity_type=Constant.DASHBOARD,
|
|
382
368
|
entity_urn=dashboard_urn,
|
|
383
|
-
aspect_name=Constant.OWNERSHIP,
|
|
384
369
|
aspect=ownership,
|
|
385
370
|
)
|
|
386
371
|
|
|
@@ -396,9 +381,7 @@ class Mapper:
|
|
|
396
381
|
]
|
|
397
382
|
)
|
|
398
383
|
browse_path_mcp = self.new_mcp(
|
|
399
|
-
entity_type=Constant.DASHBOARD,
|
|
400
384
|
entity_urn=dashboard_urn,
|
|
401
|
-
aspect_name=Constant.BROWSERPATH,
|
|
402
385
|
aspect=browse_path,
|
|
403
386
|
)
|
|
404
387
|
|
|
@@ -429,27 +412,21 @@ class Mapper:
|
|
|
429
412
|
)
|
|
430
413
|
|
|
431
414
|
info_mcp = self.new_mcp(
|
|
432
|
-
entity_type=Constant.CORP_USER,
|
|
433
415
|
entity_urn=user_urn,
|
|
434
|
-
aspect_name=Constant.CORP_USER_INFO,
|
|
435
416
|
aspect=user_info_instance,
|
|
436
417
|
)
|
|
437
418
|
user_mcps.append(info_mcp)
|
|
438
419
|
|
|
439
420
|
# removed status mcp
|
|
440
421
|
status_mcp = self.new_mcp(
|
|
441
|
-
entity_type=Constant.CORP_USER,
|
|
442
422
|
entity_urn=user_urn,
|
|
443
|
-
aspect_name=Constant.STATUS,
|
|
444
423
|
aspect=StatusClass(removed=False),
|
|
445
424
|
)
|
|
446
425
|
user_mcps.append(status_mcp)
|
|
447
426
|
user_key = CorpUserKeyClass(username=user.username)
|
|
448
427
|
|
|
449
428
|
user_key_mcp = self.new_mcp(
|
|
450
|
-
entity_type=Constant.CORP_USER,
|
|
451
429
|
entity_urn=user_urn,
|
|
452
|
-
aspect_name=Constant.CORP_USER_KEY,
|
|
453
430
|
aspect=user_key,
|
|
454
431
|
)
|
|
455
432
|
user_mcps.append(user_key_mcp)
|
|
@@ -69,9 +69,9 @@ class PresetConfig(SupersetConfig):
|
|
|
69
69
|
|
|
70
70
|
@platform_name("Preset")
|
|
71
71
|
@config_class(PresetConfig)
|
|
72
|
-
@support_status(SupportStatus.
|
|
72
|
+
@support_status(SupportStatus.CERTIFIED)
|
|
73
73
|
@capability(
|
|
74
|
-
SourceCapability.DELETION_DETECTION, "
|
|
74
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
75
75
|
)
|
|
76
76
|
class PresetSource(SupersetSource):
|
|
77
77
|
"""
|
|
@@ -182,9 +182,10 @@ class RedshiftUsageExtractor:
|
|
|
182
182
|
self.report.num_operational_stats_filtered = 0
|
|
183
183
|
|
|
184
184
|
if self.config.include_operational_stats:
|
|
185
|
-
with
|
|
186
|
-
USAGE_EXTRACTION_OPERATIONAL_STATS
|
|
187
|
-
|
|
185
|
+
with (
|
|
186
|
+
self.report.new_stage(USAGE_EXTRACTION_OPERATIONAL_STATS),
|
|
187
|
+
PerfTimer() as timer,
|
|
188
|
+
):
|
|
188
189
|
# Generate operation aspect workunits
|
|
189
190
|
yield from self._gen_operation_aspect_workunits(
|
|
190
191
|
self.connection, all_tables
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
from dataclasses import field as dataclass_field
|
|
3
|
-
from typing import List
|
|
4
3
|
|
|
5
4
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
6
5
|
StaleEntityRemovalSourceReport,
|
|
7
6
|
)
|
|
7
|
+
from datahub.utilities.lossy_collections import LossyList
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
@dataclasses.dataclass
|
|
11
11
|
class DataLakeSourceReport(StaleEntityRemovalSourceReport):
|
|
12
12
|
files_scanned = 0
|
|
13
|
-
filtered:
|
|
13
|
+
filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
14
|
+
number_of_files_filtered: int = 0
|
|
14
15
|
|
|
15
16
|
def report_file_scanned(self) -> None:
|
|
16
17
|
self.files_scanned += 1
|
|
17
18
|
|
|
18
19
|
def report_file_dropped(self, file: str) -> None:
|
|
19
20
|
self.filtered.append(file)
|
|
21
|
+
self.number_of_files_filtered += 1
|