acryl-datahub 1.1.0.5rc3__py3-none-any.whl → 1.1.0.5rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/METADATA +2575 -2575
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/RECORD +52 -45
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +21 -4
- datahub/ingestion/api/decorators.py +14 -3
- datahub/ingestion/api/report.py +123 -2
- datahub/ingestion/api/source.py +45 -44
- datahub/ingestion/autogenerated/lineage_helper.py +193 -0
- datahub/ingestion/graph/client.py +71 -28
- datahub/ingestion/run/pipeline.py +6 -0
- datahub/ingestion/source/aws/glue.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +4 -4
- datahub/ingestion/source/common/subtypes.py +43 -0
- datahub/ingestion/source/dbt/dbt_common.py +1 -1
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/mock_data/datahub_mock_data.py +11 -15
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -0
- datahub/ingestion/source/sql/athena.py +15 -3
- datahub/ingestion/source/sql/mssql/source.py +9 -0
- datahub/ingestion/source/sql/sql_common.py +3 -0
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -1
- datahub/ingestion/source/sql/vertica.py +9 -1
- datahub/ingestion/source/tableau/tableau.py +6 -1
- datahub/ingestion/source/unity/source.py +36 -20
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/metadata/_internal_schema_classes.py +601 -0
- datahub/metadata/_urns/urn_defs.py +112 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +383 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +25 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +202 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +25 -0
- datahub/sdk/datajob.py +39 -15
- datahub/specific/dataproduct.py +4 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/top_level.txt +0 -0
|
@@ -29,7 +29,10 @@ from datahub.ingestion.api.decorators import (
|
|
|
29
29
|
from datahub.ingestion.api.source import StructuredLogLevel
|
|
30
30
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
31
31
|
from datahub.ingestion.source.aws.s3_util import make_s3_urn
|
|
32
|
-
from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
|
|
32
|
+
from datahub.ingestion.source.common.subtypes import (
|
|
33
|
+
DatasetContainerSubTypes,
|
|
34
|
+
SourceCapabilityModifier,
|
|
35
|
+
)
|
|
33
36
|
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
|
|
34
37
|
from datahub.ingestion.source.sql.sql_common import (
|
|
35
38
|
SQLAlchemySource,
|
|
@@ -321,9 +324,18 @@ class Partitionitem:
|
|
|
321
324
|
@capability(
|
|
322
325
|
SourceCapability.DATA_PROFILING,
|
|
323
326
|
"Optionally enabled via configuration. Profiling uses sql queries on whole table which can be expensive operation.",
|
|
327
|
+
subtype_modifier=[SourceCapabilityModifier.TABLE],
|
|
328
|
+
)
|
|
329
|
+
@capability(
|
|
330
|
+
SourceCapability.LINEAGE_COARSE,
|
|
331
|
+
"Supported for S3 tables",
|
|
332
|
+
subtype_modifier=[SourceCapabilityModifier.TABLE],
|
|
333
|
+
)
|
|
334
|
+
@capability(
|
|
335
|
+
SourceCapability.LINEAGE_FINE,
|
|
336
|
+
"Supported for S3 tables",
|
|
337
|
+
subtype_modifier=[SourceCapabilityModifier.TABLE],
|
|
324
338
|
)
|
|
325
|
-
@capability(SourceCapability.LINEAGE_COARSE, "Supported for S3 tables")
|
|
326
|
-
@capability(SourceCapability.LINEAGE_FINE, "Supported for S3 tables")
|
|
327
339
|
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
|
|
328
340
|
class AthenaSource(SQLAlchemySource):
|
|
329
341
|
"""
|
|
@@ -27,6 +27,7 @@ from datahub.ingestion.api.decorators import (
|
|
|
27
27
|
from datahub.ingestion.api.source import StructuredLogLevel
|
|
28
28
|
from datahub.ingestion.api.source_helpers import auto_workunit
|
|
29
29
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
30
|
+
from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
|
|
30
31
|
from datahub.ingestion.source.sql.mssql.job_models import (
|
|
31
32
|
JobStep,
|
|
32
33
|
MSSQLDataFlow,
|
|
@@ -177,10 +178,18 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
|
|
|
177
178
|
@capability(
|
|
178
179
|
SourceCapability.LINEAGE_COARSE,
|
|
179
180
|
"Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_lineage`",
|
|
181
|
+
subtype_modifier=[
|
|
182
|
+
SourceCapabilityModifier.STORED_PROCEDURE,
|
|
183
|
+
SourceCapabilityModifier.VIEW,
|
|
184
|
+
],
|
|
180
185
|
)
|
|
181
186
|
@capability(
|
|
182
187
|
SourceCapability.LINEAGE_FINE,
|
|
183
188
|
"Enabled by default to get lineage for stored procedures via `include_lineage` and for views via `include_view_column_lineage`",
|
|
189
|
+
subtype_modifier=[
|
|
190
|
+
SourceCapabilityModifier.STORED_PROCEDURE,
|
|
191
|
+
SourceCapabilityModifier.VIEW,
|
|
192
|
+
],
|
|
184
193
|
)
|
|
185
194
|
class SQLServerSource(SQLAlchemySource):
|
|
186
195
|
"""
|
|
@@ -54,6 +54,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
|
|
|
54
54
|
from datahub.ingestion.source.common.subtypes import (
|
|
55
55
|
DatasetContainerSubTypes,
|
|
56
56
|
DatasetSubTypes,
|
|
57
|
+
SourceCapabilityModifier,
|
|
57
58
|
)
|
|
58
59
|
from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
|
|
59
60
|
from datahub.ingestion.source.sql.sql_report import SQLSourceReport
|
|
@@ -305,10 +306,12 @@ class ProfileMetadata:
|
|
|
305
306
|
@capability(
|
|
306
307
|
SourceCapability.LINEAGE_COARSE,
|
|
307
308
|
"Enabled by default to get lineage for views via `include_view_lineage`",
|
|
309
|
+
subtype_modifier=[SourceCapabilityModifier.VIEW],
|
|
308
310
|
)
|
|
309
311
|
@capability(
|
|
310
312
|
SourceCapability.LINEAGE_FINE,
|
|
311
313
|
"Enabled by default to get lineage for views via `include_view_column_lineage`",
|
|
314
|
+
subtype_modifier=[SourceCapabilityModifier.VIEW],
|
|
312
315
|
)
|
|
313
316
|
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
314
317
|
@capability(
|
|
@@ -57,10 +57,11 @@ class GenericProfiler:
|
|
|
57
57
|
platform: Optional[str] = None,
|
|
58
58
|
profiler_args: Optional[Dict] = None,
|
|
59
59
|
) -> Iterable[MetadataWorkUnit]:
|
|
60
|
+
# We don't run ge profiling queries if table profiling is enabled or if the row count is 0.
|
|
60
61
|
ge_profile_requests: List[GEProfilerRequest] = [
|
|
61
62
|
cast(GEProfilerRequest, request)
|
|
62
63
|
for request in requests
|
|
63
|
-
if not request.profile_table_level_only
|
|
64
|
+
if not request.profile_table_level_only or request.table.rows_count == 0
|
|
64
65
|
]
|
|
65
66
|
table_level_profile_requests: List[TableProfilerRequest] = [
|
|
66
67
|
request for request in requests if request.profile_table_level_only
|
|
@@ -445,7 +445,10 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
|
|
|
445
445
|
@capability(SourceCapability.DOMAINS, "Enabled by default")
|
|
446
446
|
@capability(SourceCapability.CONTAINERS, "Enabled by default")
|
|
447
447
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
448
|
-
@capability(
|
|
448
|
+
@capability(
|
|
449
|
+
SourceCapability.DELETION_DETECTION,
|
|
450
|
+
"Enabled by default when stateful ingestion is turned on",
|
|
451
|
+
)
|
|
449
452
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
450
453
|
@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
|
|
451
454
|
@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration")
|
|
@@ -25,6 +25,10 @@ from datahub.ingestion.api.decorators import (
|
|
|
25
25
|
)
|
|
26
26
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
27
27
|
from datahub.ingestion.source.common.data_reader import DataReader
|
|
28
|
+
from datahub.ingestion.source.common.subtypes import (
|
|
29
|
+
DatasetSubTypes,
|
|
30
|
+
SourceCapabilityModifier,
|
|
31
|
+
)
|
|
28
32
|
from datahub.ingestion.source.sql.sql_common import (
|
|
29
33
|
SQLAlchemySource,
|
|
30
34
|
SqlWorkUnit,
|
|
@@ -113,6 +117,10 @@ class VerticaConfig(BasicSQLAlchemyConfig):
|
|
|
113
117
|
@capability(
|
|
114
118
|
SourceCapability.LINEAGE_COARSE,
|
|
115
119
|
"Enabled by default, can be disabled via configuration `include_view_lineage` and `include_projection_lineage`",
|
|
120
|
+
subtype_modifier=[
|
|
121
|
+
SourceCapabilityModifier.VIEW,
|
|
122
|
+
SourceCapabilityModifier.PROJECTIONS,
|
|
123
|
+
],
|
|
116
124
|
)
|
|
117
125
|
@capability(
|
|
118
126
|
SourceCapability.DELETION_DETECTION,
|
|
@@ -497,7 +505,7 @@ class VerticaSource(SQLAlchemySource):
|
|
|
497
505
|
changeType=ChangeTypeClass.UPSERT,
|
|
498
506
|
entityUrn=dataset_urn,
|
|
499
507
|
aspectName="subTypes",
|
|
500
|
-
aspect=SubTypesClass(typeNames=["Projections"]),
|
|
508
|
+
aspect=SubTypesClass(typeNames=[DatasetSubTypes.PROJECTIONS]),
|
|
501
509
|
).as_workunit()
|
|
502
510
|
|
|
503
511
|
if self.config.domain:
|
|
@@ -80,6 +80,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
80
80
|
from datahub.ingestion.source.common.subtypes import (
|
|
81
81
|
BIContainerSubTypes,
|
|
82
82
|
DatasetSubTypes,
|
|
83
|
+
SourceCapabilityModifier,
|
|
83
84
|
)
|
|
84
85
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
85
86
|
StaleEntityRemovalHandler,
|
|
@@ -867,10 +868,14 @@ def report_user_role(report: TableauSourceReport, server: Server) -> None:
|
|
|
867
868
|
@capability(
|
|
868
869
|
SourceCapability.USAGE_STATS,
|
|
869
870
|
"Dashboard/Chart view counts, enabled using extract_usage_stats config",
|
|
871
|
+
subtype_modifier=[
|
|
872
|
+
SourceCapabilityModifier.DASHBOARD,
|
|
873
|
+
SourceCapabilityModifier.CHART,
|
|
874
|
+
],
|
|
870
875
|
)
|
|
871
876
|
@capability(
|
|
872
877
|
SourceCapability.DELETION_DETECTION,
|
|
873
|
-
"Enabled by default
|
|
878
|
+
"Enabled by default via stateful ingestion.",
|
|
874
879
|
)
|
|
875
880
|
@capability(SourceCapability.OWNERSHIP, "Requires recipe configuration")
|
|
876
881
|
@capability(SourceCapability.TAGS, "Requires recipe configuration")
|
|
@@ -1020,29 +1020,45 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1020
1020
|
) -> Iterable[MetadataWorkUnit]:
|
|
1021
1021
|
if self.ctx.graph and self.platform_resource_repository:
|
|
1022
1022
|
for tag in tags:
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1023
|
+
try:
|
|
1024
|
+
platform_resource_id = UnityCatalogTagPlatformResourceId.from_tag(
|
|
1025
|
+
platform_instance=self.platform_instance_name,
|
|
1026
|
+
platform_resource_repository=self.platform_resource_repository,
|
|
1027
|
+
tag=tag,
|
|
1028
|
+
)
|
|
1029
|
+
logger.debug(f"Created platform resource {platform_resource_id}")
|
|
1029
1030
|
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
):
|
|
1037
|
-
unity_catalog_tag.datahub_linked_resources().add(
|
|
1038
|
-
tag.to_datahub_tag_urn().urn()
|
|
1031
|
+
unity_catalog_tag = (
|
|
1032
|
+
UnityCatalogTagPlatformResource.get_from_datahub(
|
|
1033
|
+
platform_resource_id,
|
|
1034
|
+
self.platform_resource_repository,
|
|
1035
|
+
False,
|
|
1036
|
+
)
|
|
1039
1037
|
)
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1038
|
+
if (
|
|
1039
|
+
tag.to_datahub_tag_urn().urn()
|
|
1040
|
+
not in unity_catalog_tag.datahub_linked_resources().urns
|
|
1041
|
+
):
|
|
1042
|
+
unity_catalog_tag.datahub_linked_resources().add(
|
|
1043
|
+
tag.to_datahub_tag_urn().urn()
|
|
1045
1044
|
)
|
|
1045
|
+
platform_resource = unity_catalog_tag.as_platform_resource()
|
|
1046
|
+
for mcp in platform_resource.to_mcps():
|
|
1047
|
+
yield MetadataWorkUnit(
|
|
1048
|
+
id=f"platform_resource-{platform_resource.id}",
|
|
1049
|
+
mcp=mcp,
|
|
1050
|
+
)
|
|
1051
|
+
except Exception as e:
|
|
1052
|
+
logger.exception(
|
|
1053
|
+
f"Error processing platform resource for tag {tag}"
|
|
1054
|
+
)
|
|
1055
|
+
self.report.report_warning(
|
|
1056
|
+
message="Error processing platform resource for tag",
|
|
1057
|
+
context=str(tag),
|
|
1058
|
+
title="Error processing platform resource for tag",
|
|
1059
|
+
exc=e,
|
|
1060
|
+
)
|
|
1061
|
+
continue
|
|
1046
1062
|
|
|
1047
1063
|
def _create_schema_metadata_aspect(
|
|
1048
1064
|
self, table: Table
|
|
@@ -71,8 +71,24 @@ class AddDatasetOwnership(OwnershipTransformer):
|
|
|
71
71
|
|
|
72
72
|
server_ownership = graph.get_ownership(entity_urn=urn)
|
|
73
73
|
if server_ownership:
|
|
74
|
-
owners = {owner.owner: owner for owner in server_ownership.owners}
|
|
75
|
-
|
|
74
|
+
owners = {
|
|
75
|
+
(
|
|
76
|
+
owner.owner,
|
|
77
|
+
owner.type,
|
|
78
|
+
owner.typeUrn,
|
|
79
|
+
): owner
|
|
80
|
+
for owner in server_ownership.owners
|
|
81
|
+
}
|
|
82
|
+
owners.update(
|
|
83
|
+
{
|
|
84
|
+
(
|
|
85
|
+
owner.owner,
|
|
86
|
+
owner.type,
|
|
87
|
+
owner.typeUrn,
|
|
88
|
+
): owner
|
|
89
|
+
for owner in mce_ownership.owners
|
|
90
|
+
}
|
|
91
|
+
)
|
|
76
92
|
mce_ownership.owners = list(owners.values())
|
|
77
93
|
|
|
78
94
|
return mce_ownership
|