acryl-datahub 1.2.0.11rc3__py3-none-any.whl → 1.2.0.11rc5__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.2.0.11rc3.dist-info → acryl_datahub-1.2.0.11rc5.dist-info}/METADATA +2472 -2469
- {acryl_datahub-1.2.0.11rc3.dist-info → acryl_datahub-1.2.0.11rc5.dist-info}/RECORD +43 -41
- datahub/_version.py +1 -1
- datahub/cli/docker_check.py +1 -1
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +296 -0
- datahub/ingestion/api/source.py +29 -5
- datahub/ingestion/api/source_protocols.py +23 -0
- datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -2
- datahub/ingestion/source/cassandra/cassandra_profiling.py +2 -2
- datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
- datahub/ingestion/source/dremio/dremio_reporting.py +0 -2
- datahub/ingestion/source/dremio/dremio_source.py +2 -2
- datahub/ingestion/source/fivetran/config.py +32 -5
- datahub/ingestion/source/fivetran/fivetran.py +0 -1
- datahub/ingestion/source/fivetran/fivetran_log_api.py +13 -0
- datahub/ingestion/source/fivetran/fivetran_query.py +43 -28
- datahub/ingestion/source/gc/datahub_gc.py +0 -2
- datahub/ingestion/source/grafana/models.py +9 -1
- datahub/ingestion/source/grafana/report.py +1 -2
- datahub/ingestion/source/hex/hex.py +0 -2
- datahub/ingestion/source/redshift/redshift.py +2 -2
- datahub/ingestion/source/redshift/report.py +0 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +23 -7
- datahub/ingestion/source/snowflake/snowflake_report.py +0 -2
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +2 -2
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +25 -17
- datahub/ingestion/source/sql/teradata.py +1 -2
- datahub/ingestion/source/sql_queries.py +1 -2
- datahub/ingestion/source/tableau/tableau.py +0 -2
- datahub/ingestion/source/unity/config.py +49 -29
- datahub/ingestion/source/unity/report.py +1 -2
- datahub/ingestion/source_report/ingestion_stage.py +54 -12
- datahub/metadata/_internal_schema_classes.py +169 -0
- datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- datahub/metadata/schema.avsc +101 -0
- datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- {acryl_datahub-1.2.0.11rc3.dist-info → acryl_datahub-1.2.0.11rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.11rc3.dist-info → acryl_datahub-1.2.0.11rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.11rc3.dist-info → acryl_datahub-1.2.0.11rc5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.11rc3.dist-info → acryl_datahub-1.2.0.11rc5.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/sql/sql_common.py

@@ -46,6 +46,7 @@ from datahub.ingestion.api.source import (
     TestableSource,
     TestConnectionReport,
 )
+from datahub.ingestion.api.source_protocols import MetadataWorkUnitIterable
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.glossary.classification_mixin import (
     SAMPLE_SIZE_MULTIPLIER,
@@ -578,19 +579,6 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         self._add_default_options(sql_config)
 
         for inspector in self.get_inspectors():
-            profiler = None
-            profile_requests: List["GEProfilerRequest"] = []
-            if sql_config.is_profiling_enabled():
-                profiler = self.get_profiler_instance(inspector)
-                try:
-                    self.add_profile_metadata(inspector)
-                except Exception as e:
-                    self.warn(
-                        logger,
-                        "profile_metadata",
-                        f"Failed to get enrichment data for profile {e}",
-                    )
-
             db_name = self.get_db_name(inspector)
             yield from self.get_database_level_workunits(
                 inspector=inspector,
@@ -606,19 +594,39 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                     database=db_name,
                 )
 
+        # Generate workunit for aggregated SQL parsing results
+        yield from self._generate_aggregator_workunits()
+
+    def is_profiling_enabled_internal(self) -> bool:
+        return self.config.is_profiling_enabled()
+
+    def get_profiling_internal(
+        self,
+    ) -> MetadataWorkUnitIterable:
+        sql_config = self.config
+        for inspector in self.get_inspectors():
+            profiler = None
+            profile_requests: List["GEProfilerRequest"] = []
+            profiler = self.get_profiler_instance(inspector)
+            try:
+                self.add_profile_metadata(inspector)
+            except Exception as e:
+                self.warn(
+                    logger,
+                    "profile_metadata",
+                    f"Failed to get enrichment data for profile {e}",
+                )
+            db_name = self.get_db_name(inspector)
+            for schema in self.get_allowed_schemas(inspector, db_name):
                 if profiler:
                     profile_requests += list(
                         self.loop_profiler_requests(inspector, schema, sql_config)
                     )
-
             if profiler and profile_requests:
                 yield from self.loop_profiler(
                     profile_requests, profiler, platform=self.platform
                 )
 
-        # Generate workunit for aggregated SQL parsing results
-        yield from self._generate_aggregator_workunits()
-
     def _generate_aggregator_workunits(self) -> Iterable[MetadataWorkUnit]:
         """Generate work units from SQL parsing aggregator. Can be overridden by subclasses."""
         for mcp in self.aggregator.gen_metadata():
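Taken together, the sql_common.py hunks split profiling out of get_workunits_internal() into an explicit is_profiling_enabled_internal() / get_profiling_internal() pair. The snippet below is a minimal, hypothetical sketch of how a framework-level caller might drive the new hooks; "source" stands in for any configured SQLAlchemySource and is not code from the package.

from typing import Iterable

from datahub.ingestion.api.workunit import MetadataWorkUnit


def emit_profiling_workunits(source) -> Iterable[MetadataWorkUnit]:
    # Run the (potentially expensive) profiling pass only when the source-level
    # config enables it; profiling work units are now yielded separately from
    # the regular metadata work units produced by get_workunits_internal().
    if source.is_profiling_enabled_internal():
        yield from source.get_profiling_internal()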

datahub/ingestion/source/sql/teradata.py

@@ -51,7 +51,6 @@ from datahub.ingestion.source.sql.two_tier_sql_source import (
     TwoTierSQLAlchemySource,
 )
 from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
-from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
 from datahub.ingestion.source_report.time_window import BaseTimeWindowReport
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     BytesTypeClass,
@@ -434,7 +433,7 @@ def optimized_get_view_definition(
 
 
 @dataclass
-class TeradataReport(SQLSourceReport,
+class TeradataReport(SQLSourceReport, BaseTimeWindowReport):
     # View processing metrics (actively used)
     num_views_processed: int = 0
     num_view_processing_failures: int = 0

datahub/ingestion/source/sql_queries.py

@@ -40,7 +40,6 @@ from datahub.ingestion.api.source_helpers import auto_workunit_reporter
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.usage.usage_common import BaseUsageConfig
-from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
 from datahub.metadata.urns import CorpUserUrn, DatasetUrn
 from datahub.sql_parsing.schema_resolver import SchemaResolver
 from datahub.sql_parsing.sql_parsing_aggregator import (
@@ -86,7 +85,7 @@ class SqlQueriesSourceConfig(
 
 
 @dataclass
-class SqlQueriesSourceReport(SourceReport
+class SqlQueriesSourceReport(SourceReport):
     num_entries_processed: int = 0
     num_entries_failed: int = 0
     num_queries_aggregator_failures: int = 0

datahub/ingestion/source/tableau/tableau.py

@@ -120,7 +120,6 @@ from datahub.ingestion.source.tableau.tableau_common import (
 )
 from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
 from datahub.ingestion.source.tableau.tableau_validation import check_user_role
-from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
 from datahub.metadata.com.linkedin.pegasus2avro.common import (
     AuditStamp,
     ChangeAuditStamps,
@@ -795,7 +794,6 @@ class SiteIdContentUrl:
 @dataclass
 class TableauSourceReport(
     StaleEntityRemovalSourceReport,
-    IngestionStageReport,
 ):
     get_all_datasources_query_failed: bool = False
     num_get_datasource_query_failures: int = 0

datahub/ingestion/source/unity/config.py

@@ -132,14 +132,13 @@ class UnityCatalogGEProfilerConfig(UnityCatalogProfilerConfig, GEProfilingConfig
     )
 
 
-class UnityCatalogSourceConfig(
-    SQLCommonConfig,
-    StatefulIngestionConfigBase,
-    BaseUsageConfig,
-    DatasetSourceConfigMixin,
-    StatefulProfilingConfigMixin,
-    LowerCaseDatasetUrnConfigMixin,
-):
+class UnityCatalogConnectionConfig(ConfigModel):
+    """
+    Configuration for connecting to Databricks Unity Catalog.
+    Contains only connection-related fields that can be reused across different sources.
+    """
+
+    scheme: str = DATABRICKS
     token: str = pydantic.Field(description="Databricks personal access token")
     workspace_url: str = pydantic.Field(
         description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
@@ -156,15 +155,41 @@ class UnityCatalogSourceConfig(
             "When warehouse_id is missing, these features will be automatically disabled (with warnings) to allow ingestion to continue."
         ),
     )
-    include_hive_metastore: bool = pydantic.Field(
-        default=INCLUDE_HIVE_METASTORE_DEFAULT,
-        description="Whether to ingest legacy `hive_metastore` catalog. This requires executing queries on SQL warehouse.",
-    )
-    workspace_name: Optional[str] = pydantic.Field(
-        default=None,
-        description="Name of the workspace. Default to deployment name present in workspace_url",
+
+    extra_client_options: Dict[str, Any] = Field(
+        default={},
+        description="Additional options to pass to Databricks SQLAlchemy client.",
     )
 
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+
+    def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
+        uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
+        if database:
+            uri_opts["catalog"] = database
+        return make_sqlalchemy_uri(
+            scheme=self.scheme,
+            username="token",
+            password=self.token,
+            at=urlparse(self.workspace_url).netloc,
+            db=database,
+            uri_opts=uri_opts,
+        )
+
+    def get_options(self) -> dict:
+        return self.extra_client_options
+
+
+class UnityCatalogSourceConfig(
+    UnityCatalogConnectionConfig,
+    SQLCommonConfig,
+    StatefulIngestionConfigBase,
+    BaseUsageConfig,
+    DatasetSourceConfigMixin,
+    StatefulProfilingConfigMixin,
+    LowerCaseDatasetUrnConfigMixin,
+):
     include_metastore: bool = pydantic.Field(
         default=False,
         description=(
@@ -344,7 +369,15 @@ class UnityCatalogSourceConfig(
     _forced_disable_tag_extraction: bool = pydantic.PrivateAttr(default=False)
     _forced_disable_hive_metastore_extraction = pydantic.PrivateAttr(default=False)
 
-
+    include_hive_metastore: bool = pydantic.Field(
+        default=INCLUDE_HIVE_METASTORE_DEFAULT,
+        description="Whether to ingest legacy `hive_metastore` catalog. This requires executing queries on SQL warehouse.",
+    )
+
+    workspace_name: Optional[str] = pydantic.Field(
+        default=None,
+        description="Name of the workspace. Default to deployment name present in workspace_url",
+    )
 
     def __init__(self, **data):
         # First, let the parent handle the root validators and field processing
@@ -386,19 +419,6 @@ class UnityCatalogSourceConfig(
             forced_disable_hive_metastore_extraction
         )
 
-    def get_sql_alchemy_url(self, database: Optional[str] = None) -> str:
-        uri_opts = {"http_path": f"/sql/1.0/warehouses/{self.warehouse_id}"}
-        if database:
-            uri_opts["catalog"] = database
-        return make_sqlalchemy_uri(
-            scheme=self.scheme,
-            username="token",
-            password=self.token,
-            at=urlparse(self.workspace_url).netloc,
-            db=database,
-            uri_opts=uri_opts,
-        )
-
     def is_profiling_enabled(self) -> bool:
         return self.profiling.enabled and is_profiling_enabled(
             self.profiling.operation_config
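The net effect of the unity/config.py hunks is that connection handling (token, workspace_url, get_sql_alchemy_url, get_options) now lives on a reusable UnityCatalogConnectionConfig base class. Below is a hedged usage sketch; the token, workspace URL, and warehouse id values are placeholders, and it assumes the remaining connection fields (such as warehouse_id) keep the definitions they already have in this module.

from datahub.ingestion.source.unity.config import UnityCatalogConnectionConfig

# Placeholder values; a real config would normally come from an ingestion recipe.
conn = UnityCatalogConnectionConfig(
    token="dapiXXXXXXXXXXXXXXXX",
    workspace_url="https://my-workspace.cloud.databricks.com",
    warehouse_id="1234567890abcdef",
)

# get_sql_alchemy_url() is now defined on the connection config itself, so other
# sources can build a Databricks SQLAlchemy URL without the full source config.
url = conn.get_sql_alchemy_url(database="main")
print(url)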

datahub/ingestion/source/unity/report.py

@@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Optional, Tuple
 
 from datahub.ingestion.api.report import EntityFilterReport, Report
 from datahub.ingestion.source.sql.sql_report import SQLSourceReport
-from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
 from datahub.utilities.lossy_collections import LossyDict, LossyList
 from datahub.utilities.perf_timer import PerfTimer
 
@@ -24,7 +23,7 @@ class UnityCatalogUsagePerfReport(Report):
 
 
 @dataclass
-class UnityCatalogReport(
+class UnityCatalogReport(SQLSourceReport):
     metastores: EntityFilterReport = EntityFilterReport.field(type="metastore")
     catalogs: EntityFilterReport = EntityFilterReport.field(type="catalog")
     schemas: EntityFilterReport = EntityFilterReport.field(type="schema")

datahub/ingestion/source_report/ingestion_stage.py

@@ -1,7 +1,10 @@
 import logging
+from collections import defaultdict
 from contextlib import AbstractContextManager
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
+from enum import Enum
+from typing import Tuple
 
 from datahub.utilities.perf_timer import PerfTimer
 from datahub.utilities.stats_collections import TopKDict
@@ -20,31 +23,70 @@ QUERIES_EXTRACTION = "Queries Extraction"
 PROFILING = "Profiling"
 
 
+class IngestionHighStage(Enum):
+    """
+    The high-level stages at the framework level
+    Team to add more stages as needed
+    """
+
+    PROFILING = "Profiling"
+    _UNDEFINED = "Ingestion"
+
+
 @dataclass
 class IngestionStageReport:
-
+    ingestion_high_stage_seconds: dict[IngestionHighStage, float] = field(
+        default_factory=lambda: defaultdict(float)
+    )
+    ingestion_stage_durations: TopKDict[Tuple[IngestionHighStage, str], float] = field(
+        default_factory=TopKDict
+    )
 
-    def new_stage(
-
+    def new_stage(
+        self, stage: str, high_stage: IngestionHighStage = IngestionHighStage._UNDEFINED
+    ) -> "IngestionStageContext":
+        return IngestionStageContext(stage, self, high_stage)
+
+    def new_high_stage(self, stage: IngestionHighStage) -> "IngestionStageContext":
+        return IngestionStageContext("", self, stage)
 
 
 @dataclass
 class IngestionStageContext(AbstractContextManager):
-    def __init__(
-        self
+    def __init__(
+        self,
+        stage: str,
+        report: IngestionStageReport,
+        high_stage: IngestionHighStage = IngestionHighStage._UNDEFINED,
+    ):
+        self._high_stage = high_stage
+        self._ingestion_stage = (
+            f"{stage} at {datetime.now(timezone.utc)}" if stage else ""
+        )
         self._timer: PerfTimer = PerfTimer()
         self._report = report
 
     def __enter__(self) -> "IngestionStageContext":
-
+        if self._ingestion_stage:
+            logger.info(f"Stage started: {self._ingestion_stage}")
+        else:
+            logger.info(f"High stage started: {self._high_stage.value}")
         self._timer.start()
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         elapsed = self._timer.elapsed_seconds(digits=2)
-
-
-
-
-
-
+        if self._ingestion_stage:
+            logger.info(
+                f"Time spent in stage <{self._ingestion_stage}>: {elapsed} seconds",
+                stacklevel=2,
+            )
+            self._report.ingestion_stage_durations[
+                (self._high_stage, self._ingestion_stage)
+            ] = elapsed
+        else:
+            logger.info(
+                f"Time spent in stage <{self._high_stage.value}>: {elapsed} seconds",
+                stacklevel=2,
+            )
+            self._report.ingestion_high_stage_seconds[self._high_stage] += elapsed
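The reworked ingestion_stage.py keeps the existing context-manager pattern but adds a coarse IngestionHighStage dimension on top of the free-form stage strings. The snippet below is an illustrative sketch of how a report object that mixes in IngestionStageReport might use the two entry points; the stage names are placeholders, not taken from the package.

from datahub.ingestion.source_report.ingestion_stage import (
    IngestionHighStage,
    IngestionStageReport,
)

report = IngestionStageReport()

# Fine-grained stage attributed to the PROFILING high-level stage; on exit the
# elapsed time is recorded in report.ingestion_stage_durations under the key
# (high_stage, "<stage> at <timestamp>").
with report.new_stage("Profiling tables in schema db1.sales", IngestionHighStage.PROFILING):
    pass  # ... per-schema profiling work would run here ...

# High-level stage only; on exit the elapsed time is accumulated into
# report.ingestion_high_stage_seconds[IngestionHighStage.PROFILING].
with report.new_high_stage(IngestionHighStage.PROFILING):
    pass  # ... more profiling work ...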

datahub/metadata/_internal_schema_classes.py

@@ -21758,6 +21758,153 @@ class ParametersClass(DictWrapper):
         pass
 
 
+class RelationshipChangeEventClass(DictWrapper):
+    """Kafka event for proposing a relationship change between two entities.
+    For example, when dataset1 establishes a new downstream relationship with dataset2."""
+
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.platform.event.v1.RelationshipChangeEvent")
+    def __init__(self,
+        sourceUrn: str,
+        destinationUrn: str,
+        operation: Union[str, "RelationshipChangeOperationClass"],
+        relationshipType: str,
+        auditStamp: "AuditStampClass",
+        auditHeader: Union[None, "KafkaAuditHeaderClass"]=None,
+        lifecycleOwner: Union[None, str]=None,
+        via: Union[None, str]=None,
+        properties: Union[None, Dict[str, str]]=None,
+    ):
+        super().__init__()
+
+        self.auditHeader = auditHeader
+        self.sourceUrn = sourceUrn
+        self.destinationUrn = destinationUrn
+        self.operation = operation
+        self.relationshipType = relationshipType
+        self.lifecycleOwner = lifecycleOwner
+        self.via = via
+        self.properties = properties
+        self.auditStamp = auditStamp
+
+    def _restore_defaults(self) -> None:
+        self.auditHeader = self.RECORD_SCHEMA.fields_dict["auditHeader"].default
+        self.sourceUrn = str()
+        self.destinationUrn = str()
+        self.operation = RelationshipChangeOperationClass.ADD
+        self.relationshipType = str()
+        self.lifecycleOwner = self.RECORD_SCHEMA.fields_dict["lifecycleOwner"].default
+        self.via = self.RECORD_SCHEMA.fields_dict["via"].default
+        self.properties = self.RECORD_SCHEMA.fields_dict["properties"].default
+        self.auditStamp = AuditStampClass._construct_with_defaults()
+
+
+    @property
+    def auditHeader(self) -> Union[None, "KafkaAuditHeaderClass"]:
+        """Kafka audit header containing metadata about the message itself.
+        Includes information like message ID, timestamp, and server details."""
+        return self._inner_dict.get('auditHeader') # type: ignore
+
+    @auditHeader.setter
+    def auditHeader(self, value: Union[None, "KafkaAuditHeaderClass"]) -> None:
+        self._inner_dict['auditHeader'] = value
+
+
+    @property
+    def sourceUrn(self) -> str:
+        """The URN (Uniform Resource Name) of the source entity in the relationship.
+        In a downstream relationship example, this would be the URN of the upstream dataset."""
+        return self._inner_dict.get('sourceUrn') # type: ignore
+
+    @sourceUrn.setter
+    def sourceUrn(self, value: str) -> None:
+        self._inner_dict['sourceUrn'] = value
+
+
+    @property
+    def destinationUrn(self) -> str:
+        """The URN of the destination entity in the relationship.
+        In a downstream relationship example, this would be the URN of the downstream dataset."""
+        return self._inner_dict.get('destinationUrn') # type: ignore
+
+    @destinationUrn.setter
+    def destinationUrn(self, value: str) -> None:
+        self._inner_dict['destinationUrn'] = value
+
+
+    @property
+    def operation(self) -> Union[str, "RelationshipChangeOperationClass"]:
+        """The operation being performed on this relationship.
+        Typically includes operations like ADD, REMOVE, or RESTATE."""
+        return self._inner_dict.get('operation') # type: ignore
+
+    @operation.setter
+    def operation(self, value: Union[str, "RelationshipChangeOperationClass"]) -> None:
+        self._inner_dict['operation'] = value
+
+
+    @property
+    def relationshipType(self) -> str:
+        """The type/category of relationship being established or modified.
+        Examples: "DownstreamOf", "Contains", "OwnedBy", "DerivedFrom", etc."""
+        return self._inner_dict.get('relationshipType') # type: ignore
+
+    @relationshipType.setter
+    def relationshipType(self, value: str) -> None:
+        self._inner_dict['relationshipType'] = value
+
+
+    @property
+    def lifecycleOwner(self) -> Union[None, str]:
+        """The system or service responsible for managing the lifecycle of this relationship.
+        This helps identify which component has authority over the relationship."""
+        return self._inner_dict.get('lifecycleOwner') # type: ignore
+
+    @lifecycleOwner.setter
+    def lifecycleOwner(self, value: Union[None, str]) -> None:
+        self._inner_dict['lifecycleOwner'] = value
+
+
+    @property
+    def via(self) -> Union[None, str]:
+        """Information about how or through what means this relationship was established.
+        Could indicate a specific pipeline, process, or tool that discovered/created the relationship."""
+        return self._inner_dict.get('via') # type: ignore
+
+    @via.setter
+    def via(self, value: Union[None, str]) -> None:
+        self._inner_dict['via'] = value
+
+
+    @property
+    def properties(self) -> Union[None, Dict[str, str]]:
+        """Additional custom properties associated with this relationship.
+        Allows for flexible extension without changing the schema."""
+        return self._inner_dict.get('properties') # type: ignore
+
+    @properties.setter
+    def properties(self, value: Union[None, Dict[str, str]]) -> None:
+        self._inner_dict['properties'] = value
+
+
+    @property
+    def auditStamp(self) -> "AuditStampClass":
+        """Stores information about who made this change and when.
+        Contains the actor (user or system) that performed the action and the timestamp."""
+        return self._inner_dict.get('auditStamp') # type: ignore
+
+    @auditStamp.setter
+    def auditStamp(self, value: "AuditStampClass") -> None:
+        self._inner_dict['auditStamp'] = value
+
+
+class RelationshipChangeOperationClass(object):
+    # No docs available.
+
+    ADD = "ADD"
+    REMOVE = "REMOVE"
+    RESTATE = "RESTATE"
+
+
 class PlatformResourceInfoClass(_Aspect):
     """Platform Resource Info.
     These entities are for miscelaneous data that is used in non-core parts of the system.
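For orientation, here is a hedged sketch of constructing the new event exactly as the generated signature above suggests; the URNs, actor, and timestamp are placeholder values, and the imports go through datahub.metadata.schema_classes, which re-exports the generated classes.

from datahub.metadata.schema_classes import (
    AuditStampClass,
    RelationshipChangeEventClass,
    RelationshipChangeOperationClass,
)

# Placeholder URNs for an upstream/downstream dataset pair, following the
# field docstrings (sourceUrn = upstream, destinationUrn = downstream).
event = RelationshipChangeEventClass(
    sourceUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.upstream,PROD)",
    destinationUrn="urn:li:dataset:(urn:li:dataPlatform:hive,db.downstream,PROD)",
    operation=RelationshipChangeOperationClass.ADD,
    relationshipType="DownstreamOf",
    auditStamp=AuditStampClass(time=1700000000000, actor="urn:li:corpuser:datahub"),
)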
@@ -25875,6 +26022,7 @@ class StructuredPropertySettingsClass(_Aspect):
         isHidden: Optional[bool]=None,
         showInSearchFilters: Optional[bool]=None,
         showInAssetSummary: Optional[bool]=None,
+        hideInAssetSummaryWhenEmpty: Optional[bool]=None,
         showAsAssetBadge: Optional[bool]=None,
         showInColumnsTable: Optional[bool]=None,
         lastModified: Union[None, "AuditStampClass"]=None,
@@ -25896,6 +26044,11 @@ class StructuredPropertySettingsClass(_Aspect):
             self.showInAssetSummary = self.RECORD_SCHEMA.fields_dict["showInAssetSummary"].default
         else:
             self.showInAssetSummary = showInAssetSummary
+        if hideInAssetSummaryWhenEmpty is None:
+            # default: False
+            self.hideInAssetSummaryWhenEmpty = self.RECORD_SCHEMA.fields_dict["hideInAssetSummaryWhenEmpty"].default
+        else:
+            self.hideInAssetSummaryWhenEmpty = hideInAssetSummaryWhenEmpty
         if showAsAssetBadge is None:
             # default: False
             self.showAsAssetBadge = self.RECORD_SCHEMA.fields_dict["showAsAssetBadge"].default
@@ -25912,6 +26065,7 @@ class StructuredPropertySettingsClass(_Aspect):
         self.isHidden = self.RECORD_SCHEMA.fields_dict["isHidden"].default
         self.showInSearchFilters = self.RECORD_SCHEMA.fields_dict["showInSearchFilters"].default
         self.showInAssetSummary = self.RECORD_SCHEMA.fields_dict["showInAssetSummary"].default
+        self.hideInAssetSummaryWhenEmpty = self.RECORD_SCHEMA.fields_dict["hideInAssetSummaryWhenEmpty"].default
         self.showAsAssetBadge = self.RECORD_SCHEMA.fields_dict["showAsAssetBadge"].default
         self.showInColumnsTable = self.RECORD_SCHEMA.fields_dict["showInColumnsTable"].default
         self.lastModified = self.RECORD_SCHEMA.fields_dict["lastModified"].default
@@ -25947,6 +26101,17 @@ class StructuredPropertySettingsClass(_Aspect):
         self._inner_dict['showInAssetSummary'] = value
 
 
+    @property
+    def hideInAssetSummaryWhenEmpty(self) -> bool:
+        """Whether or not this asset should be hidden in the asset sidebar (showInAssetSummary should be enabled)
+        when its value is empty"""
+        return self._inner_dict.get('hideInAssetSummaryWhenEmpty') # type: ignore
+
+    @hideInAssetSummaryWhenEmpty.setter
+    def hideInAssetSummaryWhenEmpty(self, value: bool) -> None:
+        self._inner_dict['hideInAssetSummaryWhenEmpty'] = value
+
+
     @property
     def showAsAssetBadge(self) -> bool:
         """Whether or not this asset should be displayed as an asset badge on other
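A hedged sketch of supplying the new setting when building the aspect follows; every parameter in the generated constructor shown above is optional, and the combination below simply mirrors the docstring (show the property in the asset summary sidebar, but hide it there while its value is empty).

from datahub.metadata.schema_classes import StructuredPropertySettingsClass

# Display the structured property in the asset summary sidebar, but suppress
# it there until a value has actually been assigned.
settings = StructuredPropertySettingsClass(
    showInAssetSummary=True,
    hideInAssetSummaryWhenEmpty=True,
)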
@@ -27759,6 +27924,8 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.persona.DataHubPersonaInfo': DataHubPersonaInfoClass,
     'com.linkedin.pegasus2avro.platform.event.v1.EntityChangeEvent': EntityChangeEventClass,
     'com.linkedin.pegasus2avro.platform.event.v1.Parameters': ParametersClass,
+    'com.linkedin.pegasus2avro.platform.event.v1.RelationshipChangeEvent': RelationshipChangeEventClass,
+    'com.linkedin.pegasus2avro.platform.event.v1.RelationshipChangeOperation': RelationshipChangeOperationClass,
     'com.linkedin.pegasus2avro.platformresource.PlatformResourceInfo': PlatformResourceInfoClass,
     'com.linkedin.pegasus2avro.platformresource.PlatformResourceKey': PlatformResourceKeyClass,
     'com.linkedin.pegasus2avro.policy.DataHubActorFilter': DataHubActorFilterClass,
@@ -28279,6 +28446,8 @@ __SCHEMA_TYPES = {
     'DataHubPersonaInfo': DataHubPersonaInfoClass,
     'EntityChangeEvent': EntityChangeEventClass,
     'Parameters': ParametersClass,
+    'RelationshipChangeEvent': RelationshipChangeEventClass,
+    'RelationshipChangeOperation': RelationshipChangeOperationClass,
     'PlatformResourceInfo': PlatformResourceInfoClass,
     'PlatformResourceKey': PlatformResourceKeyClass,
     'DataHubActorFilter': DataHubActorFilterClass,

datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py

@@ -9,9 +9,13 @@
 # isort: skip_file
 from .......schema_classes import EntityChangeEventClass
 from .......schema_classes import ParametersClass
+from .......schema_classes import RelationshipChangeEventClass
+from .......schema_classes import RelationshipChangeOperationClass
 
 
 EntityChangeEvent = EntityChangeEventClass
 Parameters = ParametersClass
+RelationshipChangeEvent = RelationshipChangeEventClass
+RelationshipChangeOperation = RelationshipChangeOperationClass
 
 # fmt: on