acryl-datahub 1.0.0rc5__py3-none-any.whl → 1.0.0rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0rc5.dist-info → acryl_datahub-1.0.0rc6.dist-info}/METADATA +2415 -2415
- {acryl_datahub-1.0.0rc5.dist-info → acryl_datahub-1.0.0rc6.dist-info}/RECORD +47 -46
- {acryl_datahub-1.0.0rc5.dist-info → acryl_datahub-1.0.0rc6.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/cli/ingest_cli.py +3 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/run/pipeline.py +109 -143
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +11 -4
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -2
- datahub/ingestion/source/mlflow.py +30 -7
- datahub/ingestion/source/mode.py +7 -2
- datahub/ingestion/source/neo4j/neo4j_source.py +26 -6
- datahub/ingestion/source/nifi.py +29 -6
- datahub/ingestion/source/powerbi_report_server/report_server.py +25 -6
- datahub/ingestion/source/pulsar.py +1 -0
- datahub/ingestion/source/redash.py +29 -6
- datahub/ingestion/source/s3/config.py +3 -1
- datahub/ingestion/source/salesforce.py +28 -6
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/sql/oracle.py +34 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/metadata/_schema_classes.py +517 -410
- datahub/metadata/_urns/urn_defs.py +1670 -1670
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +17362 -17638
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +29 -12
- datahub/sdk/_entity.py +18 -1
- datahub/sdk/container.py +3 -1
- datahub/sdk/dataset.py +5 -3
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-1.0.0rc5.dist-info → acryl_datahub-1.0.0rc6.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc5.dist-info → acryl_datahub-1.0.0rc6.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc5.dist-info → acryl_datahub-1.0.0rc6.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/salesforce.py
@@ -17,7 +17,9 @@ from datahub.configuration.common import (
     ConfigModel,
     ConfigurationError,
 )
-from datahub.configuration.source_common import DatasetSourceConfigMixin
+from datahub.configuration.source_common import (
+    DatasetSourceConfigMixin,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import add_domain_to_entity_wu
 from datahub.ingestion.api.common import PipelineContext
@@ -29,9 +31,17 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionConfigBase,
+    StatefulIngestionSourceBase,
+)
 from datahub.ingestion.source_config.operation_config import (
     OperationConfig,
     is_profiling_enabled,
@@ -85,7 +95,10 @@ class SalesforceProfilingConfig(ConfigModel):
     # TODO - support field level profiling


-class SalesforceConfig(DatasetSourceConfigMixin):
+class SalesforceConfig(
+    StatefulIngestionConfigBase,
+    DatasetSourceConfigMixin,
+):
     platform: str = "salesforce"

     auth: SalesforceAuthType = SalesforceAuthType.USERNAME_PASSWORD
@@ -149,7 +162,7 @@ class SalesforceConfig(DatasetSourceConfigMixin):


 @dataclass
-class SalesforceSourceReport(
+class SalesforceSourceReport(StaleEntityRemovalSourceReport):
     filtered: LossyList[str] = dataclass_field(default_factory=LossyList)

     def report_dropped(self, ent_name: str) -> None:
@@ -214,7 +227,7 @@ FIELD_TYPE_MAPPING = {
     capability_name=SourceCapability.TAGS,
     description="Enabled by default",
 )
-class SalesforceSource(Source):
+class SalesforceSource(StatefulIngestionSourceBase):
     base_url: str
     config: SalesforceConfig
     report: SalesforceSourceReport
@@ -223,7 +236,8 @@ class SalesforceSource(Source):
     fieldCounts: Dict[str, int]

     def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
-        super().__init__(ctx)
+        super().__init__(config, ctx)
+        self.ctx = ctx
         self.config = config
         self.report = SalesforceSourceReport()
         self.session = requests.Session()
@@ -328,6 +342,14 @@ class SalesforceSource(Source):
             )
         )

+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.config, self.ctx
+            ).workunit_processor,
+        ]
+
     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         try:
             sObjects = self.get_salesforce_objects()
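Both the Salesforce and Slack changes follow the same pattern: the source now extends StatefulIngestionSourceBase, so stale-entity removal can be switched on from the recipe. A minimal sketch of such a run, assuming the standard `stateful_ingestion.enabled` flag; the pipeline name, credentials, and sink address below are placeholders, not values from this diff:

from datahub.ingestion.run.pipeline import Pipeline

# Hypothetical recipe: a stable pipeline_name is what lets state be keyed across runs.
pipeline = Pipeline.create(
    {
        "pipeline_name": "salesforce_prod",  # placeholder
        "source": {
            "type": "salesforce",
            "config": {
                "instance_url": "https://example.my.salesforce.com",  # placeholder
                "username": "user@example.com",  # placeholder
                "password": "...",  # placeholder
                "stateful_ingestion": {"enabled": True},
            },
        },
        "sink": {"type": "datahub-rest", "config": {"server": "http://localhost:8080"}},
    }
)
pipeline.run()
pipeline.raise_from_status()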
datahub/ingestion/source/slack/slack.py
@@ -9,7 +9,6 @@ from tenacity import retry, wait_exponential
 from tenacity.before_sleep import before_sleep_log

 import datahub.emitter.mce_builder as builder
-from datahub.configuration.common import ConfigModel
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
@@ -18,8 +17,19 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import (
+    MetadataWorkUnitProcessor,
+    SourceReport,
+)
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionConfigBase,
+    StatefulIngestionSourceBase,
+)
 from datahub.metadata.schema_classes import (
     CorpUserEditableInfoClass,
     DatasetPropertiesClass,
@@ -44,7 +54,9 @@ class CorpUser:
     slack_display_name: Optional[str] = None


-class SlackSourceConfig(ConfigModel):
+class SlackSourceConfig(
+    StatefulIngestionConfigBase,
+):
     bot_token: SecretStr = Field(
         description="Bot token for the Slack workspace. Needs `users:read`, `users:read.email` and `users.profile:read` scopes.",
     )
@@ -58,22 +70,22 @@ class SlackSourceConfig(ConfigModel):
         default=10,
         description="Number of API requests per minute. Low-level config. Do not tweak unless you are facing any issues.",
     )
-    ingest_public_channels = Field(
+    ingest_public_channels: bool = Field(
         type=bool,
         default=False,
         description="Whether to ingest public channels. If set to true needs `channels:read` scope.",
     )
-    channels_iteration_limit = Field(
+    channels_iteration_limit: int = Field(
         type=int,
         default=200,
         description="Limit the number of channels to be ingested in a iteration. Low-level config. Do not tweak unless you are facing any issues.",
     )
-    channel_min_members = Field(
+    channel_min_members: int = Field(
         type=int,
         default=2,
         description="Ingest channels with at least this many members.",
     )
-    should_ingest_archived_channels = Field(
+    should_ingest_archived_channels: bool = Field(
         type=bool,
         default=False,
         description="Whether to ingest archived channels.",
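The four Slack config fields above previously lacked type annotations; `type=bool` / `type=int` is not an argument that pydantic's `Field()` recognizes for validation (unknown kwargs are stored as extra metadata), so the new `: bool` / `: int` annotations are what actually drive validation and coercion. A small illustration with made-up names:

from pydantic import BaseModel, Field

class SlackishConfig(BaseModel):
    # The annotation, not the `type=` kwarg, tells pydantic how to validate/coerce.
    channels_iteration_limit: int = Field(default=200)
    ingest_public_channels: bool = Field(default=False)

cfg = SlackishConfig(channels_iteration_limit="50")
print(cfg.channels_iteration_limit)  # -> 50, coerced to int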
@@ -81,7 +93,7 @@ class SlackSourceConfig(ConfigModel):


 @dataclass
-class SlackSourceReport(
+class SlackSourceReport(StaleEntityRemovalSourceReport):
     channels_reported: int = 0
     archived_channels_reported: int = 0

@@ -92,11 +104,12 @@ PLATFORM_NAME = "slack"
 @platform_name("Slack")
 @config_class(SlackSourceConfig)
 @support_status(SupportStatus.TESTING)
-class SlackSource(Source):
+class SlackSource(StatefulIngestionSourceBase):
     def __init__(self, ctx: PipelineContext, config: SlackSourceConfig):
+        super().__init__(config, ctx)
         self.ctx = ctx
         self.config = config
-        self.report = SlackSourceReport()
+        self.report: SlackSourceReport = SlackSourceReport()
         self.workspace_base_url: Optional[str] = None
         self.rate_limiter = RateLimiter(
             max_calls=self.config.api_requests_per_min, period=60
@@ -111,6 +124,14 @@ class SlackSource(Source):
     def get_slack_client(self) -> WebClient:
         return WebClient(token=self.config.bot_token.get_secret_value())

+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.config, self.ctx
+            ).workunit_processor,
+        ]
+
     def get_workunits_internal(
         self,
     ) -> Iterable[MetadataWorkUnit]:
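`get_workunit_processors` returns an ordered list that the framework threads the workunit stream through, which is how the stale-entity handler gets to observe every emitted entity. A standalone sketch of that composition; the names here are stand-ins, not the DataHub API:

from typing import Callable, Iterable, List, Optional

WorkUnit = str  # stand-in for MetadataWorkUnit
Processor = Callable[[Iterable[WorkUnit]], Iterable[WorkUnit]]

def apply_processors(
    stream: Iterable[WorkUnit], processors: List[Optional[Processor]]
) -> Iterable[WorkUnit]:
    # Each processor wraps the stream in order; None entries are skipped.
    for processor in processors:
        if processor is not None:
            stream = processor(stream)
    return stream

def drop_archived(stream: Iterable[WorkUnit]) -> Iterable[WorkUnit]:
    # Example processor: filter out workunits matching a predicate.
    return (wu for wu in stream if "archived" not in wu)

print(list(apply_processors(["a", "archived-b", "c"], [drop_archived, None])))  # ['a', 'c']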
datahub/ingestion/source/snowflake/snowflake_query.py
@@ -134,10 +134,11 @@ class SnowflakeQuery:
         clustering_key AS "CLUSTERING_KEY",
         auto_clustering_on AS "AUTO_CLUSTERING_ON",
         is_dynamic AS "IS_DYNAMIC",
-        is_iceberg AS "IS_ICEBERG"
+        is_iceberg AS "IS_ICEBERG",
+        is_hybrid AS "IS_HYBRID"
         FROM {db_clause}information_schema.tables t
         WHERE table_schema != 'INFORMATION_SCHEMA'
-        and table_type in ( 'BASE TABLE', 'EXTERNAL TABLE'
+        and table_type in ( 'BASE TABLE', 'EXTERNAL TABLE')
         order by table_schema, table_name"""

     @staticmethod
@@ -156,10 +157,11 @@ class SnowflakeQuery:
         clustering_key AS "CLUSTERING_KEY",
         auto_clustering_on AS "AUTO_CLUSTERING_ON",
         is_dynamic AS "IS_DYNAMIC",
-        is_iceberg AS "IS_ICEBERG"
+        is_iceberg AS "IS_ICEBERG",
+        is_hybrid AS "IS_HYBRID"
         FROM {db_clause}information_schema.tables t
         where table_schema='{schema_name}'
-        and table_type in ('BASE TABLE', 'EXTERNAL TABLE'
+        and table_type in ('BASE TABLE', 'EXTERNAL TABLE')
         order by table_schema, table_name"""

     @staticmethod
datahub/ingestion/source/snowflake/snowflake_schema.py
@@ -96,10 +96,7 @@ class SnowflakeTable(BaseTable):
     column_tags: Dict[str, List[SnowflakeTag]] = field(default_factory=dict)
     is_dynamic: bool = False
     is_iceberg: bool = False
-
-    @property
-    def is_hybrid(self) -> bool:
-        return self.type is not None and self.type == "HYBRID TABLE"
+    is_hybrid: bool = False

     def get_subtype(self) -> DatasetSubTypes:
         return DatasetSubTypes.TABLE
@@ -369,6 +366,7 @@ class SnowflakeDataDictionary(SupportsAsObj):
                         clustering_key=table["CLUSTERING_KEY"],
                         is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
                         is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
+                        is_hybrid=table.get("IS_HYBRID", "NO").upper() == "YES",
                     )
                 )
         return tables
@@ -395,6 +393,7 @@ class SnowflakeDataDictionary(SupportsAsObj):
                         clustering_key=table["CLUSTERING_KEY"],
                         is_dynamic=table.get("IS_DYNAMIC", "NO").upper() == "YES",
                         is_iceberg=table.get("IS_ICEBERG", "NO").upper() == "YES",
+                        is_hybrid=table.get("IS_HYBRID", "NO").upper() == "YES",
                     )
                 )
         return tables
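The queries now select `is_hybrid` alongside `is_dynamic` and `is_iceberg`, and the data dictionary parses each flag with the same "YES"/"NO" convention, replacing the old derived `is_hybrid` property with a stored boolean. A standalone sketch of that parsing convention; the row dict is a stand-in for a query result row:

def parse_flag(row: dict, key: str) -> bool:
    # information_schema reports these flags as the strings "YES"/"NO";
    # a missing column defaults to "NO", i.e. False.
    return row.get(key, "NO").upper() == "YES"

row = {"IS_DYNAMIC": "NO", "IS_ICEBERG": "YES"}  # hypothetical row; IS_HYBRID absent
print(parse_flag(row, "IS_ICEBERG"), parse_flag(row, "IS_HYBRID"))  # True False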
datahub/ingestion/source/sql/oracle.py
@@ -1,5 +1,6 @@
 import datetime
 import logging
+import platform
 import re

 # This import verifies that the dependencies are available.
@@ -85,6 +86,16 @@ class OracleConfig(BasicSQLAlchemyConfig):
         description="The data dictionary views mode, to extract information about schema objects "
         "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)",
     )
+    # oracledb settings to enable thick mode and client library location
+    enable_thick_mode: Optional[bool] = Field(
+        default=False,
+        description="Connection defaults to thin mode. Set to True to enable thick mode.",
+    )
+    thick_mode_lib_dir: Optional[str] = Field(
+        default=None,
+        description="If using thick mode on Windows or Mac, set thick_mode_lib_dir to the oracle client libraries path. "
+        "On Linux, this value is ignored, as ldconfig or LD_LIBRARY_PATH will define the location.",
+    )

     @pydantic.validator("service_name")
     def check_service_name(cls, v, values):
@@ -100,6 +111,18 @@ class OracleConfig(BasicSQLAlchemyConfig):
             raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
         return values

+    @pydantic.validator("thick_mode_lib_dir", always=True)
+    def check_thick_mode_lib_dir(cls, v, values):
+        if (
+            v is None
+            and values.get("enable_thick_mode")
+            and (platform.system() == "Darwin" or platform.system() == "Windows")
+        ):
+            raise ValueError(
+                "Specify 'thick_mode_lib_dir' on Mac/Windows when enable_thick_mode is true"
+            )
+        return v
+
     def get_sql_alchemy_url(self):
         url = super().get_sql_alchemy_url()
         if self.service_name:
@@ -586,6 +609,17 @@ class OracleSource(SQLAlchemySource):
     def __init__(self, config, ctx):
         super().__init__(config, ctx, "oracle")

+        # if connecting to oracle with enable_thick_mode, it must be initialized before calling
+        # create_engine, which is called in get_inspectors()
+        # https://python-oracledb.readthedocs.io/en/latest/user_guide/initialization.html#enabling-python-oracledb-thick-mode
+        if self.config.enable_thick_mode:
+            if platform.system() == "Darwin" or platform.system() == "Windows":
+                # windows and mac os require lib_dir to be set explicitly
+                oracledb.init_oracle_client(lib_dir=self.config.thick_mode_lib_dir)
+            else:
+                # linux requires configurating the library path with ldconfig or LD_LIBRARY_PATH
+                oracledb.init_oracle_client()
+
     @classmethod
     def create(cls, config_dict, ctx):
         config = OracleConfig.parse_obj(config_dict)
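The new options surface python-oracledb's thick mode, which loads the Oracle Client libraries instead of the default thin driver and must be initialized before the SQLAlchemy engine is created. A sketch of a config that exercises the new validator; the host, credentials, service name, and client path are placeholders:

from datahub.ingestion.source.sql.oracle import OracleConfig

config = OracleConfig.parse_obj(
    {
        "host_port": "oracle.example.com:1521",  # placeholder
        "username": "datahub",  # placeholder
        "password": "...",  # placeholder
        "service_name": "ORCLPDB1",  # placeholder
        "enable_thick_mode": True,
        # Required on macOS/Windows; ignored on Linux (use ldconfig/LD_LIBRARY_PATH there).
        "thick_mode_lib_dir": "/opt/oracle/instantclient",  # placeholder path
    }
)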
datahub/ingestion/source_config/pulsar.py
@@ -33,7 +33,9 @@ def _is_valid_hostname(hostname: str) -> bool:


 class PulsarSourceConfig(
-    StatefulIngestionConfigBase,
+    StatefulIngestionConfigBase,
+    PlatformInstanceConfigMixin,
+    EnvConfigMixin,
 ):
     web_service_url: str = Field(
         default="http://localhost:8080", description="The web URL for the cluster."
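Adding PlatformInstanceConfigMixin and EnvConfigMixin gives the Pulsar source the standard `platform_instance` and `env` options. A hypothetical source-config fragment showing the keys the two mixins contribute:

# Hypothetical recipe fragment; values are placeholders.
pulsar_config = {
    "web_service_url": "http://localhost:8080",
    "platform_instance": "pulsar-prod",  # from PlatformInstanceConfigMixin
    "env": "PROD",                       # from EnvConfigMixin
}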
datahub/ingestion/transformer/pattern_cleanup_ownership.py
@@ -1,3 +1,4 @@
+import logging
 import re
 from typing import List, Optional, Set, cast

@@ -10,8 +11,11 @@ from datahub.metadata.schema_classes import (
     OwnershipClass,
     OwnershipTypeClass,
 )
+from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn
+from datahub.utilities.urns._urn_base import Urn
+from datahub.utilities.urns.error import InvalidUrnError

-
+logger = logging.getLogger(__name__)


 class PatternCleanUpOwnershipConfig(ConfigModel):
@@ -49,6 +53,11 @@ class PatternCleanUpOwnership(OwnershipTransformer):
         else:
             return set()

+    def _process_owner(self, name: str) -> str:
+        for value in self.config.pattern_for_cleanup:
+            name = re.sub(value, "", name)
+        return name
+
     def transform_aspect(
         self, entity_urn: str, aspect_name: str, aspect: Optional[builder.Aspect]
     ) -> Optional[builder.Aspect]:
@@ -58,14 +67,23 @@ class PatternCleanUpOwnership(OwnershipTransformer):
         # clean all the owners based on the parameters received from config
         cleaned_owner_urns: List[str] = []
         for owner_urn in current_owner_urns:
-
-
-
-
-
+            username = ""
+            try:
+                owner: Urn = Urn.from_string(owner_urn)
+                if isinstance(owner, CorpUserUrn):
+                    username = str(CorpUserUrn(self._process_owner(owner.username)))
+                elif isinstance(owner, CorpGroupUrn):
+                    username = str(CorpGroupUrn(self._process_owner(owner.name)))
+                else:
+                    logger.warning(f"{owner_urn} is not a supported owner type.")
+                    username = owner_urn
+            except InvalidUrnError:
+                logger.warning(f"Could not parse {owner_urn} from {entity_urn}")
+                username = owner_urn
+            cleaned_owner_urns.append(username)

         ownership_type, ownership_type_urn = builder.validate_ownership_type(
-            OwnershipTypeClass.
+            OwnershipTypeClass.TECHNICAL_OWNER
         )
         owners = [
             OwnerClass(