acryl-datahub 1.2.0.10rc1__py3-none-any.whl → 1.2.0.10rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. (The original page links to more details; the link target is not included in this text rendering.)

@@ -282,6 +282,7 @@ def get_filter_name(filter_obj):
282
282
  )
283
283
  @capability(SourceCapability.DOMAINS, "Enabled by `domain` config to assign domain_key")
284
284
  @capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
285
+ @capability(SourceCapability.TAGS, "Supported by default")
285
286
  class SupersetSource(StatefulIngestionSourceBase):
286
287
  """
287
288
  This plugin extracts the following:
@@ -521,6 +522,11 @@ class SupersetSource(StatefulIngestionSourceBase):
521
522
  )
522
523
  dashboard_snapshot.aspects.append(owners_info)
523
524
 
525
+ superset_tags = self._extract_and_map_tags(dashboard_data.get("tags", []))
526
+ tags = self._merge_tags_with_existing(dashboard_urn, superset_tags)
527
+ if tags:
528
+ dashboard_snapshot.aspects.append(tags)
529
+
524
530
  return dashboard_snapshot
525
531
 
526
532
  def _process_dashboard(self, dashboard_data: Any) -> Iterable[MetadataWorkUnit]:
@@ -919,6 +925,12 @@ class SupersetSource(StatefulIngestionSourceBase):
919
925
  lastModified=last_modified,
920
926
  )
921
927
  chart_snapshot.aspects.append(owners_info)
928
+
929
+ superset_tags = self._extract_and_map_tags(chart_data.get("tags", []))
930
+ tags = self._merge_tags_with_existing(chart_urn, superset_tags)
931
+ if tags:
932
+ chart_snapshot.aspects.append(tags)
933
+
922
934
  yield MetadataWorkUnit(
923
935
  id=chart_urn, mce=MetadataChangeEvent(proposedSnapshot=chart_snapshot)
924
936
  )
@@ -1288,17 +1300,18 @@ class SupersetSource(StatefulIngestionSourceBase):
1288
1300
  externalUrl=dataset_url,
1289
1301
  lastModified=TimeStamp(time=modified_ts),
1290
1302
  )
1291
- global_tags = GlobalTagsClass(tags=[TagAssociationClass(tag=tag_urn)])
1292
-
1293
- aspects_items: List[Any] = []
1294
- aspects_items.extend(
1295
- [
1296
- self.gen_schema_metadata(dataset_response),
1297
- dataset_info,
1298
- upstream_lineage,
1299
- global_tags,
1300
- ]
1301
- )
1303
+
1304
+ dataset_tags = GlobalTagsClass(tags=[TagAssociationClass(tag=tag_urn)])
1305
+ tags = self._merge_tags_with_existing(datasource_urn, dataset_tags)
1306
+
1307
+ aspects_items: List[Any] = [
1308
+ self.gen_schema_metadata(dataset_response),
1309
+ dataset_info,
1310
+ upstream_lineage,
1311
+ ]
1312
+
1313
+ if tags:
1314
+ aspects_items.append(tags)
1302
1315
 
1303
1316
  dataset_snapshot = DatasetSnapshot(
1304
1317
  urn=datasource_urn,
@@ -1320,6 +1333,75 @@ class SupersetSource(StatefulIngestionSourceBase):
1320
1333
 
1321
1334
  return dataset_snapshot
1322
1335
 
1336
+ def _extract_and_map_tags(
1337
+ self, raw_tags: List[Dict[str, Any]]
1338
+ ) -> Optional[GlobalTagsClass]:
1339
+ """Extract and map Superset tags to DataHub GlobalTagsClass.
1340
+
1341
+ Filters out system-generated tags (type != 1) and only processes user-defined tags
1342
+ from the Superset API response.
1343
+
1344
+ Args:
1345
+ raw_tags: List of tag dictionaries from Superset API
1346
+
1347
+ Returns:
1348
+ GlobalTagsClass with user-defined tags, or None if no tags found
1349
+ """
1350
+ user_tags = [
1351
+ tag.get("name", "")
1352
+ for tag in raw_tags
1353
+ if tag.get("type") == 1 and tag.get("name")
1354
+ ]
1355
+
1356
+ if not user_tags:
1357
+ return None
1358
+
1359
+ tag_urns = [builder.make_tag_urn(tag) for tag in user_tags]
1360
+ return GlobalTagsClass(
1361
+ tags=[TagAssociationClass(tag=tag_urn) for tag_urn in tag_urns]
1362
+ )
1363
+
1364
+ def _merge_tags_with_existing(
1365
+ self, entity_urn: str, new_tags: Optional[GlobalTagsClass]
1366
+ ) -> Optional[GlobalTagsClass]:
1367
+ """Merge new tags with existing ones from DataHub to preserve manually added tags.
1368
+
1369
+ This method ensures that tags manually added via DataHub UI are not overwritten
1370
+ during ingestion. It fetches existing tags from the graph and merges them with
1371
+ new tags from the source system, avoiding duplicates.
1372
+
1373
+ Args:
1374
+ entity_urn: URN of the entity to check for existing tags
1375
+ new_tags: New tags to add as GlobalTagsClass object
1376
+
1377
+ Returns:
1378
+ GlobalTagsClass with merged tags preserving existing ones, or None if no tags
1379
+ """
1380
+ if not new_tags or not new_tags.tags:
1381
+ return None
1382
+
1383
+ # Fetch existing tags from DataHub
1384
+ existing_global_tags = None
1385
+ if self.ctx.graph:
1386
+ existing_global_tags = self.ctx.graph.get_aspect(
1387
+ entity_urn=entity_urn, aspect_type=GlobalTagsClass
1388
+ )
1389
+
1390
+ # Merge existing tags with new ones, avoiding duplicates
1391
+ all_tags = []
1392
+ existing_tag_urns = set()
1393
+
1394
+ if existing_global_tags and existing_global_tags.tags:
1395
+ all_tags.extend(existing_global_tags.tags)
1396
+ existing_tag_urns = {tag.tag for tag in existing_global_tags.tags}
1397
+
1398
+ # Add new tags that don't already exist
1399
+ for new_tag in new_tags.tags:
1400
+ if new_tag.tag not in existing_tag_urns:
1401
+ all_tags.append(new_tag)
1402
+
1403
+ return GlobalTagsClass(tags=all_tags) if all_tags else None
1404
+
1323
1405
  def _process_dataset(self, dataset_data: Any) -> Iterable[MetadataWorkUnit]:
1324
1406
  dataset_name = ""
1325
1407
  try:
@@ -15271,7 +15271,7 @@ class DataHubIngestionSourceSourceTypeClass(object):
15271
15271
 
15272
15272
 
15273
15273
  class LogicalParentClass(_Aspect):
15274
- # No docs available.
15274
+ """Relates a physical asset to a logical model."""
15275
15275
 
15276
15276
 
15277
15277
  ASPECT_NAME = 'logicalParent'
@@ -24921,6 +24921,7 @@ class GlobalSettingsInfoClass(_Aspect):
24921
24921
 
24922
24922
  def __init__(self,
24923
24923
  sso: Union[None, "SsoSettingsClass"]=None,
24924
+ oauth: Union[None, "OAuthSettingsClass"]=None,
24924
24925
  views: Union[None, "GlobalViewsSettingsClass"]=None,
24925
24926
  docPropagation: Optional[Union["DocPropagationFeatureSettingsClass", None]]=None,
24926
24927
  homePage: Union[None, "GlobalHomePageSettingsClass"]=None,
@@ -24929,6 +24930,7 @@ class GlobalSettingsInfoClass(_Aspect):
24929
24930
  super().__init__()
24930
24931
 
24931
24932
  self.sso = sso
24933
+ self.oauth = oauth
24932
24934
  self.views = views
24933
24935
  if docPropagation is None:
24934
24936
  # default: {'configVersion': None, 'config': None, 'enabled': True, 'columnPropagationEnabled': True}
@@ -24940,6 +24942,7 @@ class GlobalSettingsInfoClass(_Aspect):
24940
24942
 
24941
24943
  def _restore_defaults(self) -> None:
24942
24944
  self.sso = self.RECORD_SCHEMA.fields_dict["sso"].default
24945
+ self.oauth = self.RECORD_SCHEMA.fields_dict["oauth"].default
24943
24946
  self.views = self.RECORD_SCHEMA.fields_dict["views"].default
24944
24947
  self.docPropagation = _json_converter.from_json_object(self.RECORD_SCHEMA.fields_dict["docPropagation"].default, writers_schema=self.RECORD_SCHEMA.fields_dict["docPropagation"].type)
24945
24948
  self.homePage = self.RECORD_SCHEMA.fields_dict["homePage"].default
@@ -24956,6 +24959,16 @@ class GlobalSettingsInfoClass(_Aspect):
24956
24959
  self._inner_dict['sso'] = value
24957
24960
 
24958
24961
 
24962
@property
def oauth(self) -> Union[None, "OAuthSettingsClass"]:
    """OAuth authentication provider settings; None when the field is unset."""
    return self._inner_dict.get('oauth')  # type: ignore


@oauth.setter
def oauth(self, value: Union[None, "OAuthSettingsClass"]) -> None:
    # Stored directly in the wrapper's backing dict under the schema field name.
    self._inner_dict['oauth'] = value
24970
+
24971
+
24959
24972
  @property
24960
24973
  def views(self) -> Union[None, "GlobalViewsSettingsClass"]:
24961
24974
  """Settings related to the Views Feature"""
@@ -25021,6 +25034,145 @@ class GlobalViewsSettingsClass(DictWrapper):
25021
25034
  self._inner_dict['defaultView'] = value
25022
25035
 
25023
25036
 
25037
class OAuthProviderClass(DictWrapper):
    """An OAuth provider: the information required to validate inbound
    requests that carry OAuth 2.0 bearer tokens."""

    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.settings.global.OAuthProvider")

    def __init__(
        self,
        enabled: bool,
        name: str,
        issuer: str,
        audience: str,
        jwksUri: Union[None, str] = None,
        algorithm: Optional[str] = None,
        userIdClaim: Optional[str] = None,
    ):
        super().__init__()

        # Fields are assigned in the same order as the generated original.
        self.enabled = enabled
        self.name = name
        self.jwksUri = jwksUri
        self.issuer = issuer
        self.audience = audience
        # An omitted algorithm falls back to the schema default ('RS256').
        self.algorithm = (
            algorithm
            if algorithm is not None
            else self.RECORD_SCHEMA.fields_dict["algorithm"].default
        )
        # An omitted userIdClaim falls back to the schema default ('sub').
        self.userIdClaim = (
            userIdClaim
            if userIdClaim is not None
            else self.RECORD_SCHEMA.fields_dict["userIdClaim"].default
        )

    def _restore_defaults(self) -> None:
        # Required fields get empty values; optional ones get their schema default.
        self.enabled = False
        self.name = ""
        self.jwksUri = self.RECORD_SCHEMA.fields_dict["jwksUri"].default
        self.issuer = ""
        self.audience = ""
        self.algorithm = self.RECORD_SCHEMA.fields_dict["algorithm"].default
        self.userIdClaim = self.RECORD_SCHEMA.fields_dict["userIdClaim"].default

    @property
    def enabled(self) -> bool:
        """Whether this OAuth provider is enabled."""
        return self._inner_dict.get('enabled')  # type: ignore

    @enabled.setter
    def enabled(self, value: bool) -> None:
        self._inner_dict['enabled'] = value

    @property
    def name(self) -> str:
        """Display name of this OAuth provider; used for display purposes only."""
        return self._inner_dict.get('name')  # type: ignore

    @name.setter
    def name(self, value: str) -> None:
        self._inner_dict['name'] = value

    @property
    def jwksUri(self) -> Union[None, str]:
        """URI of this provider's JSON Web Key Set (JWKS) endpoint."""
        return self._inner_dict.get('jwksUri')  # type: ignore

    @jwksUri.setter
    def jwksUri(self, value: Union[None, str]) -> None:
        self._inner_dict['jwksUri'] = value

    @property
    def issuer(self) -> str:
        """Expected issuer (iss) claim in JWTs issued by this provider."""
        return self._inner_dict.get('issuer')  # type: ignore

    @issuer.setter
    def issuer(self, value: str) -> None:
        self._inner_dict['issuer'] = value

    @property
    def audience(self) -> str:
        """Expected audience (aud) claim in JWTs issued by this provider."""
        return self._inner_dict.get('audience')  # type: ignore

    @audience.setter
    def audience(self, value: str) -> None:
        self._inner_dict['audience'] = value

    @property
    def algorithm(self) -> str:
        """JWT signing algorithm required for this provider.
        Prevents algorithm confusion attacks. Common values: RS256, RS384, RS512, PS256, ES256"""
        return self._inner_dict.get('algorithm')  # type: ignore

    @algorithm.setter
    def algorithm(self, value: str) -> None:
        self._inner_dict['algorithm'] = value

    @property
    def userIdClaim(self) -> str:
        """JWT claim used as the user identifier for this provider.
        Different providers use different claims (sub, email, preferred_username, etc.)"""
        return self._inner_dict.get('userIdClaim')  # type: ignore

    @userIdClaim.setter
    def userIdClaim(self, value: str) -> None:
        self._inner_dict['userIdClaim'] = value
25149
+
25150
+
25151
class OAuthSettingsClass(DictWrapper):
    """The OAuth providers trusted for authentication."""

    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.settings.global.OAuthSettings")

    def __init__(
        self,
        providers: List["OAuthProviderClass"],
    ):
        super().__init__()

        self.providers = providers

    def _restore_defaults(self) -> None:
        # The only field is required, so its default is an empty list.
        self.providers = []

    @property
    def providers(self) -> List["OAuthProviderClass"]:
        """Trusted OAuth providers."""
        return self._inner_dict.get('providers')  # type: ignore

    @providers.setter
    def providers(self, value: List["OAuthProviderClass"]) -> None:
        self._inner_dict['providers'] = value
25174
+
25175
+
25024
25176
  class OidcSettingsClass(DictWrapper):
25025
25177
  """Settings for OIDC SSO integration."""
25026
25178
 
@@ -27636,6 +27788,8 @@ __SCHEMA_TYPES = {
27636
27788
  'com.linkedin.pegasus2avro.settings.global.GlobalHomePageSettings': GlobalHomePageSettingsClass,
27637
27789
  'com.linkedin.pegasus2avro.settings.global.GlobalSettingsInfo': GlobalSettingsInfoClass,
27638
27790
  'com.linkedin.pegasus2avro.settings.global.GlobalViewsSettings': GlobalViewsSettingsClass,
27791
+ 'com.linkedin.pegasus2avro.settings.global.OAuthProvider': OAuthProviderClass,
27792
+ 'com.linkedin.pegasus2avro.settings.global.OAuthSettings': OAuthSettingsClass,
27639
27793
  'com.linkedin.pegasus2avro.settings.global.OidcSettings': OidcSettingsClass,
27640
27794
  'com.linkedin.pegasus2avro.settings.global.SsoSettings': SsoSettingsClass,
27641
27795
  'com.linkedin.pegasus2avro.step.DataHubStepStateProperties': DataHubStepStatePropertiesClass,
@@ -28153,6 +28307,8 @@ __SCHEMA_TYPES = {
28153
28307
  'GlobalHomePageSettings': GlobalHomePageSettingsClass,
28154
28308
  'GlobalSettingsInfo': GlobalSettingsInfoClass,
28155
28309
  'GlobalViewsSettings': GlobalViewsSettingsClass,
28310
+ 'OAuthProvider': OAuthProviderClass,
28311
+ 'OAuthSettings': OAuthSettingsClass,
28156
28312
  'OidcSettings': OidcSettingsClass,
28157
28313
  'SsoSettings': SsoSettingsClass,
28158
28314
  'DataHubStepStateProperties': DataHubStepStatePropertiesClass,
@@ -12,6 +12,8 @@ from ......schema_classes import DocPropagationFeatureSettingsClass
12
12
  from ......schema_classes import GlobalHomePageSettingsClass
13
13
  from ......schema_classes import GlobalSettingsInfoClass
14
14
  from ......schema_classes import GlobalViewsSettingsClass
15
+ from ......schema_classes import OAuthProviderClass
16
+ from ......schema_classes import OAuthSettingsClass
15
17
  from ......schema_classes import OidcSettingsClass
16
18
  from ......schema_classes import SsoSettingsClass
17
19
 
@@ -21,6 +23,8 @@ DocPropagationFeatureSettings = DocPropagationFeatureSettingsClass
21
23
  GlobalHomePageSettings = GlobalHomePageSettingsClass
22
24
  GlobalSettingsInfo = GlobalSettingsInfoClass
23
25
  GlobalViewsSettings = GlobalViewsSettingsClass
26
+ OAuthProvider = OAuthProviderClass
27
+ OAuthSettings = OAuthSettingsClass
24
28
  OidcSettings = OidcSettingsClass
25
29
  SsoSettings = SsoSettingsClass
26
30
 
@@ -10607,7 +10607,8 @@
10607
10607
  "name": "parent",
10608
10608
  "default": null
10609
10609
  }
10610
- ]
10610
+ ],
10611
+ "doc": "Relates a physical asset to a logical model."
10611
10612
  },
10612
10613
  {
10613
10614
  "type": "record",
@@ -11887,6 +11888,78 @@
11887
11888
  "default": null,
11888
11889
  "doc": "SSO integrations between DataHub and identity providers"
11889
11890
  },
11891
+ {
11892
+ "type": [
11893
+ "null",
11894
+ {
11895
+ "type": "record",
11896
+ "name": "OAuthSettings",
11897
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
11898
+ "fields": [
11899
+ {
11900
+ "type": {
11901
+ "type": "array",
11902
+ "items": {
11903
+ "type": "record",
11904
+ "name": "OAuthProvider",
11905
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
11906
+ "fields": [
11907
+ {
11908
+ "type": "boolean",
11909
+ "name": "enabled",
11910
+ "doc": "Whether this OAuth provider is enabled."
11911
+ },
11912
+ {
11913
+ "type": "string",
11914
+ "name": "name",
11915
+ "doc": "The name of this OAuth provider. This is used for display purposes only."
11916
+ },
11917
+ {
11918
+ "type": [
11919
+ "null",
11920
+ "string"
11921
+ ],
11922
+ "name": "jwksUri",
11923
+ "default": null,
11924
+ "doc": "The URI of the JSON Web Key Set (JWKS) endpoint for this OAuth provider."
11925
+ },
11926
+ {
11927
+ "type": "string",
11928
+ "name": "issuer",
11929
+ "doc": "The expected issuer (iss) claim in the JWTs issued by this OAuth provider."
11930
+ },
11931
+ {
11932
+ "type": "string",
11933
+ "name": "audience",
11934
+ "doc": "The expected audience (aud) claim in the JWTs issued by this OAuth provider."
11935
+ },
11936
+ {
11937
+ "type": "string",
11938
+ "name": "algorithm",
11939
+ "default": "RS256",
11940
+ "doc": "The JWT signing algorithm required for this provider.\nPrevents algorithm confusion attacks. Common values: RS256, RS384, RS512, PS256, ES256"
11941
+ },
11942
+ {
11943
+ "type": "string",
11944
+ "name": "userIdClaim",
11945
+ "default": "sub",
11946
+ "doc": "The JWT claim to use as the user identifier for this provider.\nDifferent providers use different claims (sub, email, preferred_username, etc.)"
11947
+ }
11948
+ ],
11949
+ "doc": "An OAuth Provider. This provides information required to validate inbound\nrequests with OAuth 2.0 bearer tokens."
11950
+ }
11951
+ },
11952
+ "name": "providers",
11953
+ "doc": "Trusted OAuth Providers"
11954
+ }
11955
+ ],
11956
+ "doc": "Trust oauth providers to use for authentication."
11957
+ }
11958
+ ],
11959
+ "name": "oauth",
11960
+ "default": null,
11961
+ "doc": "Settings related to the oauth authentication provider"
11962
+ },
11890
11963
  {
11891
11964
  "type": [
11892
11965
  "null",
@@ -198,6 +198,78 @@
198
198
  "default": null,
199
199
  "doc": "SSO integrations between DataHub and identity providers"
200
200
  },
201
+ {
202
+ "type": [
203
+ "null",
204
+ {
205
+ "type": "record",
206
+ "name": "OAuthSettings",
207
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
208
+ "fields": [
209
+ {
210
+ "type": {
211
+ "type": "array",
212
+ "items": {
213
+ "type": "record",
214
+ "name": "OAuthProvider",
215
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
216
+ "fields": [
217
+ {
218
+ "type": "boolean",
219
+ "name": "enabled",
220
+ "doc": "Whether this OAuth provider is enabled."
221
+ },
222
+ {
223
+ "type": "string",
224
+ "name": "name",
225
+ "doc": "The name of this OAuth provider. This is used for display purposes only."
226
+ },
227
+ {
228
+ "type": [
229
+ "null",
230
+ "string"
231
+ ],
232
+ "name": "jwksUri",
233
+ "default": null,
234
+ "doc": "The URI of the JSON Web Key Set (JWKS) endpoint for this OAuth provider."
235
+ },
236
+ {
237
+ "type": "string",
238
+ "name": "issuer",
239
+ "doc": "The expected issuer (iss) claim in the JWTs issued by this OAuth provider."
240
+ },
241
+ {
242
+ "type": "string",
243
+ "name": "audience",
244
+ "doc": "The expected audience (aud) claim in the JWTs issued by this OAuth provider."
245
+ },
246
+ {
247
+ "type": "string",
248
+ "name": "algorithm",
249
+ "default": "RS256",
250
+ "doc": "The JWT signing algorithm required for this provider.\nPrevents algorithm confusion attacks. Common values: RS256, RS384, RS512, PS256, ES256"
251
+ },
252
+ {
253
+ "type": "string",
254
+ "name": "userIdClaim",
255
+ "default": "sub",
256
+ "doc": "The JWT claim to use as the user identifier for this provider.\nDifferent providers use different claims (sub, email, preferred_username, etc.)"
257
+ }
258
+ ],
259
+ "doc": "An OAuth Provider. This provides information required to validate inbound\nrequests with OAuth 2.0 bearer tokens."
260
+ }
261
+ },
262
+ "name": "providers",
263
+ "doc": "Trusted OAuth Providers"
264
+ }
265
+ ],
266
+ "doc": "Trust oauth providers to use for authentication."
267
+ }
268
+ ],
269
+ "name": "oauth",
270
+ "default": null,
271
+ "doc": "Settings related to the oauth authentication provider"
272
+ },
201
273
  {
202
274
  "type": [
203
275
  "null",
@@ -140,5 +140,6 @@
140
140
  "name": "parent",
141
141
  "default": null
142
142
  }
143
- ]
143
+ ],
144
+ "doc": "Relates a physical asset to a logical model."
144
145
  }