acryl-datahub: version 1.2.0.10rc2 (py3-none-any wheel) → version 1.2.0.10rc3 (py3-none-any wheel)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. More details are available via the link on the original page (the link target is not preserved in this text).

@@ -12,6 +12,8 @@ import sqlglot
12
12
  from pydantic import BaseModel
13
13
  from pydantic.class_validators import root_validator, validator
14
14
  from pydantic.fields import Field
15
+ from requests.adapters import HTTPAdapter
16
+ from urllib3.util.retry import Retry
15
17
 
16
18
  import datahub.emitter.mce_builder as builder
17
19
  from datahub.configuration.common import AllowDenyPattern
@@ -109,6 +111,12 @@ logger = logging.getLogger(__name__)
109
111
 
110
112
  PAGE_SIZE = 25
111
113
 
114
+ # Retry configuration constants
115
+ RETRY_MAX_TIMES = 3
116
+ RETRY_STATUS_CODES = [429, 500, 502, 503, 504]
117
+ RETRY_BACKOFF_FACTOR = 1
118
+ RETRY_ALLOWED_METHODS = ["GET"]
119
+
112
120
 
113
121
  chart_type_from_viz_type = {
114
122
  "line": ChartTypeClass.LINE,
@@ -328,6 +336,19 @@ class SupersetSource(StatefulIngestionSourceBase):
328
336
  logger.debug("Got access token from superset")
329
337
 
330
338
  requests_session = requests.Session()
339
+
340
+ # Configure retry strategy for transient failures
341
+ retry_strategy = Retry(
342
+ total=RETRY_MAX_TIMES,
343
+ status_forcelist=RETRY_STATUS_CODES,
344
+ backoff_factor=RETRY_BACKOFF_FACTOR,
345
+ allowed_methods=RETRY_ALLOWED_METHODS,
346
+ raise_on_status=False,
347
+ )
348
+ adapter = HTTPAdapter(max_retries=retry_strategy)
349
+ requests_session.mount("http://", adapter)
350
+ requests_session.mount("https://", adapter)
351
+
331
352
  requests_session.headers.update(
332
353
  {
333
354
  "Authorization": f"Bearer {self.access_token}",
@@ -360,8 +381,13 @@ class SupersetSource(StatefulIngestionSourceBase):
360
381
  )
361
382
 
362
383
  if response.status_code != 200:
363
- logger.warning(f"Failed to get {entity_type} data: {response.text}")
364
- continue
384
+ self.report.warning(
385
+ title="Failed to fetch data from Superset API",
386
+ message="Incomplete metadata extraction due to Superset API failure",
387
+ context=f"Entity Type: {entity_type}, HTTP Status Code: {response.status_code}, Page: {current_page}. Response: {response.text}",
388
+ )
389
+ # we stop pagination for this entity type and we continue the overall ingestion
390
+ break
365
391
 
366
392
  payload = response.json()
367
393
  # Update total_items with the actual count from the response
@@ -524,6 +524,10 @@ class TableauConfig(
524
524
  default=False,
525
525
  description="Ingest Owner from source. This will override Owner info entered from UI",
526
526
  )
527
+ use_email_as_username: bool = Field(
528
+ default=False,
529
+ description="Use email address instead of username for entity owners. Requires ingest_owner to be True.",
530
+ )
527
531
  ingest_tables_external: bool = Field(
528
532
  default=False,
529
533
  description="Ingest details for tables external to (not embedded in) tableau as entities.",
@@ -678,6 +682,14 @@ class TableauConfig(
678
682
  raise ValueError(
679
683
  "tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
680
684
  )
685
+
686
+ use_email_as_username = values.get("use_email_as_username")
687
+ ingest_owner = values.get("ingest_owner")
688
+ if use_email_as_username and not ingest_owner:
689
+ raise ValueError(
690
+ "use_email_as_username requires ingest_owner to be enabled."
691
+ )
692
+
681
693
  return values
682
694
 
683
695
 
@@ -839,6 +851,9 @@ class TableauSourceReport(
839
851
  default_factory=(lambda: defaultdict(int))
840
852
  )
841
853
 
854
+ # Owner extraction statistics
855
+ num_email_fallback_to_username: int = 0
856
+
842
857
 
843
858
  def report_user_role(report: TableauSourceReport, server: Server) -> None:
844
859
  title: str = "Insufficient Permissions"
@@ -2716,13 +2731,12 @@ class TableauSiteSource:
2716
2731
  dataset_snapshot.aspects.append(browse_paths)
2717
2732
 
2718
2733
  # Ownership
2719
- owner = (
2720
- self._get_ownership(datasource_info[c.OWNER][c.USERNAME])
2721
- if datasource_info
2722
- and datasource_info.get(c.OWNER)
2723
- and datasource_info[c.OWNER].get(c.USERNAME)
2734
+ owner_identifier = (
2735
+ self._get_owner_identifier(datasource_info[c.OWNER])
2736
+ if datasource_info and datasource_info.get(c.OWNER)
2724
2737
  else None
2725
2738
  )
2739
+ owner = self._get_ownership(owner_identifier) if owner_identifier else None
2726
2740
  if owner is not None:
2727
2741
  dataset_snapshot.aspects.append(owner)
2728
2742
 
@@ -3127,7 +3141,7 @@ class TableauSiteSource:
3127
3141
 
3128
3142
  creator: Optional[str] = None
3129
3143
  if workbook is not None and workbook.get(c.OWNER) is not None:
3130
- creator = workbook[c.OWNER].get(c.USERNAME)
3144
+ creator = self._get_owner_identifier(workbook[c.OWNER])
3131
3145
  created_at = sheet.get(c.CREATED_AT, datetime.now())
3132
3146
  updated_at = sheet.get(c.UPDATED_AT, datetime.now())
3133
3147
  last_modified = self.get_last_modified(creator, created_at, updated_at)
@@ -3276,7 +3290,7 @@ class TableauSiteSource:
3276
3290
 
3277
3291
  def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUnit]:
3278
3292
  workbook_container_key = self.gen_workbook_key(workbook[c.ID])
3279
- creator = workbook.get(c.OWNER, {}).get(c.USERNAME)
3293
+ creator = self._get_owner_identifier(workbook.get(c.OWNER, {}))
3280
3294
 
3281
3295
  owner_urn = (
3282
3296
  builder.make_user_urn(creator)
@@ -3458,7 +3472,7 @@ class TableauSiteSource:
3458
3472
 
3459
3473
  creator: Optional[str] = None
3460
3474
  if workbook is not None and workbook.get(c.OWNER) is not None:
3461
- creator = workbook[c.OWNER].get(c.USERNAME)
3475
+ creator = self._get_owner_identifier(workbook[c.OWNER])
3462
3476
  created_at = dashboard.get(c.CREATED_AT, datetime.now())
3463
3477
  updated_at = dashboard.get(c.UPDATED_AT, datetime.now())
3464
3478
  last_modified = self.get_last_modified(creator, created_at, updated_at)
@@ -3605,6 +3619,20 @@ class TableauSiteSource:
3605
3619
  )
3606
3620
  return last_modified
3607
3621
 
3622
+ def _get_owner_identifier(self, owner_dict: dict) -> Optional[str]:
3623
+ """Extract owner identifier (email or username) based on configuration."""
3624
+ if not owner_dict:
3625
+ return None
3626
+
3627
+ if self.config.use_email_as_username:
3628
+ email = owner_dict.get(c.EMAIL)
3629
+ if email:
3630
+ return email
3631
+ # Fall back to username if email is not available
3632
+ self.report.num_email_fallback_to_username += 1
3633
+
3634
+ return owner_dict.get(c.USERNAME)
3635
+
3608
3636
  @lru_cache(maxsize=None)
3609
3637
  def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
3610
3638
  if self.config.ingest_owner and user:
@@ -3828,3 +3856,15 @@ class TableauSiteSource:
3828
3856
  self.report.emit_upstream_tables_timer[self.site_content_url] = (
3829
3857
  timer.elapsed_seconds(digits=2)
3830
3858
  )
3859
+
3860
+ # Log owner extraction statistics if there were fallbacks
3861
+ if (
3862
+ self.config.use_email_as_username
3863
+ and self.config.ingest_owner
3864
+ and self.report.num_email_fallback_to_username > 0
3865
+ ):
3866
+ logger.info(
3867
+ f"Owner extraction summary for site '{self.site_content_url}': "
3868
+ f"{self.report.num_email_fallback_to_username} entities fell back from email to username "
3869
+ f"(email was not available)"
3870
+ )
@@ -65,6 +65,7 @@ workbook_graphql_query = """
65
65
  projectName
66
66
  owner {
67
67
  username
68
+ email
68
69
  }
69
70
  description
70
71
  uri
@@ -107,6 +108,7 @@ sheet_graphql_query = """
107
108
  luid
108
109
  owner {
109
110
  username
111
+ email
110
112
  }
111
113
  }
112
114
  datasourceFields {
@@ -185,6 +187,7 @@ dashboard_graphql_query = """
185
187
  luid
186
188
  owner {
187
189
  username
190
+ email
188
191
  }
189
192
  }
190
193
  }
@@ -268,6 +271,7 @@ embedded_datasource_graphql_query = """
268
271
  luid
269
272
  owner {
270
273
  username
274
+ email
271
275
  }
272
276
  }
273
277
  }
@@ -424,6 +428,7 @@ published_datasource_graphql_query = """
424
428
  }
425
429
  owner {
426
430
  username
431
+ email
427
432
  }
428
433
  description
429
434
  uri
@@ -59,6 +59,7 @@ LUID = "luid"
59
59
  EMBEDDED_DATA_SOURCE = "EmbeddedDatasource"
60
60
  OWNER = "owner"
61
61
  USERNAME = "username"
62
+ EMAIL = "email"
62
63
  HAS_EXTRACTS = "hasExtracts"
63
64
  EXTRACT_LAST_REFRESH_TIME = "extractLastRefreshTime"
64
65
  EXTRACT_LAST_INCREMENTAL_UPDATE_TIME = "extractLastIncrementalUpdateTime"
@@ -1,4 +1,5 @@
1
1
  from dataclasses import dataclass
2
+ from typing import Optional
2
3
 
3
4
  from tableauserverclient import Server, UserItem
4
5
 
@@ -10,6 +11,7 @@ class UserInfo:
10
11
  user_name: str
11
12
  site_role: str
12
13
  site_id: str
14
+ email: Optional[str] = None
13
15
 
14
16
  def has_site_administrator_explorer_privileges(self):
15
17
  return self.site_role in [
@@ -34,4 +36,5 @@ class UserInfo:
34
36
  user_name=user.name,
35
37
  site_role=user.site_role,
36
38
  site_id=server.site_id,
39
+ email=user.email,
37
40
  )
@@ -5356,6 +5356,7 @@ class InstitutionalMemoryMetadataClass(DictWrapper):
5356
5356
  description: str,
5357
5357
  createStamp: "AuditStampClass",
5358
5358
  updateStamp: Union[None, "AuditStampClass"]=None,
5359
+ settings: Union[None, "InstitutionalMemoryMetadataSettingsClass"]=None,
5359
5360
  ):
5360
5361
  super().__init__()
5361
5362
 
@@ -5363,12 +5364,14 @@ class InstitutionalMemoryMetadataClass(DictWrapper):
5363
5364
  self.description = description
5364
5365
  self.createStamp = createStamp
5365
5366
  self.updateStamp = updateStamp
5367
+ self.settings = settings
5366
5368
 
5367
5369
  def _restore_defaults(self) -> None:
5368
5370
  self.url = str()
5369
5371
  self.description = str()
5370
5372
  self.createStamp = AuditStampClass._construct_with_defaults()
5371
5373
  self.updateStamp = self.RECORD_SCHEMA.fields_dict["updateStamp"].default
5374
+ self.settings = self.RECORD_SCHEMA.fields_dict["settings"].default
5372
5375
 
5373
5376
 
5374
5377
  @property
@@ -5411,6 +5414,45 @@ class InstitutionalMemoryMetadataClass(DictWrapper):
5411
5414
  self._inner_dict['updateStamp'] = value
5412
5415
 
5413
5416
 
5417
+ @property
5418
+ def settings(self) -> Union[None, "InstitutionalMemoryMetadataSettingsClass"]:
5419
+ """Settings for this record"""
5420
+ return self._inner_dict.get('settings') # type: ignore
5421
+
5422
+ @settings.setter
5423
+ def settings(self, value: Union[None, "InstitutionalMemoryMetadataSettingsClass"]) -> None:
5424
+ self._inner_dict['settings'] = value
5425
+
5426
+
5427
+ class InstitutionalMemoryMetadataSettingsClass(DictWrapper):
5428
+ """Settings related to a record of InstitutionalMemoryMetadata"""
5429
+
5430
+ RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.InstitutionalMemoryMetadataSettings")
5431
+ def __init__(self,
5432
+ showInAssetPreview: Optional[bool]=None,
5433
+ ):
5434
+ super().__init__()
5435
+
5436
+ if showInAssetPreview is None:
5437
+ # default: False
5438
+ self.showInAssetPreview = self.RECORD_SCHEMA.fields_dict["showInAssetPreview"].default
5439
+ else:
5440
+ self.showInAssetPreview = showInAssetPreview
5441
+
5442
+ def _restore_defaults(self) -> None:
5443
+ self.showInAssetPreview = self.RECORD_SCHEMA.fields_dict["showInAssetPreview"].default
5444
+
5445
+
5446
+ @property
5447
+ def showInAssetPreview(self) -> bool:
5448
+ """Show record in asset preview like on entity header and search previews"""
5449
+ return self._inner_dict.get('showInAssetPreview') # type: ignore
5450
+
5451
+ @showInAssetPreview.setter
5452
+ def showInAssetPreview(self, value: bool) -> None:
5453
+ self._inner_dict['showInAssetPreview'] = value
5454
+
5455
+
5414
5456
  class MLFeatureDataTypeClass(object):
5415
5457
  """MLFeature Data Type"""
5416
5458
 
@@ -16689,7 +16731,7 @@ class MLModelGroupKeyClass(_Aspect):
16689
16731
 
16690
16732
 
16691
16733
  ASPECT_NAME = 'mlModelGroupKey'
16692
- ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'applications', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'container']}
16734
+ ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'applications', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'container', 'institutionalMemory']}
16693
16735
  RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey")
16694
16736
 
16695
16737
  def __init__(self,
@@ -27420,6 +27462,7 @@ __SCHEMA_TYPES = {
27420
27462
  'com.linkedin.pegasus2avro.common.InputFields': InputFieldsClass,
27421
27463
  'com.linkedin.pegasus2avro.common.InstitutionalMemory': InstitutionalMemoryClass,
27422
27464
  'com.linkedin.pegasus2avro.common.InstitutionalMemoryMetadata': InstitutionalMemoryMetadataClass,
27465
+ 'com.linkedin.pegasus2avro.common.InstitutionalMemoryMetadataSettings': InstitutionalMemoryMetadataSettingsClass,
27423
27466
  'com.linkedin.pegasus2avro.common.MLFeatureDataType': MLFeatureDataTypeClass,
27424
27467
  'com.linkedin.pegasus2avro.common.Media': MediaClass,
27425
27468
  'com.linkedin.pegasus2avro.common.MediaType': MediaTypeClass,
@@ -27939,6 +27982,7 @@ __SCHEMA_TYPES = {
27939
27982
  'InputFields': InputFieldsClass,
27940
27983
  'InstitutionalMemory': InstitutionalMemoryClass,
27941
27984
  'InstitutionalMemoryMetadata': InstitutionalMemoryMetadataClass,
27985
+ 'InstitutionalMemoryMetadataSettings': InstitutionalMemoryMetadataSettingsClass,
27942
27986
  'MLFeatureDataType': MLFeatureDataTypeClass,
27943
27987
  'Media': MediaClass,
27944
27988
  'MediaType': MediaTypeClass,
@@ -45,6 +45,7 @@ from .....schema_classes import InputFieldClass
45
45
  from .....schema_classes import InputFieldsClass
46
46
  from .....schema_classes import InstitutionalMemoryClass
47
47
  from .....schema_classes import InstitutionalMemoryMetadataClass
48
+ from .....schema_classes import InstitutionalMemoryMetadataSettingsClass
48
49
  from .....schema_classes import MLFeatureDataTypeClass
49
50
  from .....schema_classes import MediaClass
50
51
  from .....schema_classes import MediaTypeClass
@@ -111,6 +112,7 @@ InputField = InputFieldClass
111
112
  InputFields = InputFieldsClass
112
113
  InstitutionalMemory = InstitutionalMemoryClass
113
114
  InstitutionalMemoryMetadata = InstitutionalMemoryMetadataClass
115
+ InstitutionalMemoryMetadataSettings = InstitutionalMemoryMetadataSettingsClass
114
116
  MLFeatureDataType = MLFeatureDataTypeClass
115
117
  Media = MediaClass
116
118
  MediaType = MediaTypeClass
@@ -1074,6 +1074,28 @@
1074
1074
  "name": "updateStamp",
1075
1075
  "default": null,
1076
1076
  "doc": "Audit stamp associated with updation of this record"
1077
+ },
1078
+ {
1079
+ "type": [
1080
+ "null",
1081
+ {
1082
+ "type": "record",
1083
+ "name": "InstitutionalMemoryMetadataSettings",
1084
+ "namespace": "com.linkedin.pegasus2avro.common",
1085
+ "fields": [
1086
+ {
1087
+ "type": "boolean",
1088
+ "name": "showInAssetPreview",
1089
+ "default": false,
1090
+ "doc": "Show record in asset preview like on entity header and search previews"
1091
+ }
1092
+ ],
1093
+ "doc": "Settings related to a record of InstitutionalMemoryMetadata"
1094
+ }
1095
+ ],
1096
+ "name": "settings",
1097
+ "default": null,
1098
+ "doc": "Settings for this record"
1077
1099
  }
1078
1100
  ],
1079
1101
  "doc": "Metadata corresponding to a record of institutional memory."
@@ -4280,7 +4302,8 @@
4280
4302
  "forms",
4281
4303
  "testResults",
4282
4304
  "subTypes",
4283
- "container"
4305
+ "container",
4306
+ "institutionalMemory"
4284
4307
  ]
4285
4308
  },
4286
4309
  "name": "MLModelGroupKey",
@@ -84,6 +84,28 @@
84
84
  "name": "updateStamp",
85
85
  "default": null,
86
86
  "doc": "Audit stamp associated with updation of this record"
87
+ },
88
+ {
89
+ "type": [
90
+ "null",
91
+ {
92
+ "type": "record",
93
+ "name": "InstitutionalMemoryMetadataSettings",
94
+ "namespace": "com.linkedin.pegasus2avro.common",
95
+ "fields": [
96
+ {
97
+ "type": "boolean",
98
+ "name": "showInAssetPreview",
99
+ "default": false,
100
+ "doc": "Show record in asset preview like on entity header and search previews"
101
+ }
102
+ ],
103
+ "doc": "Settings related to a record of InstitutionalMemoryMetadata"
104
+ }
105
+ ],
106
+ "name": "settings",
107
+ "default": null,
108
+ "doc": "Settings for this record"
87
109
  }
88
110
  ],
89
111
  "doc": "Metadata corresponding to a record of institutional memory."
@@ -21,7 +21,8 @@
21
21
  "forms",
22
22
  "testResults",
23
23
  "subTypes",
24
- "container"
24
+ "container",
25
+ "institutionalMemory"
25
26
  ]
26
27
  },
27
28
  "name": "MLModelGroupKey",
@@ -1143,6 +1143,28 @@
1143
1143
  "name": "updateStamp",
1144
1144
  "default": null,
1145
1145
  "doc": "Audit stamp associated with updation of this record"
1146
+ },
1147
+ {
1148
+ "type": [
1149
+ "null",
1150
+ {
1151
+ "type": "record",
1152
+ "name": "InstitutionalMemoryMetadataSettings",
1153
+ "namespace": "com.linkedin.pegasus2avro.common",
1154
+ "fields": [
1155
+ {
1156
+ "type": "boolean",
1157
+ "name": "showInAssetPreview",
1158
+ "default": false,
1159
+ "doc": "Show record in asset preview like on entity header and search previews"
1160
+ }
1161
+ ],
1162
+ "doc": "Settings related to a record of InstitutionalMemoryMetadata"
1163
+ }
1164
+ ],
1165
+ "name": "settings",
1166
+ "default": null,
1167
+ "doc": "Settings for this record"
1146
1168
  }
1147
1169
  ],
1148
1170
  "doc": "Metadata corresponding to a record of institutional memory."
datahub/sdk/dashboard.py CHANGED
@@ -171,8 +171,6 @@ class Dashboard(
171
171
  )
172
172
  ),
173
173
  customProperties={},
174
- chartEdges=[],
175
- datasetEdges=[],
176
174
  dashboards=[],
177
175
  )
178
176
  )