acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic.

Files changed (30)
  1. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/METADATA +2433 -2433
  2. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/RECORD +30 -28
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  5. datahub/ingestion/source/hex/api.py +1 -20
  6. datahub/ingestion/source/hex/query_fetcher.py +4 -1
  7. datahub/ingestion/source/sigma/config.py +75 -6
  8. datahub/ingestion/source/sigma/sigma.py +16 -1
  9. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  10. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  11. datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
  12. datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
  13. datahub/metadata/_schema_classes.py +47 -2
  14. datahub/metadata/_urns/urn_defs.py +56 -0
  15. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  16. datahub/metadata/schema.avsc +121 -85
  17. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  18. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  19. datahub/metadata/schemas/FormInfo.avsc +5 -0
  20. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  21. datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  22. datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
  23. datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  24. datahub/metadata/schemas/QueryProperties.avsc +4 -2
  25. datahub/metadata/schemas/SystemMetadata.avsc +86 -0
  26. datahub/sql_parsing/sqlglot_utils.py +16 -8
  27. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/WHEEL +0 -0
  28. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/entry_points.txt +0 -0
  29. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/licenses/LICENSE +0 -0
  30. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sigma/sigma_api.py
@@ -95,22 +95,22 @@ class SigmaAPI:
         return get_response

     def get_workspace(self, workspace_id: str) -> Optional[Workspace]:
+        if workspace_id in self.workspaces:
+            return self.workspaces[workspace_id]
+
         logger.debug(f"Fetching workspace metadata with id '{workspace_id}'")
         try:
-            if workspace_id in self.workspaces:
-                return self.workspaces[workspace_id]
-            else:
-                response = self._get_api_call(
-                    f"{self.config.api_url}/workspaces/{workspace_id}"
-                )
-                if response.status_code == 403:
-                    logger.debug(f"Workspace {workspace_id} not accessible.")
-                    self.report.non_accessible_workspaces_count += 1
-                    return None
-                response.raise_for_status()
-                workspace = Workspace.parse_obj(response.json())
-                self.workspaces[workspace.workspaceId] = workspace
-                return workspace
+            response = self._get_api_call(
+                f"{self.config.api_url}/workspaces/{workspace_id}"
+            )
+            if response.status_code == 403:
+                logger.debug(f"Workspace {workspace_id} not accessible.")
+                self.report.non_accessible_workspaces_count += 1
+                return None
+            response.raise_for_status()
+            workspace = Workspace.parse_obj(response.json())
+            self.workspaces[workspace.workspaceId] = workspace
+            return workspace
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch workspace '{workspace_id}'. Exception: {e}"
@@ -187,7 +187,9 @@ class SigmaAPI:
     @functools.lru_cache
     def _get_files_metadata(self, file_type: str) -> Dict[str, File]:
         logger.debug(f"Fetching file metadata with type {file_type}.")
-        file_url = url = f"{self.config.api_url}/files?typeFilters={file_type}"
+        file_url = url = (
+            f"{self.config.api_url}/files?permissionFilter=view&typeFilters={file_type}"
+        )
         try:
             files_metadata: Dict[str, File] = {}
             while True:
@@ -225,31 +227,50 @@ class SigmaAPI:
                 for dataset_dict in response_dict[Constant.ENTRIES]:
                     dataset = SigmaDataset.parse_obj(dataset_dict)

-                    if dataset.datasetId in dataset_files_metadata:
-                        dataset.path = dataset_files_metadata[dataset.datasetId].path
-                        dataset.badge = dataset_files_metadata[dataset.datasetId].badge
-
-                        workspace_id = dataset_files_metadata[
-                            dataset.datasetId
-                        ].workspaceId
-                        if workspace_id:
-                            dataset.workspaceId = workspace_id
-                            workspace = self.get_workspace(dataset.workspaceId)
-                            if workspace:
-                                if self.config.workspace_pattern.allowed(
-                                    workspace.name
-                                ):
-                                    datasets.append(dataset)
-                        elif self.config.ingest_shared_entities:
-                            # If no workspace for dataset we can consider it as shared entity
-                            self.report.shared_entities_count += 1
-                            datasets.append(dataset)
+                    if dataset.datasetId not in dataset_files_metadata:
+                        self.report.datasets.dropped(
+                            f"{dataset.name} ({dataset.datasetId}) (missing file metadata)"
+                        )
+                        continue
+
+                    dataset.workspaceId = dataset_files_metadata[
+                        dataset.datasetId
+                    ].workspaceId
+
+                    dataset.path = dataset_files_metadata[dataset.datasetId].path
+                    dataset.badge = dataset_files_metadata[dataset.datasetId].badge
+
+                    workspace = None
+                    if dataset.workspaceId:
+                        workspace = self.get_workspace(dataset.workspaceId)
+
+                    if workspace:
+                        if self.config.workspace_pattern.allowed(workspace.name):
+                            self.report.datasets.processed(
+                                f"{dataset.name} ({dataset.datasetId}) in {workspace.name}"
+                            )
+                            datasets.append(dataset)
+                        else:
+                            self.report.datasets.dropped(
+                                f"{dataset.name} ({dataset.datasetId}) in {workspace.name}"
+                            )
+                    elif self.config.ingest_shared_entities:
+                        # If no workspace for dataset we can consider it as shared entity
+                        self.report.datasets_without_workspace += 1
+                        self.report.datasets.processed(
+                            f"{dataset.name} ({dataset.datasetId}) in workspace id {dataset.workspaceId or 'unknown'}"
+                        )
+                        datasets.append(dataset)
+                    else:
+                        self.report.datasets.dropped(
+                            f"{dataset.name} ({dataset.datasetId}) in workspace id {dataset.workspaceId or 'unknown'}"
+                        )

                 if response_dict[Constant.NEXTPAGE]:
                     url = f"{dataset_url}?page={response_dict[Constant.NEXTPAGE]}"
                 else:
                     break
-            self.report.number_of_datasets = len(datasets)
+
             return datasets
         except Exception as e:
             self._log_http_error(
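
The loop now records each dataset as processed or dropped with a reason instead of only keeping a final count; the workbook loop below gets the same treatment. A rough sketch of what such a processed/dropped report container could look like, assuming nothing about the real Sigma source report beyond the calls visible above:

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class EntityFilterReportSketch:
        """Illustrative only: collects human-readable processed/dropped entries for an ingestion report."""
        processed_entries: List[str] = field(default_factory=list)
        dropped_entries: List[str] = field(default_factory=list)

        def processed(self, entry: str) -> None:
            self.processed_entries.append(entry)

        def dropped(self, entry: str) -> None:
            self.dropped_entries.append(entry)

    # Usage mirroring the diff (names of datasets/workspaces are made up):
    datasets = EntityFilterReportSketch()
    datasets.dropped("Orders (ds-123) (missing file metadata)")
    datasets.processed("Revenue (ds-456) in Finance")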
@@ -381,34 +402,54 @@ class SigmaAPI:
                 for workbook_dict in response_dict[Constant.ENTRIES]:
                     workbook = Workbook.parse_obj(workbook_dict)

-                    if workbook.workbookId in workbook_files_metadata:
-                        workbook.badge = workbook_files_metadata[
-                            workbook.workbookId
-                        ].badge
-
-                        workspace_id = workbook_files_metadata[
-                            workbook.workbookId
-                        ].workspaceId
-                        if workspace_id:
-                            workbook.workspaceId = workspace_id
-                            workspace = self.get_workspace(workbook.workspaceId)
-                            if workspace:
-                                if self.config.workspace_pattern.allowed(
-                                    workspace.name
-                                ):
-                                    workbook.pages = self.get_workbook_pages(workbook)
-                                    workbooks.append(workbook)
-                        elif self.config.ingest_shared_entities:
-                            # If no workspace for workbook we can consider it as shared entity
-                            self.report.shared_entities_count += 1
-                            workbook.pages = self.get_workbook_pages(workbook)
-                            workbooks.append(workbook)
+                    if workbook.workbookId not in workbook_files_metadata:
+                        # Due to a bug in the Sigma API, it seems like the /files endpoint does not
+                        # return file metadata when the user has access via admin permissions. In
+                        # those cases, the user associated with the token needs to be manually added
+                        # to the workspace.
+                        self.report.workbooks.dropped(
+                            f"{workbook.name} ({workbook.workbookId}) (missing file metadata; path: {workbook.path}; likely need to manually add user to workspace)"
+                        )
+                        continue
+
+                    workbook.workspaceId = workbook_files_metadata[
+                        workbook.workbookId
+                    ].workspaceId
+
+                    workbook.badge = workbook_files_metadata[workbook.workbookId].badge
+
+                    workspace = None
+                    if workbook.workspaceId:
+                        workspace = self.get_workspace(workbook.workspaceId)
+
+                    if workspace:
+                        if self.config.workspace_pattern.allowed(workspace.name):
+                            self.report.workbooks.processed(
+                                f"{workbook.name} ({workbook.workbookId}) in {workspace.name}"
+                            )
+                            workbook.pages = self.get_workbook_pages(workbook)
+                            workbooks.append(workbook)
+                        else:
+                            self.report.workbooks.dropped(
+                                f"{workbook.name} ({workbook.workbookId}) in {workspace.name}"
+                            )
+                    elif self.config.ingest_shared_entities:
+                        # If no workspace for workbook we can consider it as shared entity
+                        self.report.workbooks_without_workspace += 1
+                        self.report.workbooks.processed(
+                            f"{workbook.name} ({workbook.workbookId}) in workspace id {workbook.workspaceId or 'unknown'}"
+                        )
+                        workbook.pages = self.get_workbook_pages(workbook)
+                        workbooks.append(workbook)
+                    else:
+                        self.report.workbooks.dropped(
+                            f"{workbook.name} ({workbook.workbookId}) in workspace id {workbook.workspaceId or 'unknown'}"
+                        )

                 if response_dict[Constant.NEXTPAGE]:
                     url = f"{workbook_url}?page={response_dict[Constant.NEXTPAGE]}"
                 else:
                     break
-            self.report.number_of_workbooks = len(workbooks)
             return workbooks
         except Exception as e:
             self._log_http_error(
datahub/ingestion/source/snowflake/snowflake_queries.py
@@ -515,7 +515,10 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
             # job at eliminating redundant / repetitive queries. As such, we include the fast fingerprint
             # here
             query_id=get_query_fingerprint(
-                res["query_text"], self.identifiers.platform, fast=True
+                res["query_text"],
+                self.identifiers.platform,
+                fast=True,
+                secondary_id=res["query_secondary_fingerprint"],
             ),
             query_text=res["query_text"],
             upstreams=upstreams,
@@ -654,7 +657,17 @@ WITH
 fingerprinted_queries as (
     SELECT *,
         -- TODO: Generate better fingerprints for each query by pushing down regex logic.
-        query_history.query_parameterized_hash as query_fingerprint
+        query_history.query_parameterized_hash as query_fingerprint,
+        -- Optional and additional hash to be used for query deduplication and final query identity
+        CASE
+            WHEN CONTAINS(query_history.query_text, '-- Hex query metadata:')
+            -- Extract project id and hash it
+            THEN CAST(HASH(
+                REGEXP_SUBSTR(query_history.query_text, '"project_id"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1),
+                REGEXP_SUBSTR(query_history.query_text, '"context"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1)
+            ) AS VARCHAR)
+            ELSE NULL
+        END as query_secondary_fingerprint
     FROM
         snowflake.account_usage.query_history
     WHERE
@@ -670,11 +683,11 @@ fingerprinted_queries as (
             {time_bucket_size},
             CONVERT_TIMEZONE('UTC', start_time)
         ) AS bucket_start_time,
-        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint) AS query_count,
+        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint) AS query_count,
     FROM
         fingerprinted_queries
     QUALIFY
-        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint ORDER BY start_time DESC) = 1
+        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint ORDER BY start_time DESC) = 1
 )
 , raw_access_history AS (
     SELECT
714
727
  q.bucket_start_time,
715
728
  q.query_id,
716
729
  q.query_fingerprint,
730
+ q.query_secondary_fingerprint,
717
731
  q.query_count,
718
732
  q.session_id AS "SESSION_ID",
719
733
  q.start_time AS "QUERY_START_TIME",
@@ -1000,4 +1000,4 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
1000
1000
  from_clause = (
1001
1001
  f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
1002
1002
  )
1003
- return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
1003
+ return f"""SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit} {from_clause};"""
datahub/ingestion/source/sql/stored_procedures/base.py
@@ -26,6 +26,7 @@ from datahub.metadata.schema_classes import (
     DataPlatformInstanceClass,
     DataTransformClass,
     DataTransformLogicClass,
+    QueryLanguageClass,
     QueryStatementClass,
     SubTypesClass,
 )
@@ -176,7 +177,17 @@ def _generate_job_workunits(
             DataTransformClass(
                 queryStatement=QueryStatementClass(
                     value=procedure.procedure_definition,
-                    language=procedure.language,
+                    language=(
+                        QueryLanguageClass.SQL
+                        if procedure.language == "SQL"
+                        # The language field uses a pretty limited enum.
+                        # The "UNKNOWN" enum value is pretty new, so we don't want to
+                        # emit it until it has broader server-side support. As a
+                        # short-term solution, we map all languages to "SQL".
+                        # TODO: Once we've released server 1.1.0, we should change
+                        # this to be "UNKNOWN" for all languages except "SQL".
+                        else QueryLanguageClass.SQL
+                    ),
                 ),
             )
         ]
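
As the inline comment explains, both branches of the conditional currently resolve to QueryLanguageClass.SQL; the structure is scaffolding for the behavior described in the TODO. A sketch of that intended future mapping, which is an assumption based on the TODO rather than shipped behavior:

    from datahub.metadata.schema_classes import QueryLanguageClass

    def map_procedure_language(language: str) -> str:
        # Assumed post-1.1.0 behavior per the TODO above: only "SQL" keeps the SQL enum value;
        # other stored-procedure languages (Python, Java, ...) would map to the new UNKNOWN value.
        return QueryLanguageClass.SQL if language == "SQL" else QueryLanguageClass.UNKNOWN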
datahub/metadata/_schema_classes.py
@@ -15442,6 +15442,35 @@ class DataHubIngestionSourceKeyClass(_Aspect):
         self._inner_dict['id'] = value


+class DataHubOpenAPISchemaKeyClass(_Aspect):
+    """Key for a Query"""
+
+
+    ASPECT_NAME = 'dataHubOpenAPISchemaKey'
+    ASPECT_INFO = {'keyForEntity': 'dataHubOpenAPISchema', 'entityCategory': 'internal', 'entityAspects': ['systemMetadata'], 'entityDoc': 'Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model.'}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubOpenAPISchemaKey")
+
+    def __init__(self,
+        id: str,
+    ):
+        super().__init__()
+
+        self.id = id
+
+    def _restore_defaults(self) -> None:
+        self.id = str()
+
+
+    @property
+    def id(self) -> str:
+        """A unique id for the DataHub OpenAPI schema."""
+        return self._inner_dict.get('id')  # type: ignore
+
+    @id.setter
+    def id(self, value: str) -> None:
+        self._inner_dict['id'] = value
+
+
 class DataHubPersonaKeyClass(_Aspect):
     """Key for a persona type"""

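
With the generated key class in place, the new dataHubOpenAPISchema entity can be keyed like any other internal entity. A short usage sketch, assuming this wheel (1.0.0.3rc1) is installed; the id value is made up:

    from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass

    key = DataHubOpenAPISchemaKeyClass(id="example-openapi-schema")
    print(key.ASPECT_NAME)  # dataHubOpenAPISchemaKey
    print(key.id)           # example-openapi-schema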
@@ -20128,10 +20157,14 @@ class PlatformEventHeaderClass(DictWrapper):
         self._inner_dict['timestampMillis'] = value


-class SystemMetadataClass(DictWrapper):
+class SystemMetadataClass(_Aspect):
     """Metadata associated with each metadata change that is processed by the system"""
-
+
+
+    ASPECT_NAME = 'systemMetadata'
+    ASPECT_INFO = {}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.mxe.SystemMetadata")
+
     def __init__(self,
         lastObserved: Optional[Union[int, None]]=None,
         runId: Optional[Union[str, None]]=None,
@@ -21738,6 +21771,9 @@ class QueryLanguageClass(object):
     SQL = "SQL"
     """A SQL Query"""

+    UNKNOWN = "UNKNOWN"
+    """Unknown query language"""
+


 class QueryPropertiesClass(_Aspect):
@@ -26135,6 +26171,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.metadata.key.DataHubActionKey': DataHubActionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubConnectionKey': DataHubConnectionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
+    'com.linkedin.pegasus2avro.metadata.key.DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubPersonaKey': DataHubPersonaKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubPolicyKey': DataHubPolicyKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubRetentionKey': DataHubRetentionKeyClass,
@@ -26620,6 +26657,7 @@ __SCHEMA_TYPES = {
     'DataHubActionKey': DataHubActionKeyClass,
     'DataHubConnectionKey': DataHubConnectionKeyClass,
     'DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
+    'DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'DataHubPersonaKey': DataHubPersonaKeyClass,
     'DataHubPolicyKey': DataHubPolicyKeyClass,
     'DataHubRetentionKey': DataHubRetentionKeyClass,
@@ -26879,6 +26917,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     ContainerClass,
     ContainerPropertiesClass,
     EditableContainerPropertiesClass,
+    SystemMetadataClass,
     DataHubSecretValueClass,
     DataHubUpgradeRequestClass,
     DataHubUpgradeResultClass,
@@ -26935,6 +26974,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     MLModelKeyClass,
     NotebookKeyClass,
     RoleKeyClass,
+    DataHubOpenAPISchemaKeyClass,
     GlobalSettingsKeyClass,
     DatasetKeyClass,
     ChartKeyClass,
@@ -27102,6 +27142,7 @@ class AspectBag(TypedDict, total=False):
     container: ContainerClass
     containerProperties: ContainerPropertiesClass
     editableContainerProperties: EditableContainerPropertiesClass
+    systemMetadata: SystemMetadataClass
     dataHubSecretValue: DataHubSecretValueClass
     dataHubUpgradeRequest: DataHubUpgradeRequestClass
     dataHubUpgradeResult: DataHubUpgradeResultClass
@@ -27158,6 +27199,7 @@ class AspectBag(TypedDict, total=False):
     mlModelKey: MLModelKeyClass
     notebookKey: NotebookKeyClass
     roleKey: RoleKeyClass
+    dataHubOpenAPISchemaKey: DataHubOpenAPISchemaKeyClass
     globalSettingsKey: GlobalSettingsKeyClass
     datasetKey: DatasetKeyClass
     chartKey: ChartKeyClass
@@ -27292,6 +27334,7 @@ KEY_ASPECTS: Dict[str, Type[_Aspect]] = {
     'mlModel': MLModelKeyClass,
     'notebook': NotebookKeyClass,
     'role': RoleKeyClass,
+    'dataHubOpenAPISchema': DataHubOpenAPISchemaKeyClass,
     'globalSettings': GlobalSettingsKeyClass,
     'dataset': DatasetKeyClass,
     'chart': ChartKeyClass,
@@ -27352,6 +27395,7 @@ ENTITY_TYPE_NAMES: List[str] = [
     'mlModel',
     'notebook',
     'role',
+    'dataHubOpenAPISchema',
     'globalSettings',
     'dataset',
     'chart',
@@ -27411,6 +27455,7 @@ EntityTypeName = Literal[
     'mlModel',
     'notebook',
     'role',
+    'dataHubOpenAPISchema',
     'globalSettings',
     'dataset',
     'chart',
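
Taken together, the registry updates above wire the new entity type in end to end: its key aspect is resolvable from the entity name and its class sits in the aspect list. A small sanity check against the module the diff touches (assuming this wheel is installed):

    from datahub.metadata._schema_classes import (
        ASPECT_CLASSES,
        KEY_ASPECTS,
        DataHubOpenAPISchemaKeyClass,
    )

    assert KEY_ASPECTS["dataHubOpenAPISchema"] is DataHubOpenAPISchemaKeyClass
    assert DataHubOpenAPISchemaKeyClass in ASPECT_CLASSES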
datahub/metadata/_urns/urn_defs.py
@@ -594,6 +594,62 @@ class RoleUrn(_SpecificUrn):
     def id(self) -> str:
         return self._entity_ids[0]

+if TYPE_CHECKING:
+    from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+class DataHubOpenAPISchemaUrn(_SpecificUrn):
+    ENTITY_TYPE: ClassVar[Literal["dataHubOpenAPISchema"]] = "dataHubOpenAPISchema"
+    _URN_PARTS: ClassVar[int] = 1
+
+    def __init__(self, id: Union["DataHubOpenAPISchemaUrn", str], *, _allow_coercion: bool = True) -> None:
+        if _allow_coercion:
+            # Field coercion logic (if any is required).
+            if isinstance(id, str):
+                if id.startswith('urn:li:'):
+                    try:
+                        id = DataHubOpenAPISchemaUrn.from_string(id)
+                    except InvalidUrnError:
+                        raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
+                else:
+                    id = UrnEncoder.encode_string(id)
+
+        # Validation logic.
+        if not id:
+            raise InvalidUrnError("DataHubOpenAPISchemaUrn id cannot be empty")
+        if isinstance(id, DataHubOpenAPISchemaUrn):
+            id = id.id
+        elif isinstance(id, Urn):
+            raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
+        if UrnEncoder.contains_reserved_char(id):
+            raise InvalidUrnError(f'DataHubOpenAPISchemaUrn id contains reserved characters')
+
+        super().__init__(self.ENTITY_TYPE, [id])
+
+    @classmethod
+    def _parse_ids(cls, entity_ids: List[str]) -> "DataHubOpenAPISchemaUrn":
+        if len(entity_ids) != cls._URN_PARTS:
+            raise InvalidUrnError(f"DataHubOpenAPISchemaUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
+        return cls(id=entity_ids[0], _allow_coercion=False)
+
+    @classmethod
+    def underlying_key_aspect_type(cls) -> Type["DataHubOpenAPISchemaKeyClass"]:
+        from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+        return DataHubOpenAPISchemaKeyClass
+
+    def to_key_aspect(self) -> "DataHubOpenAPISchemaKeyClass":
+        from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+        return DataHubOpenAPISchemaKeyClass(id=self.id)
+
+    @classmethod
+    def from_key_aspect(cls, key_aspect: "DataHubOpenAPISchemaKeyClass") -> "DataHubOpenAPISchemaUrn":
+        return cls(id=key_aspect.id)
+
+    @property
+    def id(self) -> str:
+        return self._entity_ids[0]
+
 if TYPE_CHECKING:
     from datahub.metadata.schema_classes import GlobalSettingsKeyClass

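
The new urn class follows the standard single-part _SpecificUrn pattern, so it can be constructed from a raw id and round-tripped through its key aspect. A brief usage sketch (the id is made up, and the urn:li:dataHubOpenAPISchema:<id> rendering is the expected layout rather than something shown in the diff):

    from datahub.metadata._urns.urn_defs import DataHubOpenAPISchemaUrn

    urn = DataHubOpenAPISchemaUrn("events-schema")
    print(str(urn))  # expected: urn:li:dataHubOpenAPISchema:events-schema

    key = urn.to_key_aspect()  # DataHubOpenAPISchemaKeyClass with id='events-schema'
    assert DataHubOpenAPISchemaUrn.from_key_aspect(key).id == urn.id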
datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py
@@ -19,6 +19,7 @@ from ......schema_classes import DataHubAccessTokenKeyClass
 from ......schema_classes import DataHubActionKeyClass
 from ......schema_classes import DataHubConnectionKeyClass
 from ......schema_classes import DataHubIngestionSourceKeyClass
+from ......schema_classes import DataHubOpenAPISchemaKeyClass
 from ......schema_classes import DataHubPersonaKeyClass
 from ......schema_classes import DataHubPolicyKeyClass
 from ......schema_classes import DataHubRetentionKeyClass
@@ -72,6 +73,7 @@ DataHubAccessTokenKey = DataHubAccessTokenKeyClass
 DataHubActionKey = DataHubActionKeyClass
 DataHubConnectionKey = DataHubConnectionKeyClass
 DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
+DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
 DataHubPersonaKey = DataHubPersonaKeyClass
 DataHubPolicyKey = DataHubPolicyKeyClass
 DataHubRetentionKey = DataHubRetentionKeyClass