acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/METADATA +2433 -2433
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/RECORD +30 -28
- datahub/_version.py +1 -1
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
- datahub/ingestion/source/hex/api.py +1 -20
- datahub/ingestion/source/hex/query_fetcher.py +4 -1
- datahub/ingestion/source/sigma/config.py +75 -6
- datahub/ingestion/source/sigma/sigma.py +16 -1
- datahub/ingestion/source/sigma/sigma_api.py +99 -58
- datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
- datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
- datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
- datahub/metadata/_schema_classes.py +47 -2
- datahub/metadata/_urns/urn_defs.py +56 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +121 -85
- datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
- datahub/metadata/schemas/FormInfo.avsc +5 -0
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
- datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- datahub/metadata/schemas/QueryProperties.avsc +4 -2
- datahub/metadata/schemas/SystemMetadata.avsc +86 -0
- datahub/sql_parsing/sqlglot_utils.py +16 -8
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sigma/sigma_api.py
@@ -95,22 +95,22 @@ class SigmaAPI:
         return get_response
 
     def get_workspace(self, workspace_id: str) -> Optional[Workspace]:
+        if workspace_id in self.workspaces:
+            return self.workspaces[workspace_id]
+
         logger.debug(f"Fetching workspace metadata with id '{workspace_id}'")
         try:
-            (… 11 removed lines not rendered in the original diff view …)
-            workspace = Workspace.parse_obj(response.json())
-            self.workspaces[workspace.workspaceId] = workspace
-            return workspace
+            response = self._get_api_call(
+                f"{self.config.api_url}/workspaces/{workspace_id}"
+            )
+            if response.status_code == 403:
+                logger.debug(f"Workspace {workspace_id} not accessible.")
+                self.report.non_accessible_workspaces_count += 1
+                return None
+            response.raise_for_status()
+            workspace = Workspace.parse_obj(response.json())
+            self.workspaces[workspace.workspaceId] = workspace
+            return workspace
         except Exception as e:
             self._log_http_error(
                 message=f"Unable to fetch workspace '{workspace_id}'. Exception: {e}"
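Note on the hunk above: get_workspace now memoizes results in self.workspaces and treats HTTP 403 as "skip this workspace" rather than a hard failure. A minimal standalone sketch of that cache-then-fetch pattern, using illustrative names rather than the real SigmaAPI internals:

from typing import Dict, Optional

import requests

_workspace_cache: Dict[str, dict] = {}

def get_workspace(api_url: str, workspace_id: str) -> Optional[dict]:
    # Serve repeated lookups from the in-memory cache.
    if workspace_id in _workspace_cache:
        return _workspace_cache[workspace_id]
    response = requests.get(f"{api_url}/workspaces/{workspace_id}")
    # A 403 means this token cannot see the workspace; skip it instead of
    # failing the whole ingestion run.
    if response.status_code == 403:
        return None
    response.raise_for_status()
    workspace = response.json()
    _workspace_cache[workspace_id] = workspace
    return workspace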
@@ -187,7 +187,9 @@ class SigmaAPI:
     @functools.lru_cache
     def _get_files_metadata(self, file_type: str) -> Dict[str, File]:
         logger.debug(f"Fetching file metadata with type {file_type}.")
-        file_url = url = f"{self.config.api_url}/files?permissionFilter=view&typeFilters={file_type}"
+        file_url = url = (
+            f"{self.config.api_url}/files?permissionFilter=view&typeFilters={file_type}"
+        )
         try:
             files_metadata: Dict[str, File] = {}
             while True:
@@ -225,31 +227,50 @@ class SigmaAPI:
                 for dataset_dict in response_dict[Constant.ENTRIES]:
                     dataset = SigmaDataset.parse_obj(dataset_dict)
 
-                    if dataset.datasetId in dataset_files_metadata:
-                        (… 18 removed lines not rendered in the original diff view …)
+                    if dataset.datasetId not in dataset_files_metadata:
+                        self.report.datasets.dropped(
+                            f"{dataset.name} ({dataset.datasetId}) (missing file metadata)"
+                        )
+                        continue
+
+                    dataset.workspaceId = dataset_files_metadata[
+                        dataset.datasetId
+                    ].workspaceId
+
+                    dataset.path = dataset_files_metadata[dataset.datasetId].path
+                    dataset.badge = dataset_files_metadata[dataset.datasetId].badge
+
+                    workspace = None
+                    if dataset.workspaceId:
+                        workspace = self.get_workspace(dataset.workspaceId)
+
+                    if workspace:
+                        if self.config.workspace_pattern.allowed(workspace.name):
+                            self.report.datasets.processed(
+                                f"{dataset.name} ({dataset.datasetId}) in {workspace.name}"
+                            )
+                            datasets.append(dataset)
+                        else:
+                            self.report.datasets.dropped(
+                                f"{dataset.name} ({dataset.datasetId}) in {workspace.name}"
+                            )
+                    elif self.config.ingest_shared_entities:
+                        # If no workspace for dataset we can consider it as shared entity
+                        self.report.datasets_without_workspace += 1
+                        self.report.datasets.processed(
+                            f"{dataset.name} ({dataset.datasetId}) in workspace id {dataset.workspaceId or 'unknown'}"
+                        )
+                        datasets.append(dataset)
+                    else:
+                        self.report.datasets.dropped(
+                            f"{dataset.name} ({dataset.datasetId}) in workspace id {dataset.workspaceId or 'unknown'}"
+                        )
 
                 if response_dict[Constant.NEXTPAGE]:
                     url = f"{dataset_url}?page={response_dict[Constant.NEXTPAGE]}"
                 else:
                     break
-
+
             return datasets
         except Exception as e:
             self._log_http_error(
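The allowed/dropped branching above hinges on workspace_pattern, which is DataHub's standard AllowDenyPattern regex filter. A small sketch of how that filter behaves; the pattern values are hypothetical:

from datahub.configuration.common import AllowDenyPattern

# Hypothetical recipe values: keep production workspaces, drop scratch ones.
pattern = AllowDenyPattern(allow=["Prod.*"], deny=[".*Scratch.*"])

assert pattern.allowed("Prod Analytics")      # matches allow
assert not pattern.allowed("Prod Scratch")    # matches deny, which wins
assert not pattern.allowed("Dev Sandbox")     # matches neither allow entry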
@@ -381,34 +402,54 @@ class SigmaAPI:
                 for workbook_dict in response_dict[Constant.ENTRIES]:
                     workbook = Workbook.parse_obj(workbook_dict)
 
-                    if workbook.workbookId in workbook_files_metadata:
-                        (… 5 removed lines not rendered in the original diff view …)
-                            workbook.workbookId
-                        (… 15 removed lines not rendered in the original diff view …)
+                    if workbook.workbookId not in workbook_files_metadata:
+                        # Due to a bug in the Sigma API, it seems like the /files endpoint does not
+                        # return file metadata when the user has access via admin permissions. In
+                        # those cases, the user associated with the token needs to be manually added
+                        # to the workspace.
+                        self.report.workbooks.dropped(
+                            f"{workbook.name} ({workbook.workbookId}) (missing file metadata; path: {workbook.path}; likely need to manually add user to workspace)"
+                        )
+                        continue
+
+                    workbook.workspaceId = workbook_files_metadata[
+                        workbook.workbookId
+                    ].workspaceId
+
+                    workbook.badge = workbook_files_metadata[workbook.workbookId].badge
+
+                    workspace = None
+                    if workbook.workspaceId:
+                        workspace = self.get_workspace(workbook.workspaceId)
+
+                    if workspace:
+                        if self.config.workspace_pattern.allowed(workspace.name):
+                            self.report.workbooks.processed(
+                                f"{workbook.name} ({workbook.workbookId}) in {workspace.name}"
+                            )
+                            workbook.pages = self.get_workbook_pages(workbook)
+                            workbooks.append(workbook)
+                        else:
+                            self.report.workbooks.dropped(
+                                f"{workbook.name} ({workbook.workbookId}) in {workspace.name}"
+                            )
+                    elif self.config.ingest_shared_entities:
+                        # If no workspace for workbook we can consider it as shared entity
+                        self.report.workbooks_without_workspace += 1
+                        self.report.workbooks.processed(
+                            f"{workbook.name} ({workbook.workbookId}) in workspace id {workbook.workspaceId or 'unknown'}"
+                        )
+                        workbook.pages = self.get_workbook_pages(workbook)
+                        workbooks.append(workbook)
+                    else:
+                        self.report.workbooks.dropped(
+                            f"{workbook.name} ({workbook.workbookId}) in workspace id {workbook.workspaceId or 'unknown'}"
+                        )
 
                 if response_dict[Constant.NEXTPAGE]:
                     url = f"{workbook_url}?page={response_dict[Constant.NEXTPAGE]}"
                 else:
                     break
-            self.report.number_of_workbooks = len(workbooks)
             return workbooks
         except Exception as e:
             self._log_http_error(
datahub/ingestion/source/snowflake/snowflake_queries.py
@@ -515,7 +515,10 @@ class SnowflakeQueriesExtractor(SnowflakeStructuredReportMixin, Closeable):
            # job at eliminating redundant / repetitive queries. As such, we include the fast fingerprint
            # here
            query_id=get_query_fingerprint(
-                res["query_text"],
+                res["query_text"],
+                self.identifiers.platform,
+                fast=True,
+                secondary_id=res["query_secondary_fingerprint"],
            ),
            query_text=res["query_text"],
            upstreams=upstreams,
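For context on the hunk above: the extractor now passes the platform, requests the fast fingerprint, and threads through the new secondary fingerprint. A hedged usage sketch of the updated helper, assuming get_query_fingerprint in this release accepts the fast and secondary_id arguments exactly as called above:

from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint

fingerprint = get_query_fingerprint(
    "SELECT id FROM db.sch.users WHERE id = 42",
    platform="snowflake",
    fast=True,  # normalize and hash the text without a full sqlglot parse
    secondary_id="hex-project-hash",  # hypothetical secondary fingerprint value
)
print(fingerprint)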
@@ -654,7 +657,17 @@ WITH
 fingerprinted_queries as (
     SELECT *,
         -- TODO: Generate better fingerprints for each query by pushing down regex logic.
-        query_history.query_parameterized_hash as query_fingerprint
+        query_history.query_parameterized_hash as query_fingerprint,
+        -- Optional and additional hash to be used for query deduplication and final query identity
+        CASE
+            WHEN CONTAINS(query_history.query_text, '-- Hex query metadata:')
+            -- Extract project id and hash it
+            THEN CAST(HASH(
+                REGEXP_SUBSTR(query_history.query_text, '"project_id"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1),
+                REGEXP_SUBSTR(query_history.query_text, '"context"\\\\s*:\\\\s*"([^"]+)"', 1, 1, 'e', 1)
+            ) AS VARCHAR)
+            ELSE NULL
+        END as query_secondary_fingerprint
     FROM
         snowflake.account_usage.query_history
     WHERE
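The CASE expression above computes a secondary fingerprint only for queries that carry a '-- Hex query metadata:' comment, by extracting the project_id and context fields and hashing them together. Roughly the same logic in Python, for illustration; sha256 stands in for Snowflake's HASH and the function name is ours:

import hashlib
import re
from typing import Optional

def hex_secondary_fingerprint(query_text: str) -> Optional[str]:
    if "-- Hex query metadata:" not in query_text:
        return None
    # Pull the JSON-ish fields out of the trailing metadata comment.
    project_id = re.search(r'"project_id"\s*:\s*"([^"]+)"', query_text)
    context = re.search(r'"context"\s*:\s*"([^"]+)"', query_text)
    combined = (
        (project_id.group(1) if project_id else "")
        + ":"
        + (context.group(1) if context else "")
    )
    return hashlib.sha256(combined.encode("utf-8")).hexdigest()

print(hex_secondary_fingerprint(
    'SELECT 1 -- Hex query metadata: {"project_id": "abc-123", "context": "SCHEDULED_RUN"}'
))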
@@ -670,11 +683,11 @@ fingerprinted_queries as (
             {time_bucket_size},
             CONVERT_TIMEZONE('UTC', start_time)
         ) AS bucket_start_time,
-        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint) AS query_count,
+        COUNT(*) OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint) AS query_count,
     FROM
         fingerprinted_queries
     QUALIFY
-        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint ORDER BY start_time DESC) = 1
+        ROW_NUMBER() OVER (PARTITION BY bucket_start_time, query_fingerprint, query_secondary_fingerprint ORDER BY start_time DESC) = 1
 )
 , raw_access_history AS (
     SELECT
@@ -714,6 +727,7 @@ fingerprinted_queries as (
         q.bucket_start_time,
         q.query_id,
         q.query_fingerprint,
+        q.query_secondary_fingerprint,
         q.query_count,
         q.session_id AS "SESSION_ID",
         q.start_time AS "QUERY_START_TIME",
datahub/ingestion/source/snowflake/snowflake_query.py
@@ -1000,4 +1000,4 @@ WHERE table_schema='{schema_name}' AND {extra_clause}"""
         from_clause = (
             f"""FROM '{stream_pagination_marker}'""" if stream_pagination_marker else ""
         )
-        return f"""SHOW STREAMS IN DATABASE {db_name} LIMIT {limit} {from_clause};"""
+        return f"""SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit} {from_clause};"""
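The one-character fix above wraps the database name in double quotes. Unquoted Snowflake identifiers are upper-cased and limited to letters, digits, '_' and '$', so databases with mixed-case or special characters previously produced invalid SQL. A tiny sketch of the difference; the helper name is ours:

def show_streams_query(db_name: str, limit: int = 100) -> str:
    # Double quotes preserve case and allow characters like '-' in the name.
    return f'SHOW STREAMS IN DATABASE "{db_name}" LIMIT {limit};'

print(show_streams_query("my-analytics-db"))
# SHOW STREAMS IN DATABASE "my-analytics-db" LIMIT 100;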
datahub/ingestion/source/sql/stored_procedures/base.py
@@ -26,6 +26,7 @@ from datahub.metadata.schema_classes import (
     DataPlatformInstanceClass,
     DataTransformClass,
     DataTransformLogicClass,
+    QueryLanguageClass,
     QueryStatementClass,
     SubTypesClass,
 )
@@ -176,7 +177,17 @@ def _generate_job_workunits(
         DataTransformClass(
             queryStatement=QueryStatementClass(
                 value=procedure.procedure_definition,
-                language="SQL",
+                language=(
+                    QueryLanguageClass.SQL
+                    if procedure.language == "SQL"
+                    # The language field uses a pretty limited enum.
+                    # The "UNKNOWN" enum value is pretty new, so we don't want to
+                    # emit it until it has broader server-side support. As a
+                    # short-term solution, we map all languages to "SQL".
+                    # TODO: Once we've released server 1.1.0, we should change
+                    # this to be "UNKNOWN" for all languages except "SQL".
+                    else QueryLanguageClass.SQL
+                ),
             ),
         )
     ]
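As the inline comments note, both branches of the conditional above currently resolve to SQL; the structure only stages the future UNKNOWN mapping. Making the interim no-op explicit as a sketch; the helper name is ours:

from datahub.metadata.schema_classes import QueryLanguageClass

def map_procedure_language(language: str) -> str:
    # Today: everything maps to SQL, per the TODO above.
    # After server 1.1.0: non-SQL languages are intended to map to UNKNOWN.
    return QueryLanguageClass.SQL if language == "SQL" else QueryLanguageClass.SQL

assert map_procedure_language("JAVASCRIPT") == "SQL"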
datahub/metadata/_schema_classes.py
@@ -15442,6 +15442,35 @@ class DataHubIngestionSourceKeyClass(_Aspect):
         self._inner_dict['id'] = value
 
 
+class DataHubOpenAPISchemaKeyClass(_Aspect):
+    """Key for a Query"""
+
+
+    ASPECT_NAME = 'dataHubOpenAPISchemaKey'
+    ASPECT_INFO = {'keyForEntity': 'dataHubOpenAPISchema', 'entityCategory': 'internal', 'entityAspects': ['systemMetadata'], 'entityDoc': 'Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model.'}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubOpenAPISchemaKey")
+
+    def __init__(self,
+        id: str,
+    ):
+        super().__init__()
+
+        self.id = id
+
+    def _restore_defaults(self) -> None:
+        self.id = str()
+
+
+    @property
+    def id(self) -> str:
+        """A unique id for the DataHub OpenAPI schema."""
+        return self._inner_dict.get('id')  # type: ignore
+
+    @id.setter
+    def id(self, value: str) -> None:
+        self._inner_dict['id'] = value
+
+
 class DataHubPersonaKeyClass(_Aspect):
     """Key for a persona type"""
 
@@ -20128,10 +20157,14 @@ class PlatformEventHeaderClass(DictWrapper):
         self._inner_dict['timestampMillis'] = value
 
 
-class SystemMetadataClass(DictWrapper):
+class SystemMetadataClass(_Aspect):
     """Metadata associated with each metadata change that is processed by the system"""
-
+
+
+    ASPECT_NAME = 'systemMetadata'
+    ASPECT_INFO = {}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.mxe.SystemMetadata")
+
     def __init__(self,
                  lastObserved: Optional[Union[int, None]]=None,
                  runId: Optional[Union[str, None]]=None,
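With SystemMetadataClass now subclassing _Aspect under the aspect name 'systemMetadata', it participates in ASPECT_CLASSES and AspectBag like any other aspect (see the registry hunks below). A minimal usage sketch based on the constructor shown above; the field values are made up:

from datahub.metadata.schema_classes import SystemMetadataClass

sm = SystemMetadataClass(lastObserved=1715000000000, runId="demo-ingestion-run")
print(SystemMetadataClass.ASPECT_NAME)  # systemMetadata
print(sm.lastObserved, sm.runId)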
@@ -21738,6 +21771,9 @@ class QueryLanguageClass(object):
     SQL = "SQL"
     """A SQL Query"""
 
+    UNKNOWN = "UNKNOWN"
+    """Unknown query language"""
+
 
 
 class QueryPropertiesClass(_Aspect):
@@ -26135,6 +26171,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.metadata.key.DataHubActionKey': DataHubActionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubConnectionKey': DataHubConnectionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
+    'com.linkedin.pegasus2avro.metadata.key.DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubPersonaKey': DataHubPersonaKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubPolicyKey': DataHubPolicyKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubRetentionKey': DataHubRetentionKeyClass,
@@ -26620,6 +26657,7 @@ __SCHEMA_TYPES = {
     'DataHubActionKey': DataHubActionKeyClass,
     'DataHubConnectionKey': DataHubConnectionKeyClass,
     'DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
+    'DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'DataHubPersonaKey': DataHubPersonaKeyClass,
     'DataHubPolicyKey': DataHubPolicyKeyClass,
     'DataHubRetentionKey': DataHubRetentionKeyClass,
@@ -26879,6 +26917,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     ContainerClass,
     ContainerPropertiesClass,
     EditableContainerPropertiesClass,
+    SystemMetadataClass,
     DataHubSecretValueClass,
     DataHubUpgradeRequestClass,
     DataHubUpgradeResultClass,
@@ -26935,6 +26974,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     MLModelKeyClass,
     NotebookKeyClass,
     RoleKeyClass,
+    DataHubOpenAPISchemaKeyClass,
     GlobalSettingsKeyClass,
     DatasetKeyClass,
     ChartKeyClass,
@@ -27102,6 +27142,7 @@ class AspectBag(TypedDict, total=False):
     container: ContainerClass
     containerProperties: ContainerPropertiesClass
     editableContainerProperties: EditableContainerPropertiesClass
+    systemMetadata: SystemMetadataClass
     dataHubSecretValue: DataHubSecretValueClass
     dataHubUpgradeRequest: DataHubUpgradeRequestClass
     dataHubUpgradeResult: DataHubUpgradeResultClass
@@ -27158,6 +27199,7 @@ class AspectBag(TypedDict, total=False):
     mlModelKey: MLModelKeyClass
     notebookKey: NotebookKeyClass
     roleKey: RoleKeyClass
+    dataHubOpenAPISchemaKey: DataHubOpenAPISchemaKeyClass
     globalSettingsKey: GlobalSettingsKeyClass
     datasetKey: DatasetKeyClass
     chartKey: ChartKeyClass
@@ -27292,6 +27334,7 @@ KEY_ASPECTS: Dict[str, Type[_Aspect]] = {
     'mlModel': MLModelKeyClass,
     'notebook': NotebookKeyClass,
     'role': RoleKeyClass,
+    'dataHubOpenAPISchema': DataHubOpenAPISchemaKeyClass,
     'globalSettings': GlobalSettingsKeyClass,
     'dataset': DatasetKeyClass,
     'chart': ChartKeyClass,
@@ -27352,6 +27395,7 @@ ENTITY_TYPE_NAMES: List[str] = [
     'mlModel',
     'notebook',
     'role',
+    'dataHubOpenAPISchema',
     'globalSettings',
     'dataset',
     'chart',
@@ -27411,6 +27455,7 @@ EntityTypeName = Literal[
     'mlModel',
     'notebook',
     'role',
+    'dataHubOpenAPISchema',
     'globalSettings',
     'dataset',
     'chart',
datahub/metadata/_urns/urn_defs.py
@@ -594,6 +594,62 @@ class RoleUrn(_SpecificUrn):
     def id(self) -> str:
         return self._entity_ids[0]
 
+if TYPE_CHECKING:
+    from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+class DataHubOpenAPISchemaUrn(_SpecificUrn):
+    ENTITY_TYPE: ClassVar[Literal["dataHubOpenAPISchema"]] = "dataHubOpenAPISchema"
+    _URN_PARTS: ClassVar[int] = 1
+
+    def __init__(self, id: Union["DataHubOpenAPISchemaUrn", str], *, _allow_coercion: bool = True) -> None:
+        if _allow_coercion:
+            # Field coercion logic (if any is required).
+            if isinstance(id, str):
+                if id.startswith('urn:li:'):
+                    try:
+                        id = DataHubOpenAPISchemaUrn.from_string(id)
+                    except InvalidUrnError:
+                        raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
+                else:
+                    id = UrnEncoder.encode_string(id)
+
+        # Validation logic.
+        if not id:
+            raise InvalidUrnError("DataHubOpenAPISchemaUrn id cannot be empty")
+        if isinstance(id, DataHubOpenAPISchemaUrn):
+            id = id.id
+        elif isinstance(id, Urn):
+            raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
+        if UrnEncoder.contains_reserved_char(id):
+            raise InvalidUrnError(f'DataHubOpenAPISchemaUrn id contains reserved characters')
+
+        super().__init__(self.ENTITY_TYPE, [id])
+
+    @classmethod
+    def _parse_ids(cls, entity_ids: List[str]) -> "DataHubOpenAPISchemaUrn":
+        if len(entity_ids) != cls._URN_PARTS:
+            raise InvalidUrnError(f"DataHubOpenAPISchemaUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
+        return cls(id=entity_ids[0], _allow_coercion=False)
+
+    @classmethod
+    def underlying_key_aspect_type(cls) -> Type["DataHubOpenAPISchemaKeyClass"]:
+        from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+        return DataHubOpenAPISchemaKeyClass
+
+    def to_key_aspect(self) -> "DataHubOpenAPISchemaKeyClass":
+        from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+        return DataHubOpenAPISchemaKeyClass(id=self.id)
+
+    @classmethod
+    def from_key_aspect(cls, key_aspect: "DataHubOpenAPISchemaKeyClass") -> "DataHubOpenAPISchemaUrn":
+        return cls(id=key_aspect.id)
+
+    @property
+    def id(self) -> str:
+        return self._entity_ids[0]
+
 if TYPE_CHECKING:
     from datahub.metadata.schema_classes import GlobalSettingsKeyClass
 
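A hedged usage sketch for the new urn type, based on the class added above. The id value is made up, and the datahub.metadata.urns re-export path is an assumption:

from datahub.metadata.urns import DataHubOpenAPISchemaUrn  # assumed re-export path

urn = DataHubOpenAPISchemaUrn("systemMetadata")  # hypothetical schema id
print(urn.urn())           # urn:li:dataHubOpenAPISchema:systemMetadata
key = urn.to_key_aspect()  # DataHubOpenAPISchemaKeyClass from the hunk above
print(key.id)              # systemMetadata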
datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py
@@ -19,6 +19,7 @@ from ......schema_classes import DataHubAccessTokenKeyClass
 from ......schema_classes import DataHubActionKeyClass
 from ......schema_classes import DataHubConnectionKeyClass
 from ......schema_classes import DataHubIngestionSourceKeyClass
+from ......schema_classes import DataHubOpenAPISchemaKeyClass
 from ......schema_classes import DataHubPersonaKeyClass
 from ......schema_classes import DataHubPolicyKeyClass
 from ......schema_classes import DataHubRetentionKeyClass
@@ -72,6 +73,7 @@ DataHubAccessTokenKey = DataHubAccessTokenKeyClass
 DataHubActionKey = DataHubActionKeyClass
 DataHubConnectionKey = DataHubConnectionKeyClass
 DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
+DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
 DataHubPersonaKey = DataHubPersonaKeyClass
 DataHubPolicyKey = DataHubPolicyKeyClass
 DataHubRetentionKey = DataHubRetentionKeyClass