acryl-datahub-cloud 0.3.10.3rc2__py3-none-any.whl → 0.3.11rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/_sdk_extras/__init__.py +4 -0
- acryl_datahub_cloud/_sdk_extras/assertion.py +15 -0
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +23 -0
- acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
- acryl_datahub_cloud/action_request/action_request_owner_source.py +1 -2
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +3 -7
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +9 -5
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +14 -32
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +56 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
- acryl_datahub_cloud/metadata/schema.avsc +138 -29
- acryl_datahub_cloud/metadata/schema_classes.py +214 -29
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +25 -0
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +25 -0
- acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +36 -26
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +58 -0
- acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +86 -0
- {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/METADATA +41 -41
- {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/RECORD +33 -28
- {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This file contains the Assertion class, which is used to represent an assertion in DataHub.
|
|
3
|
+
|
|
4
|
+
The Assertion class is currently not implemented, this is a placeholder for future implementation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Union
|
|
8
|
+
|
|
9
|
+
from datahub.metadata.urns import AssertionUrn
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Assertion:
|
|
13
|
+
def __init__(self, urn: Union[str, AssertionUrn]):
|
|
14
|
+
print(f"The Assertion class is currently not implemented. Urn provided: {urn}")
|
|
15
|
+
self.urn = AssertionUrn(urn)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Union
|
|
4
|
+
|
|
5
|
+
from acryl_datahub_cloud._sdk_extras.assertion import Assertion
|
|
6
|
+
from datahub.metadata.urns import AssertionUrn
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from datahub.sdk.main_client import DataHubClient
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AssertionsClient:
|
|
13
|
+
def __init__(self, client: DataHubClient):
|
|
14
|
+
self.client = client
|
|
15
|
+
|
|
16
|
+
def get_assertions(
|
|
17
|
+
self, urn: Union[str, list[str], AssertionUrn, list[AssertionUrn]]
|
|
18
|
+
) -> list[Assertion]:
|
|
19
|
+
print(
|
|
20
|
+
"get_assertions is not implemented, this is a placeholder. Returning empty list."
|
|
21
|
+
)
|
|
22
|
+
print(f"urn provided: {urn}")
|
|
23
|
+
return []
|
|
@@ -219,6 +219,7 @@ class AcrylCustomer:
|
|
|
219
219
|
self._emitted_containers: Dict[str, bool] = {}
|
|
220
220
|
|
|
221
221
|
def _get_owner_from_assignee(self, assignee: ExternalUser) -> str:
|
|
222
|
+
assert assignee.email, "Assignee must have an email"
|
|
222
223
|
owner_urn_options = [
|
|
223
224
|
urn
|
|
224
225
|
for urn in self.graph.get_urns_by_filter(
|
|
@@ -233,7 +234,6 @@ class AcrylCustomer:
|
|
|
233
234
|
)
|
|
234
235
|
]
|
|
235
236
|
if not owner_urn_options:
|
|
236
|
-
assert assignee.email, "Assignee must have an email"
|
|
237
237
|
owner_urn = "urn:li:corpuser:" + assignee.email
|
|
238
238
|
self.graph.emit(
|
|
239
239
|
MetadataChangeProposalWrapper(
|
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
from typing import Dict, Iterable, List, Optional
|
|
3
3
|
|
|
4
4
|
from datahub.configuration import ConfigModel
|
|
5
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
5
6
|
from datahub.ingestion.api.common import PipelineContext
|
|
6
7
|
from datahub.ingestion.api.decorators import (
|
|
7
8
|
SupportStatus,
|
|
@@ -14,10 +15,8 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
|
14
15
|
from datahub.metadata.schema_classes import (
|
|
15
16
|
ActionRequestInfoClass,
|
|
16
17
|
)
|
|
17
|
-
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
18
18
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
19
19
|
|
|
20
|
-
|
|
21
20
|
logger = logging.getLogger(__name__)
|
|
22
21
|
|
|
23
22
|
|
|
@@ -163,7 +163,7 @@ class DataHubBasedS3Dataset:
|
|
|
163
163
|
self.schema = row.arrow_schema()
|
|
164
164
|
else:
|
|
165
165
|
# hail mary: infer schema from the first row and cast everything to string
|
|
166
|
-
self.schema = pa.schema([(key, pa.string()) for key in row
|
|
166
|
+
self.schema = pa.schema([(key, pa.string()) for key in row])
|
|
167
167
|
self.stringify_row = True
|
|
168
168
|
|
|
169
169
|
self._initialize_local_file()
|
|
@@ -172,7 +172,7 @@ class DataHubBasedS3Dataset:
|
|
|
172
172
|
self.schema,
|
|
173
173
|
compression=self.config.file_compression,
|
|
174
174
|
)
|
|
175
|
-
if isinstance(row, BaseModel
|
|
175
|
+
if isinstance(row, (BaseModel, BaseModelRow)):
|
|
176
176
|
# for anything extending BaseModel, we want to use the dict representation
|
|
177
177
|
write_row: Dict[str, Any] = row.dict()
|
|
178
178
|
elif isinstance(row, dict):
|
|
@@ -274,11 +274,7 @@ class DataHubBasedS3Dataset:
|
|
|
274
274
|
self, duckdb_columns: List[Tuple[str, str]]
|
|
275
275
|
) -> SchemaMetadataClass:
|
|
276
276
|
def get_type_from_dtype(dtype: str) -> SchemaFieldDataTypeClass:
|
|
277
|
-
if "int" in dtype:
|
|
278
|
-
return SchemaFieldDataTypeClass(type=NumberTypeClass())
|
|
279
|
-
elif "float" in dtype:
|
|
280
|
-
return SchemaFieldDataTypeClass(type=NumberTypeClass())
|
|
281
|
-
elif "number" in dtype:
|
|
277
|
+
if "int" in dtype or "float" in dtype or "number" in dtype:
|
|
282
278
|
return SchemaFieldDataTypeClass(type=NumberTypeClass())
|
|
283
279
|
elif "bool" in dtype:
|
|
284
280
|
return SchemaFieldDataTypeClass(type=BooleanTypeClass())
|
|
@@ -5,6 +5,9 @@ from enum import Enum
|
|
|
5
5
|
from typing import Any, Callable, Dict, Iterable, List, Optional
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
|
|
8
11
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
9
12
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
10
13
|
from datahub.ingestion.graph.filters import RawSearchFilterRule
|
|
@@ -16,9 +19,6 @@ from datahub.metadata.schema_classes import (
|
|
|
16
19
|
FormStateClass,
|
|
17
20
|
FormTypeClass,
|
|
18
21
|
)
|
|
19
|
-
from pydantic import BaseModel
|
|
20
|
-
|
|
21
|
-
from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
|
|
22
22
|
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
@@ -257,6 +257,7 @@ class DataHubFormReportingData(FormData):
|
|
|
257
257
|
for prompt_id, response_time in zip(
|
|
258
258
|
search_row.completedFormsCompletedPromptIds,
|
|
259
259
|
search_row.completedFormsCompletedPromptResponseTimes,
|
|
260
|
+
strict=False,
|
|
260
261
|
)
|
|
261
262
|
if prompt_id in form_prompts
|
|
262
263
|
}
|
|
@@ -289,9 +290,10 @@ class DataHubFormReportingData(FormData):
|
|
|
289
290
|
on_asset_scanned: Optional[Callable[[str], Any]] = None,
|
|
290
291
|
on_form_scanned: Optional[Callable[[str], Any]] = None,
|
|
291
292
|
) -> Iterable[FormReportingRow]:
|
|
292
|
-
extra_fields = [f for f in self.DataHubDatasetSearchRow.__fields__
|
|
293
|
+
extra_fields = [f for f in self.DataHubDatasetSearchRow.__fields__]
|
|
294
|
+
# TODO: Replace with the new search/filter SDK.
|
|
293
295
|
result = self.graph.get_results_by_filter(
|
|
294
|
-
extra_or_filters=self.get_form_existence_or_filters(),
|
|
296
|
+
extra_or_filters=[{"and": self.get_form_existence_or_filters()}],
|
|
295
297
|
extra_source_fields=extra_fields,
|
|
296
298
|
skip_cache=True,
|
|
297
299
|
)
|
|
@@ -388,6 +390,7 @@ class DataHubFormReportingData(FormData):
|
|
|
388
390
|
for (p, p_response_time) in zip(
|
|
389
391
|
search_row.incompleteFormsCompletedPromptIds,
|
|
390
392
|
search_row.incompleteFormsCompletedPromptResponseTimes,
|
|
393
|
+
strict=False,
|
|
391
394
|
)
|
|
392
395
|
if p in form_prompts
|
|
393
396
|
]:
|
|
@@ -485,6 +488,7 @@ class DataHubFormReportingData(FormData):
|
|
|
485
488
|
for (p, p_response_time) in zip(
|
|
486
489
|
search_row.completedFormsCompletedPromptIds,
|
|
487
490
|
search_row.completedFormsCompletedPromptResponseTimes,
|
|
491
|
+
strict=False,
|
|
488
492
|
)
|
|
489
493
|
if p in form_prompts
|
|
490
494
|
]:
|
|
@@ -395,18 +395,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
395
395
|
"last_modified_at": (
|
|
396
396
|
doc["_source"]["lastModifiedAt"]
|
|
397
397
|
if "lastModifiedAt" in doc["_source"]
|
|
398
|
-
else (
|
|
399
|
-
doc["_source"]["lastModifiedAt"]
|
|
400
|
-
if "lastModifiedAt" in doc["_source"]
|
|
401
|
-
else None
|
|
402
|
-
)
|
|
398
|
+
else (doc["_source"].get("lastModifiedAt", None))
|
|
403
399
|
),
|
|
404
400
|
"platform": doc["_source"]["platform"],
|
|
405
|
-
"removed": (
|
|
406
|
-
doc["_source"]["removed"]
|
|
407
|
-
if "removed" in doc["_source"]
|
|
408
|
-
else False
|
|
409
|
-
),
|
|
401
|
+
"removed": (doc["_source"].get("removed", False)),
|
|
410
402
|
}
|
|
411
403
|
|
|
412
404
|
time_taken = timer.elapsed_seconds()
|
|
@@ -509,11 +501,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
509
501
|
"eventGranularity": doc["_source"].get("eventGranularity"),
|
|
510
502
|
"totalSqlQueries": doc["_source"].get("totalSqlQueries", 0),
|
|
511
503
|
"uniqueUserCount": doc["_source"].get("uniqueUserCount", 0),
|
|
512
|
-
"userCounts": (
|
|
513
|
-
doc["_source"]["event"]["userCounts"]
|
|
514
|
-
if "userCounts" in doc["_source"]["event"]
|
|
515
|
-
else None
|
|
516
|
-
),
|
|
504
|
+
"userCounts": (doc["_source"]["event"].get("userCounts", None)),
|
|
517
505
|
"platform": platform,
|
|
518
506
|
}
|
|
519
507
|
except KeyError as e:
|
|
@@ -525,7 +513,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
525
513
|
time_taken = timer.elapsed_seconds()
|
|
526
514
|
logger.info(f"DatasetUsage processing took {time_taken:.3f} seconds")
|
|
527
515
|
|
|
528
|
-
def search_score(
|
|
516
|
+
def search_score( # noqa: C901
|
|
529
517
|
self, urn: str, last_update_time: int, usage_percentile: int
|
|
530
518
|
) -> SearchRankingMultipliers:
|
|
531
519
|
usage_search_score_multiplier = 1.0
|
|
@@ -622,10 +610,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
622
610
|
[endpoint],
|
|
623
611
|
http_auth=(user, password),
|
|
624
612
|
use_ssl=(
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
613
|
+
bool(
|
|
614
|
+
self.config.search_index
|
|
615
|
+
and self.config.search_index.use_ssl
|
|
616
|
+
)
|
|
629
617
|
),
|
|
630
618
|
)
|
|
631
619
|
|
|
@@ -639,10 +627,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
639
627
|
[endpoint],
|
|
640
628
|
http_auth=(user, password),
|
|
641
629
|
use_ssl=(
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
630
|
+
bool(
|
|
631
|
+
self.config.search_index
|
|
632
|
+
and self.config.search_index.use_ssl
|
|
633
|
+
)
|
|
646
634
|
),
|
|
647
635
|
)
|
|
648
636
|
|
|
@@ -737,7 +725,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
737
725
|
polars.Duration(): pa.duration("ns"),
|
|
738
726
|
}
|
|
739
727
|
|
|
740
|
-
if polars_dtype in [type(key) for key in type_mapping
|
|
728
|
+
if polars_dtype in [type(key) for key in type_mapping]:
|
|
741
729
|
return type_mapping[polars_dtype]
|
|
742
730
|
elif polars_dtype == polars.Categorical:
|
|
743
731
|
return pa.dictionary(index_type=pa.int32(), value_type=pa.string())
|
|
@@ -1006,12 +994,9 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1006
994
|
def generate_mcp_from_lazyframe(
|
|
1007
995
|
self, lazy_frame: polars.LazyFrame
|
|
1008
996
|
) -> Iterable[MetadataWorkUnit]:
|
|
1009
|
-
num = 0
|
|
1010
997
|
for row in lazy_frame.collect(
|
|
1011
998
|
streaming=self.config.experimental_full_streaming
|
|
1012
999
|
).to_struct():
|
|
1013
|
-
num += 1
|
|
1014
|
-
|
|
1015
1000
|
if "siblings" in row and row["siblings"]:
|
|
1016
1001
|
logger.info(f"Siblings found for urn: {row['urn']} -> row['siblings']")
|
|
1017
1002
|
|
|
@@ -1101,10 +1086,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1101
1086
|
def generate_query_usage_mcp_from_lazyframe(
|
|
1102
1087
|
self, lazy_frame: polars.LazyFrame
|
|
1103
1088
|
) -> Iterable[MetadataWorkUnit]:
|
|
1104
|
-
num = 0
|
|
1105
1089
|
for row in lazy_frame.collect().iter_rows(named=True):
|
|
1106
|
-
num += 1
|
|
1107
|
-
|
|
1108
1090
|
query_usage_features = QueryUsageFeaturesClass(
|
|
1109
1091
|
queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1110
1092
|
queryCountTotal=None, # This is not implemented
|
|
@@ -1287,7 +1269,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1287
1269
|
.is_not_null()
|
|
1288
1270
|
# We only want to downrank datasets that have a search score multiplier greater than 1. 1 is the minimum score of a dataset
|
|
1289
1271
|
.and_(polars.col("combinedSearchRankingMultiplier").ne(1))
|
|
1290
|
-
)
|
|
1272
|
+
)
|
|
1291
1273
|
.filter(polars.col("removed") == False) # noqa: E712
|
|
1292
1274
|
.drop(["removed"])
|
|
1293
1275
|
.drop(["last_modified_at"])
|
|
@@ -3876,6 +3876,62 @@ class FormUrn(_SpecificUrn):
|
|
|
3876
3876
|
def id(self) -> str:
|
|
3877
3877
|
return self._entity_ids[0]
|
|
3878
3878
|
|
|
3879
|
+
if TYPE_CHECKING:
|
|
3880
|
+
from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
|
|
3881
|
+
|
|
3882
|
+
class DataHubOpenAPISchemaUrn(_SpecificUrn):
|
|
3883
|
+
ENTITY_TYPE: ClassVar[Literal["dataHubOpenAPISchema"]] = "dataHubOpenAPISchema"
|
|
3884
|
+
_URN_PARTS: ClassVar[int] = 1
|
|
3885
|
+
|
|
3886
|
+
def __init__(self, id: Union["DataHubOpenAPISchemaUrn", str], *, _allow_coercion: bool = True) -> None:
|
|
3887
|
+
if _allow_coercion:
|
|
3888
|
+
# Field coercion logic (if any is required).
|
|
3889
|
+
if isinstance(id, str):
|
|
3890
|
+
if id.startswith('urn:li:'):
|
|
3891
|
+
try:
|
|
3892
|
+
id = DataHubOpenAPISchemaUrn.from_string(id)
|
|
3893
|
+
except InvalidUrnError:
|
|
3894
|
+
raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
|
|
3895
|
+
else:
|
|
3896
|
+
id = UrnEncoder.encode_string(id)
|
|
3897
|
+
|
|
3898
|
+
# Validation logic.
|
|
3899
|
+
if not id:
|
|
3900
|
+
raise InvalidUrnError("DataHubOpenAPISchemaUrn id cannot be empty")
|
|
3901
|
+
if isinstance(id, DataHubOpenAPISchemaUrn):
|
|
3902
|
+
id = id.id
|
|
3903
|
+
elif isinstance(id, Urn):
|
|
3904
|
+
raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
|
|
3905
|
+
if UrnEncoder.contains_reserved_char(id):
|
|
3906
|
+
raise InvalidUrnError(f'DataHubOpenAPISchemaUrn id contains reserved characters')
|
|
3907
|
+
|
|
3908
|
+
super().__init__(self.ENTITY_TYPE, [id])
|
|
3909
|
+
|
|
3910
|
+
@classmethod
|
|
3911
|
+
def _parse_ids(cls, entity_ids: List[str]) -> "DataHubOpenAPISchemaUrn":
|
|
3912
|
+
if len(entity_ids) != cls._URN_PARTS:
|
|
3913
|
+
raise InvalidUrnError(f"DataHubOpenAPISchemaUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
|
|
3914
|
+
return cls(id=entity_ids[0], _allow_coercion=False)
|
|
3915
|
+
|
|
3916
|
+
@classmethod
|
|
3917
|
+
def underlying_key_aspect_type(cls) -> Type["DataHubOpenAPISchemaKeyClass"]:
|
|
3918
|
+
from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
|
|
3919
|
+
|
|
3920
|
+
return DataHubOpenAPISchemaKeyClass
|
|
3921
|
+
|
|
3922
|
+
def to_key_aspect(self) -> "DataHubOpenAPISchemaKeyClass":
|
|
3923
|
+
from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
|
|
3924
|
+
|
|
3925
|
+
return DataHubOpenAPISchemaKeyClass(id=self.id)
|
|
3926
|
+
|
|
3927
|
+
@classmethod
|
|
3928
|
+
def from_key_aspect(cls, key_aspect: "DataHubOpenAPISchemaKeyClass") -> "DataHubOpenAPISchemaUrn":
|
|
3929
|
+
return cls(id=key_aspect.id)
|
|
3930
|
+
|
|
3931
|
+
@property
|
|
3932
|
+
def id(self) -> str:
|
|
3933
|
+
return self._entity_ids[0]
|
|
3934
|
+
|
|
3879
3935
|
if TYPE_CHECKING:
|
|
3880
3936
|
from datahub.metadata.schema_classes import CorpUserKeyClass
|
|
3881
3937
|
|
|
@@ -19,6 +19,7 @@ from .....schema_classes import AssertionExclusionWindowClass
|
|
|
19
19
|
from .....schema_classes import AssertionExclusionWindowTypeClass
|
|
20
20
|
from .....schema_classes import AssertionInferenceDetailsClass
|
|
21
21
|
from .....schema_classes import AssertionInfoClass
|
|
22
|
+
from .....schema_classes import AssertionMetricClass
|
|
22
23
|
from .....schema_classes import AssertionMonitorSensitivityClass
|
|
23
24
|
from .....schema_classes import AssertionResultClass
|
|
24
25
|
from .....schema_classes import AssertionResultErrorClass
|
|
@@ -84,6 +85,7 @@ AssertionExclusionWindow = AssertionExclusionWindowClass
|
|
|
84
85
|
AssertionExclusionWindowType = AssertionExclusionWindowTypeClass
|
|
85
86
|
AssertionInferenceDetails = AssertionInferenceDetailsClass
|
|
86
87
|
AssertionInfo = AssertionInfoClass
|
|
88
|
+
AssertionMetric = AssertionMetricClass
|
|
87
89
|
AssertionMonitorSensitivity = AssertionMonitorSensitivityClass
|
|
88
90
|
AssertionResult = AssertionResultClass
|
|
89
91
|
AssertionResultError = AssertionResultErrorClass
|
|
@@ -23,6 +23,7 @@ from ......schema_classes import DataHubActionKeyClass
|
|
|
23
23
|
from ......schema_classes import DataHubConnectionKeyClass
|
|
24
24
|
from ......schema_classes import DataHubIngestionSourceKeyClass
|
|
25
25
|
from ......schema_classes import DataHubMetricCubeKeyClass
|
|
26
|
+
from ......schema_classes import DataHubOpenAPISchemaKeyClass
|
|
26
27
|
from ......schema_classes import DataHubPersonaKeyClass
|
|
27
28
|
from ......schema_classes import DataHubPolicyKeyClass
|
|
28
29
|
from ......schema_classes import DataHubRetentionKeyClass
|
|
@@ -89,6 +90,7 @@ DataHubActionKey = DataHubActionKeyClass
|
|
|
89
90
|
DataHubConnectionKey = DataHubConnectionKeyClass
|
|
90
91
|
DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
|
|
91
92
|
DataHubMetricCubeKey = DataHubMetricCubeKeyClass
|
|
93
|
+
DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
|
|
92
94
|
DataHubPersonaKey = DataHubPersonaKeyClass
|
|
93
95
|
DataHubPolicyKey = DataHubPolicyKeyClass
|
|
94
96
|
DataHubRetentionKey = DataHubRetentionKeyClass
|
|
@@ -12,7 +12,10 @@ from .....schema_classes import AssertionEvaluationParametersClass
|
|
|
12
12
|
from .....schema_classes import AssertionEvaluationParametersTypeClass
|
|
13
13
|
from .....schema_classes import AssertionEvaluationSpecClass
|
|
14
14
|
from .....schema_classes import AssertionMonitorClass
|
|
15
|
+
from .....schema_classes import AssertionMonitorBootstrapStatusClass
|
|
15
16
|
from .....schema_classes import AssertionMonitorCapabilityClass
|
|
17
|
+
from .....schema_classes import AssertionMonitorMetricsCubeBootstrapStateClass
|
|
18
|
+
from .....schema_classes import AssertionMonitorMetricsCubeBootstrapStatusClass
|
|
16
19
|
from .....schema_classes import AssertionMonitorSettingsClass
|
|
17
20
|
from .....schema_classes import AuditLogSpecClass
|
|
18
21
|
from .....schema_classes import DataHubOperationSpecClass
|
|
@@ -45,7 +48,10 @@ AssertionEvaluationParameters = AssertionEvaluationParametersClass
|
|
|
45
48
|
AssertionEvaluationParametersType = AssertionEvaluationParametersTypeClass
|
|
46
49
|
AssertionEvaluationSpec = AssertionEvaluationSpecClass
|
|
47
50
|
AssertionMonitor = AssertionMonitorClass
|
|
51
|
+
AssertionMonitorBootstrapStatus = AssertionMonitorBootstrapStatusClass
|
|
48
52
|
AssertionMonitorCapability = AssertionMonitorCapabilityClass
|
|
53
|
+
AssertionMonitorMetricsCubeBootstrapState = AssertionMonitorMetricsCubeBootstrapStateClass
|
|
54
|
+
AssertionMonitorMetricsCubeBootstrapStatus = AssertionMonitorMetricsCubeBootstrapStatusClass
|
|
49
55
|
AssertionMonitorSettings = AssertionMonitorSettingsClass
|
|
50
56
|
AuditLogSpec = AuditLogSpecClass
|
|
51
57
|
DataHubOperationSpec = DataHubOperationSpecClass
|
|
@@ -1449,20 +1449,24 @@
|
|
|
1449
1449
|
},
|
|
1450
1450
|
{
|
|
1451
1451
|
"TimeseriesField": {},
|
|
1452
|
-
"type":
|
|
1453
|
-
"
|
|
1454
|
-
|
|
1455
|
-
"
|
|
1456
|
-
"
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
"
|
|
1462
|
-
"
|
|
1463
|
-
|
|
1464
|
-
|
|
1452
|
+
"type": [
|
|
1453
|
+
"null",
|
|
1454
|
+
{
|
|
1455
|
+
"type": "enum",
|
|
1456
|
+
"symbolDocs": {
|
|
1457
|
+
"CONFIRMED": "The anomaly has been confirmed by a human reviewer. This means the anomaly was validated.",
|
|
1458
|
+
"REJECTED": "The anomaly has been dismissed, or ignored, by a human reviewer. This means the anomaly should have been ignored."
|
|
1459
|
+
},
|
|
1460
|
+
"name": "AnomalyReviewState",
|
|
1461
|
+
"namespace": "com.linkedin.pegasus2avro.anomaly",
|
|
1462
|
+
"symbols": [
|
|
1463
|
+
"CONFIRMED",
|
|
1464
|
+
"REJECTED"
|
|
1465
|
+
]
|
|
1466
|
+
}
|
|
1467
|
+
],
|
|
1465
1468
|
"name": "state",
|
|
1469
|
+
"default": null,
|
|
1466
1470
|
"doc": "The review of the anomaly, based on human-provided feedback.\nIf this is not present, then the Anomaly has not yet been reviewed."
|
|
1467
1471
|
},
|
|
1468
1472
|
{
|
|
@@ -1509,23 +1513,29 @@
|
|
|
1509
1513
|
"namespace": "com.linkedin.pegasus2avro.anomaly",
|
|
1510
1514
|
"fields": [
|
|
1511
1515
|
{
|
|
1512
|
-
"TimeseriesField": {},
|
|
1513
1516
|
"type": [
|
|
1514
1517
|
"null",
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1518
|
+
{
|
|
1519
|
+
"type": "record",
|
|
1520
|
+
"name": "AssertionMetric",
|
|
1521
|
+
"namespace": "com.linkedin.pegasus2avro.assertion",
|
|
1522
|
+
"fields": [
|
|
1523
|
+
{
|
|
1524
|
+
"type": "long",
|
|
1525
|
+
"name": "timestampMs",
|
|
1526
|
+
"doc": "The timestamp associated with the metric sampling time in milliseconds since epoch"
|
|
1527
|
+
},
|
|
1528
|
+
{
|
|
1529
|
+
"type": "float",
|
|
1530
|
+
"name": "value",
|
|
1531
|
+
"doc": "The value of the metric that was sampled"
|
|
1532
|
+
}
|
|
1533
|
+
]
|
|
1534
|
+
}
|
|
1525
1535
|
],
|
|
1526
|
-
"name": "
|
|
1536
|
+
"name": "assertionMetric",
|
|
1527
1537
|
"default": null,
|
|
1528
|
-
"doc": "The
|
|
1538
|
+
"doc": "The monitor metric associated with the anomaly, if generated from an assertion monitor (the norm)."
|
|
1529
1539
|
}
|
|
1530
1540
|
],
|
|
1531
1541
|
"doc": "Ad-hoc properties about an anomaly source."
|
|
@@ -3970,12 +3980,14 @@
|
|
|
3970
3980
|
"type": {
|
|
3971
3981
|
"type": "enum",
|
|
3972
3982
|
"symbolDocs": {
|
|
3973
|
-
"SQL": "A SQL Query"
|
|
3983
|
+
"SQL": "A SQL Query",
|
|
3984
|
+
"UNKNOWN": "Unknown query language"
|
|
3974
3985
|
},
|
|
3975
3986
|
"name": "QueryLanguage",
|
|
3976
3987
|
"namespace": "com.linkedin.pegasus2avro.query",
|
|
3977
3988
|
"symbols": [
|
|
3978
|
-
"SQL"
|
|
3989
|
+
"SQL",
|
|
3990
|
+
"UNKNOWN"
|
|
3979
3991
|
]
|
|
3980
3992
|
},
|
|
3981
3993
|
"name": "language",
|
|
@@ -7757,6 +7769,9 @@
|
|
|
7757
7769
|
"doc": "Version of the MLModelDeployment"
|
|
7758
7770
|
},
|
|
7759
7771
|
{
|
|
7772
|
+
"Searchable": {
|
|
7773
|
+
"fieldName": "deploymentStatus"
|
|
7774
|
+
},
|
|
7760
7775
|
"type": [
|
|
7761
7776
|
"null",
|
|
7762
7777
|
{
|
|
@@ -15901,6 +15916,9 @@
|
|
|
15901
15916
|
"null",
|
|
15902
15917
|
{
|
|
15903
15918
|
"type": "record",
|
|
15919
|
+
"Aspect": {
|
|
15920
|
+
"name": "systemMetadata"
|
|
15921
|
+
},
|
|
15904
15922
|
"name": "SystemMetadata",
|
|
15905
15923
|
"namespace": "com.linkedin.pegasus2avro.mxe",
|
|
15906
15924
|
"fields": [
|
|
@@ -16118,6 +16136,7 @@
|
|
|
16118
16136
|
],
|
|
16119
16137
|
"doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataChangeLog is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeProposal will be emitted instead."
|
|
16120
16138
|
},
|
|
16139
|
+
"com.linkedin.pegasus2avro.mxe.SystemMetadata",
|
|
16121
16140
|
{
|
|
16122
16141
|
"type": "record",
|
|
16123
16142
|
"name": "MetadataChangeLog",
|
|
@@ -19942,6 +19961,64 @@
|
|
|
19942
19961
|
"name": "settings",
|
|
19943
19962
|
"default": null,
|
|
19944
19963
|
"doc": "Specific settings for an assertion monitor"
|
|
19964
|
+
},
|
|
19965
|
+
{
|
|
19966
|
+
"type": [
|
|
19967
|
+
"null",
|
|
19968
|
+
{
|
|
19969
|
+
"type": "record",
|
|
19970
|
+
"name": "AssertionMonitorBootstrapStatus",
|
|
19971
|
+
"namespace": "com.linkedin.pegasus2avro.monitor",
|
|
19972
|
+
"fields": [
|
|
19973
|
+
{
|
|
19974
|
+
"type": [
|
|
19975
|
+
"null",
|
|
19976
|
+
{
|
|
19977
|
+
"type": "record",
|
|
19978
|
+
"name": "AssertionMonitorMetricsCubeBootstrapStatus",
|
|
19979
|
+
"namespace": "com.linkedin.pegasus2avro.monitor",
|
|
19980
|
+
"fields": [
|
|
19981
|
+
{
|
|
19982
|
+
"type": {
|
|
19983
|
+
"type": "enum",
|
|
19984
|
+
"symbolDocs": {
|
|
19985
|
+
"COMPLETED": "The metrics cube for this monitor has been bootstrapped.",
|
|
19986
|
+
"FAILED": "The metrics cube for this monitor has failed to bootstrap.",
|
|
19987
|
+
"PENDING": "The metrics cube for this monitor has not been bootstrapped."
|
|
19988
|
+
},
|
|
19989
|
+
"name": "AssertionMonitorMetricsCubeBootstrapState",
|
|
19990
|
+
"namespace": "com.linkedin.pegasus2avro.monitor",
|
|
19991
|
+
"symbols": [
|
|
19992
|
+
"PENDING",
|
|
19993
|
+
"FAILED",
|
|
19994
|
+
"COMPLETED"
|
|
19995
|
+
]
|
|
19996
|
+
},
|
|
19997
|
+
"name": "state",
|
|
19998
|
+
"doc": "Whether the metrics cube for this monitor has been bootstrapped."
|
|
19999
|
+
},
|
|
20000
|
+
{
|
|
20001
|
+
"type": [
|
|
20002
|
+
"null",
|
|
20003
|
+
"string"
|
|
20004
|
+
],
|
|
20005
|
+
"name": "message",
|
|
20006
|
+
"default": null,
|
|
20007
|
+
"doc": "The message associated with the bootstrap status.\nI.e., an error message if the bootstrap failed."
|
|
20008
|
+
}
|
|
20009
|
+
]
|
|
20010
|
+
}
|
|
20011
|
+
],
|
|
20012
|
+
"name": "metricsCubeBootstrapStatus",
|
|
20013
|
+
"default": null,
|
|
20014
|
+
"doc": "Whether the metrics cube for this monitor has been bootstrapped."
|
|
20015
|
+
}
|
|
20016
|
+
]
|
|
20017
|
+
}
|
|
20018
|
+
],
|
|
20019
|
+
"name": "bootstrapStatus",
|
|
20020
|
+
"default": null,
|
|
20021
|
+
"doc": "The status of the bootstrap actions performed on the assertion."
|
|
19945
20022
|
}
|
|
19946
20023
|
],
|
|
19947
20024
|
"doc": "Information about an Assertion monitor."
|
|
@@ -20765,6 +20842,15 @@
|
|
|
20765
20842
|
"name": "assertionInferenceDetails",
|
|
20766
20843
|
"default": null,
|
|
20767
20844
|
"doc": "The optional AssertionInferenceDetails which contains the settings used for the inferred assertion.\nThis field is used to store the settings used for the smart assertion.\nIt is optional and may not always be present in the AssertionRunEvent record.\nWhen present, it provides additional context about the settings used for the smart assertion."
|
|
20845
|
+
},
|
|
20846
|
+
{
|
|
20847
|
+
"type": [
|
|
20848
|
+
"null",
|
|
20849
|
+
"com.linkedin.pegasus2avro.assertion.AssertionMetric"
|
|
20850
|
+
],
|
|
20851
|
+
"name": "metric",
|
|
20852
|
+
"default": null,
|
|
20853
|
+
"doc": "Information about the metric that was sampled & used when evaluating the assertion.\n\nCurrently, this is only populated for Volume & Field Metric Assertions and used for anomaly\nlogging for Smart Assertions."
|
|
20768
20854
|
}
|
|
20769
20855
|
],
|
|
20770
20856
|
"doc": "The result of running an assertion"
|
|
@@ -26605,7 +26691,8 @@
|
|
|
26605
26691
|
"entityAspects": [
|
|
26606
26692
|
"dataContractProperties",
|
|
26607
26693
|
"dataContractStatus",
|
|
26608
|
-
"status"
|
|
26694
|
+
"status",
|
|
26695
|
+
"structuredProperties"
|
|
26609
26696
|
]
|
|
26610
26697
|
},
|
|
26611
26698
|
"name": "DataContractKey",
|
|
@@ -27350,6 +27437,28 @@
|
|
|
27350
27437
|
],
|
|
27351
27438
|
"doc": "Key for a Form"
|
|
27352
27439
|
},
|
|
27440
|
+
{
|
|
27441
|
+
"type": "record",
|
|
27442
|
+
"Aspect": {
|
|
27443
|
+
"name": "dataHubOpenAPISchemaKey",
|
|
27444
|
+
"keyForEntity": "dataHubOpenAPISchema",
|
|
27445
|
+
"entityCategory": "internal",
|
|
27446
|
+
"entityAspects": [
|
|
27447
|
+
"systemMetadata"
|
|
27448
|
+
],
|
|
27449
|
+
"entityDoc": "Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model."
|
|
27450
|
+
},
|
|
27451
|
+
"name": "DataHubOpenAPISchemaKey",
|
|
27452
|
+
"namespace": "com.linkedin.pegasus2avro.metadata.key",
|
|
27453
|
+
"fields": [
|
|
27454
|
+
{
|
|
27455
|
+
"type": "string",
|
|
27456
|
+
"name": "id",
|
|
27457
|
+
"doc": "A unique id for the DataHub OpenAPI schema."
|
|
27458
|
+
}
|
|
27459
|
+
],
|
|
27460
|
+
"doc": "Key for a Query"
|
|
27461
|
+
},
|
|
27353
27462
|
"com.linkedin.pegasus2avro.metadata.key.CorpUserKey",
|
|
27354
27463
|
"com.linkedin.pegasus2avro.metadata.key.DataPlatformKey",
|
|
27355
27464
|
{
|