acryl-datahub-cloud 0.3.10.3rc2__py3-none-any.whl → 0.3.11rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (33)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/_sdk_extras/__init__.py +4 -0
  3. acryl_datahub_cloud/_sdk_extras/assertion.py +15 -0
  4. acryl_datahub_cloud/_sdk_extras/assertions_client.py +23 -0
  5. acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
  6. acryl_datahub_cloud/action_request/action_request_owner_source.py +1 -2
  7. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +3 -7
  8. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +9 -5
  9. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +14 -32
  10. acryl_datahub_cloud/metadata/_urns/urn_defs.py +56 -0
  11. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -0
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
  14. acryl_datahub_cloud/metadata/schema.avsc +138 -29
  15. acryl_datahub_cloud/metadata/schema_classes.py +214 -29
  16. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +25 -0
  17. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +25 -0
  18. acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
  19. acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  20. acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
  21. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  22. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  23. acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +3 -0
  24. acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  25. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +36 -26
  26. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +58 -0
  27. acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
  28. acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +86 -0
  29. {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/METADATA +41 -41
  30. {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/RECORD +33 -28
  31. {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/WHEEL +1 -1
  32. {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/entry_points.txt +0 -0
  33. {acryl_datahub_cloud-0.3.10.3rc2.dist-info → acryl_datahub_cloud-0.3.11rc0.dist-info}/top_level.txt +0 -0
acryl_datahub_cloud/_codegen_config.json
@@ -1,6 +1,6 @@
 {
   "name": "acryl-datahub-cloud",
-  "version": "0.3.10.3rc2",
+  "version": "0.3.11rc0",
   "install_requires": [
     "avro-gen3==0.7.16",
     "acryl-datahub"
acryl_datahub_cloud/_sdk_extras/__init__.py
@@ -0,0 +1,4 @@
+from acryl_datahub_cloud._sdk_extras.assertion import Assertion
+from acryl_datahub_cloud._sdk_extras.assertions_client import AssertionsClient
+
+__all__ = ["Assertion", "AssertionsClient"]
acryl_datahub_cloud/_sdk_extras/assertion.py
@@ -0,0 +1,15 @@
+"""
+This file contains the Assertion class, which is used to represent an assertion in DataHub.
+
+The Assertion class is currently not implemented, this is a placeholder for future implementation.
+"""
+
+from typing import Union
+
+from datahub.metadata.urns import AssertionUrn
+
+
+class Assertion:
+    def __init__(self, urn: Union[str, AssertionUrn]):
+        print(f"The Assertion class is currently not implemented. Urn provided: {urn}")
+        self.urn = AssertionUrn(urn)
acryl_datahub_cloud/_sdk_extras/assertions_client.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Union
+
+from acryl_datahub_cloud._sdk_extras.assertion import Assertion
+from datahub.metadata.urns import AssertionUrn
+
+if TYPE_CHECKING:
+    from datahub.sdk.main_client import DataHubClient
+
+
+class AssertionsClient:
+    def __init__(self, client: DataHubClient):
+        self.client = client
+
+    def get_assertions(
+        self, urn: Union[str, list[str], AssertionUrn, list[AssertionUrn]]
+    ) -> list[Assertion]:
+        print(
+            "get_assertions is not implemented, this is a placeholder. Returning empty list."
+        )
+        print(f"urn provided: {urn}")
+        return []
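A minimal usage sketch of these new SDK extras (the server URL and token are hypothetical placeholders; DataHubClient comes from the open-source acryl-datahub SDK, and both methods in this release are stubs that print a notice):

from datahub.sdk.main_client import DataHubClient

from acryl_datahub_cloud._sdk_extras import Assertion, AssertionsClient

# Hypothetical connection details, for illustration only.
client = DataHubClient(server="https://example.acryl.io/gms", token="<token>")

assertions_client = AssertionsClient(client)
assertions = assertions_client.get_assertions("urn:li:assertion:abc123")  # prints a notice, returns []
assertion = Assertion("urn:li:assertion:abc123")  # prints a notice, stores the parsed AssertionUrn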
acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py
@@ -219,6 +219,7 @@ class AcrylCustomer:
         self._emitted_containers: Dict[str, bool] = {}

     def _get_owner_from_assignee(self, assignee: ExternalUser) -> str:
+        assert assignee.email, "Assignee must have an email"
         owner_urn_options = [
             urn
             for urn in self.graph.get_urns_by_filter(
@@ -233,7 +234,6 @@ class AcrylCustomer:
             )
         ]
         if not owner_urn_options:
-            assert assignee.email, "Assignee must have an email"
             owner_urn = "urn:li:corpuser:" + assignee.email
             self.graph.emit(
                 MetadataChangeProposalWrapper(
acryl_datahub_cloud/action_request/action_request_owner_source.py
@@ -2,6 +2,7 @@ import logging
 from typing import Dict, Iterable, List, Optional

 from datahub.configuration import ConfigModel
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -14,10 +15,8 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.metadata.schema_classes import (
     ActionRequestInfoClass,
 )
-from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.utilities.urns.urn import guess_entity_type

-
 logger = logging.getLogger(__name__)


acryl_datahub_cloud/datahub_reporting/datahub_dataset.py
@@ -163,7 +163,7 @@ class DataHubBasedS3Dataset:
             self.schema = row.arrow_schema()
         else:
             # hail mary: infer schema from the first row and cast everything to string
-            self.schema = pa.schema([(key, pa.string()) for key in row.keys()])
+            self.schema = pa.schema([(key, pa.string()) for key in row])
             self.stringify_row = True

         self._initialize_local_file()
@@ -172,7 +172,7 @@ class DataHubBasedS3Dataset:
             self.schema,
             compression=self.config.file_compression,
         )
-        if isinstance(row, BaseModel) or isinstance(row, BaseModelRow):
+        if isinstance(row, (BaseModel, BaseModelRow)):
             # for anything extending BaseModel, we want to use the dict representation
             write_row: Dict[str, Any] = row.dict()
         elif isinstance(row, dict):
@@ -274,11 +274,7 @@ class DataHubBasedS3Dataset:
         self, duckdb_columns: List[Tuple[str, str]]
     ) -> SchemaMetadataClass:
         def get_type_from_dtype(dtype: str) -> SchemaFieldDataTypeClass:
-            if "int" in dtype:
-                return SchemaFieldDataTypeClass(type=NumberTypeClass())
-            elif "float" in dtype:
-                return SchemaFieldDataTypeClass(type=NumberTypeClass())
-            elif "number" in dtype:
+            if "int" in dtype or "float" in dtype or "number" in dtype:
                 return SchemaFieldDataTypeClass(type=NumberTypeClass())
             elif "bool" in dtype:
                 return SchemaFieldDataTypeClass(type=BooleanTypeClass())
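A small sketch of the "hail mary" schema inference above: iterating a dict yields its keys, which is why row.keys() could be shortened to row (the sample row is hypothetical):

import pyarrow as pa

row = {"urn": "urn:li:corpuser:alice", "count": "3"}  # hypothetical first row
schema = pa.schema([(key, pa.string()) for key in row])  # same keys as row.keys()
assert schema.names == ["urn", "count"]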
acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py
@@ -5,6 +5,9 @@ from enum import Enum
 from typing import Any, Callable, Dict, Iterable, List, Optional

 import pandas as pd
+from pydantic import BaseModel
+
+from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.graph.filters import RawSearchFilterRule
@@ -16,9 +19,6 @@ from datahub.metadata.schema_classes import (
     FormStateClass,
     FormTypeClass,
 )
-from pydantic import BaseModel
-
-from acryl_datahub_cloud.elasticsearch.graph_service import BaseModelRow

 logger = logging.getLogger(__name__)

@@ -257,6 +257,7 @@ class DataHubFormReportingData(FormData):
             for prompt_id, response_time in zip(
                 search_row.completedFormsCompletedPromptIds,
                 search_row.completedFormsCompletedPromptResponseTimes,
+                strict=False,
             )
             if prompt_id in form_prompts
         }
@@ -289,9 +290,10 @@ class DataHubFormReportingData(FormData):
         on_asset_scanned: Optional[Callable[[str], Any]] = None,
         on_form_scanned: Optional[Callable[[str], Any]] = None,
     ) -> Iterable[FormReportingRow]:
-        extra_fields = [f for f in self.DataHubDatasetSearchRow.__fields__.keys()]
+        extra_fields = [f for f in self.DataHubDatasetSearchRow.__fields__]
+        # TODO: Replace with the new search/filter SDK.
         result = self.graph.get_results_by_filter(
-            extra_or_filters=self.get_form_existence_or_filters(),
+            extra_or_filters=[{"and": self.get_form_existence_or_filters()}],
             extra_source_fields=extra_fields,
             skip_cache=True,
         )
@@ -388,6 +390,7 @@ class DataHubFormReportingData(FormData):
             for (p, p_response_time) in zip(
                 search_row.incompleteFormsCompletedPromptIds,
                 search_row.incompleteFormsCompletedPromptResponseTimes,
+                strict=False,
             )
             if p in form_prompts
         ]:
@@ -485,6 +488,7 @@ class DataHubFormReportingData(FormData):
             for (p, p_response_time) in zip(
                 search_row.completedFormsCompletedPromptIds,
                 search_row.completedFormsCompletedPromptResponseTimes,
+                strict=False,
             )
             if p in form_prompts
         ]:
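For context on the strict=False additions above: zip() grew a strict keyword in Python 3.10, and recent linters ask for it to be explicit. strict=False keeps the historical behavior of truncating to the shortest iterable, which these comprehensions rely on. A plain-Python sketch (not code from this package):

ids = ["p1", "p2", "p3"]
times = [100, 200]  # one entry short

# strict=False (the default behavior) silently stops at the shorter input:
assert list(zip(ids, times, strict=False)) == [("p1", 100), ("p2", 200)]

# strict=True would raise instead:
# ValueError: zip() argument 2 is shorter than argument 1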
acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py
@@ -395,18 +395,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
                 "last_modified_at": (
                     doc["_source"]["lastModifiedAt"]
                     if "lastModifiedAt" in doc["_source"]
-                    else (
-                        doc["_source"]["lastModifiedAt"]
-                        if "lastModifiedAt" in doc["_source"]
-                        else None
-                    )
+                    else (doc["_source"].get("lastModifiedAt", None))
                 ),
                 "platform": doc["_source"]["platform"],
-                "removed": (
-                    doc["_source"]["removed"]
-                    if "removed" in doc["_source"]
-                    else False
-                ),
+                "removed": (doc["_source"].get("removed", False)),
             }

         time_taken = timer.elapsed_seconds()
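The rewrites above (and in the next hunk) are the standard dict.get() idiom: d.get(key, default) is equivalent to d[key] if key in d else default, with a single lookup. A sketch with a hypothetical _source payload:

source = {"platform": "snowflake"}  # hypothetical Elasticsearch _source

assert source.get("removed", False) is False             # missing key -> default
assert source.get("platform", "unknown") == "snowflake"  # present key -> value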
@@ -509,11 +501,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
                 "eventGranularity": doc["_source"].get("eventGranularity"),
                 "totalSqlQueries": doc["_source"].get("totalSqlQueries", 0),
                 "uniqueUserCount": doc["_source"].get("uniqueUserCount", 0),
-                "userCounts": (
-                    doc["_source"]["event"]["userCounts"]
-                    if "userCounts" in doc["_source"]["event"]
-                    else None
-                ),
+                "userCounts": (doc["_source"]["event"].get("userCounts", None)),
                 "platform": platform,
             }
         except KeyError as e:
@@ -525,7 +513,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
         time_taken = timer.elapsed_seconds()
         logger.info(f"DatasetUsage processing took {time_taken:.3f} seconds")

-    def search_score(
+    def search_score(  # noqa: C901
         self, urn: str, last_update_time: int, usage_percentile: int
     ) -> SearchRankingMultipliers:
         usage_search_score_multiplier = 1.0
@@ -622,10 +610,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
             [endpoint],
             http_auth=(user, password),
             use_ssl=(
-                True
-                if self.config.search_index
-                and self.config.search_index.use_ssl
-                else False
+                bool(
+                    self.config.search_index
+                    and self.config.search_index.use_ssl
+                )
             ),
         )

@@ -639,10 +627,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
             [endpoint],
             http_auth=(user, password),
             use_ssl=(
-                True
-                if self.config.search_index
-                and self.config.search_index.use_ssl
-                else False
+                bool(
+                    self.config.search_index
+                    and self.config.search_index.use_ssl
+                )
             ),
         )

@@ -737,7 +725,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
             polars.Duration(): pa.duration("ns"),
         }

-        if polars_dtype in [type(key) for key in type_mapping.keys()]:
+        if polars_dtype in [type(key) for key in type_mapping]:
             return type_mapping[polars_dtype]
         elif polars_dtype == polars.Categorical:
             return pa.dictionary(index_type=pa.int32(), value_type=pa.string())
@@ -1006,12 +994,9 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
     def generate_mcp_from_lazyframe(
         self, lazy_frame: polars.LazyFrame
     ) -> Iterable[MetadataWorkUnit]:
-        num = 0
         for row in lazy_frame.collect(
             streaming=self.config.experimental_full_streaming
         ).to_struct():
-            num += 1
-
             if "siblings" in row and row["siblings"]:
                 logger.info(f"Siblings found for urn: {row['urn']} -> row['siblings']")

@@ -1101,10 +1086,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
     def generate_query_usage_mcp_from_lazyframe(
         self, lazy_frame: polars.LazyFrame
     ) -> Iterable[MetadataWorkUnit]:
-        num = 0
         for row in lazy_frame.collect().iter_rows(named=True):
-            num += 1
-
             query_usage_features = QueryUsageFeaturesClass(
                 queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
                 queryCountTotal=None,  # This is not implemented
@@ -1287,7 +1269,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
             .is_not_null()
             # We only want to downrank datasets that have a search score multiplier greater than 1. 1 is the minimum score of a dataset
             .and_(polars.col("combinedSearchRankingMultiplier").ne(1))
-        )  # noqa: E712
+        )
         .filter(polars.col("removed") == False)  # noqa: E712
         .drop(["removed"])
         .drop(["last_modified_at"])
acryl_datahub_cloud/metadata/_urns/urn_defs.py
@@ -3876,6 +3876,62 @@ class FormUrn(_SpecificUrn):
     def id(self) -> str:
         return self._entity_ids[0]

+if TYPE_CHECKING:
+    from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+class DataHubOpenAPISchemaUrn(_SpecificUrn):
+    ENTITY_TYPE: ClassVar[Literal["dataHubOpenAPISchema"]] = "dataHubOpenAPISchema"
+    _URN_PARTS: ClassVar[int] = 1
+
+    def __init__(self, id: Union["DataHubOpenAPISchemaUrn", str], *, _allow_coercion: bool = True) -> None:
+        if _allow_coercion:
+            # Field coercion logic (if any is required).
+            if isinstance(id, str):
+                if id.startswith('urn:li:'):
+                    try:
+                        id = DataHubOpenAPISchemaUrn.from_string(id)
+                    except InvalidUrnError:
+                        raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
+                else:
+                    id = UrnEncoder.encode_string(id)
+
+        # Validation logic.
+        if not id:
+            raise InvalidUrnError("DataHubOpenAPISchemaUrn id cannot be empty")
+        if isinstance(id, DataHubOpenAPISchemaUrn):
+            id = id.id
+        elif isinstance(id, Urn):
+            raise InvalidUrnError(f'Expecting a DataHubOpenAPISchemaUrn but got {id}')
+        if UrnEncoder.contains_reserved_char(id):
+            raise InvalidUrnError(f'DataHubOpenAPISchemaUrn id contains reserved characters')
+
+        super().__init__(self.ENTITY_TYPE, [id])
+
+    @classmethod
+    def _parse_ids(cls, entity_ids: List[str]) -> "DataHubOpenAPISchemaUrn":
+        if len(entity_ids) != cls._URN_PARTS:
+            raise InvalidUrnError(f"DataHubOpenAPISchemaUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
+        return cls(id=entity_ids[0], _allow_coercion=False)
+
+    @classmethod
+    def underlying_key_aspect_type(cls) -> Type["DataHubOpenAPISchemaKeyClass"]:
+        from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+        return DataHubOpenAPISchemaKeyClass
+
+    def to_key_aspect(self) -> "DataHubOpenAPISchemaKeyClass":
+        from datahub.metadata.schema_classes import DataHubOpenAPISchemaKeyClass
+
+        return DataHubOpenAPISchemaKeyClass(id=self.id)
+
+    @classmethod
+    def from_key_aspect(cls, key_aspect: "DataHubOpenAPISchemaKeyClass") -> "DataHubOpenAPISchemaUrn":
+        return cls(id=key_aspect.id)
+
+    @property
+    def id(self) -> str:
+        return self._entity_ids[0]
+
 if TYPE_CHECKING:
     from datahub.metadata.schema_classes import CorpUserKeyClass

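A short sketch of the new urn class in use (the ids are made up; the behavior follows the generated code above):

from acryl_datahub_cloud.metadata._urns.urn_defs import DataHubOpenAPISchemaUrn

urn = DataHubOpenAPISchemaUrn("my-schema-id")
assert str(urn) == "urn:li:dataHubOpenAPISchema:my-schema-id"

# Full urn strings are coerced via from_string(), and the urn round-trips
# through its key aspect:
same = DataHubOpenAPISchemaUrn("urn:li:dataHubOpenAPISchema:my-schema-id")
key = same.to_key_aspect()  # DataHubOpenAPISchemaKeyClass(id="my-schema-id")
back = DataHubOpenAPISchemaUrn.from_key_aspect(key)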
acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py
@@ -19,6 +19,7 @@ from .....schema_classes import AssertionExclusionWindowClass
 from .....schema_classes import AssertionExclusionWindowTypeClass
 from .....schema_classes import AssertionInferenceDetailsClass
 from .....schema_classes import AssertionInfoClass
+from .....schema_classes import AssertionMetricClass
 from .....schema_classes import AssertionMonitorSensitivityClass
 from .....schema_classes import AssertionResultClass
 from .....schema_classes import AssertionResultErrorClass
@@ -84,6 +85,7 @@ AssertionExclusionWindow = AssertionExclusionWindowClass
 AssertionExclusionWindowType = AssertionExclusionWindowTypeClass
 AssertionInferenceDetails = AssertionInferenceDetailsClass
 AssertionInfo = AssertionInfoClass
+AssertionMetric = AssertionMetricClass
 AssertionMonitorSensitivity = AssertionMonitorSensitivityClass
 AssertionResult = AssertionResultClass
 AssertionResultError = AssertionResultErrorClass
acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py
@@ -23,6 +23,7 @@ from ......schema_classes import DataHubActionKeyClass
 from ......schema_classes import DataHubConnectionKeyClass
 from ......schema_classes import DataHubIngestionSourceKeyClass
 from ......schema_classes import DataHubMetricCubeKeyClass
+from ......schema_classes import DataHubOpenAPISchemaKeyClass
 from ......schema_classes import DataHubPersonaKeyClass
 from ......schema_classes import DataHubPolicyKeyClass
 from ......schema_classes import DataHubRetentionKeyClass
@@ -89,6 +90,7 @@ DataHubActionKey = DataHubActionKeyClass
 DataHubConnectionKey = DataHubConnectionKeyClass
 DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
 DataHubMetricCubeKey = DataHubMetricCubeKeyClass
+DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
 DataHubPersonaKey = DataHubPersonaKeyClass
 DataHubPolicyKey = DataHubPolicyKeyClass
 DataHubRetentionKey = DataHubRetentionKeyClass
acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py
@@ -12,7 +12,10 @@ from .....schema_classes import AssertionEvaluationParametersClass
 from .....schema_classes import AssertionEvaluationParametersTypeClass
 from .....schema_classes import AssertionEvaluationSpecClass
 from .....schema_classes import AssertionMonitorClass
+from .....schema_classes import AssertionMonitorBootstrapStatusClass
 from .....schema_classes import AssertionMonitorCapabilityClass
+from .....schema_classes import AssertionMonitorMetricsCubeBootstrapStateClass
+from .....schema_classes import AssertionMonitorMetricsCubeBootstrapStatusClass
 from .....schema_classes import AssertionMonitorSettingsClass
 from .....schema_classes import AuditLogSpecClass
 from .....schema_classes import DataHubOperationSpecClass
@@ -45,7 +48,10 @@ AssertionEvaluationParameters = AssertionEvaluationParametersClass
 AssertionEvaluationParametersType = AssertionEvaluationParametersTypeClass
 AssertionEvaluationSpec = AssertionEvaluationSpecClass
 AssertionMonitor = AssertionMonitorClass
+AssertionMonitorBootstrapStatus = AssertionMonitorBootstrapStatusClass
 AssertionMonitorCapability = AssertionMonitorCapabilityClass
+AssertionMonitorMetricsCubeBootstrapState = AssertionMonitorMetricsCubeBootstrapStateClass
+AssertionMonitorMetricsCubeBootstrapStatus = AssertionMonitorMetricsCubeBootstrapStatusClass
 AssertionMonitorSettings = AssertionMonitorSettingsClass
 AuditLogSpec = AuditLogSpecClass
 DataHubOperationSpec = DataHubOperationSpecClass
acryl_datahub_cloud/metadata/schema.avsc
@@ -1449,20 +1449,24 @@
     },
     {
       "TimeseriesField": {},
-      "type": {
-        "type": "enum",
-        "symbolDocs": {
-          "CONFIRMED": "The anomaly has been confirmed by a human reviewer. This means the anomaly was validated.",
-          "REJECTED": "The anomaly has been dismissed, or ignored, by a human reviewer. This means the anomaly should have been ignored."
-        },
-        "name": "AnomalyReviewState",
-        "namespace": "com.linkedin.pegasus2avro.anomaly",
-        "symbols": [
-          "CONFIRMED",
-          "REJECTED"
-        ]
-      },
+      "type": [
+        "null",
+        {
+          "type": "enum",
+          "symbolDocs": {
+            "CONFIRMED": "The anomaly has been confirmed by a human reviewer. This means the anomaly was validated.",
+            "REJECTED": "The anomaly has been dismissed, or ignored, by a human reviewer. This means the anomaly should have been ignored."
+          },
+          "name": "AnomalyReviewState",
+          "namespace": "com.linkedin.pegasus2avro.anomaly",
+          "symbols": [
+            "CONFIRMED",
+            "REJECTED"
+          ]
+        }
+      ],
       "name": "state",
+      "default": null,
       "doc": "The review of the anomaly, based on human-provided feedback.\nIf this is not present, then the Anomaly has not yet been reviewed."
     },
     {
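The state field becomes a ["null", enum] union with a null default, the usual Avro pattern for making a field optional so records written without it still resolve. A minimal sketch of the pattern, assuming the fastavro library (the record name here is invented):

import fastavro
from fastavro.validation import validate

schema = fastavro.parse_schema({
    "type": "record",
    "name": "Example",
    "fields": [
        {
            "name": "state",
            "type": ["null", {"type": "enum", "name": "AnomalyReviewState",
                              "symbols": ["CONFIRMED", "REJECTED"]}],
            "default": None,
        }
    ],
})

# Records may now omit the review state entirely or carry a symbol.
assert validate({"state": None}, schema)
assert validate({"state": "CONFIRMED"}, schema)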
@@ -1509,23 +1513,29 @@
       "namespace": "com.linkedin.pegasus2avro.anomaly",
       "fields": [
         {
-          "TimeseriesField": {},
           "type": [
             "null",
-            "long"
-          ],
-          "name": "assertionRunEventTime",
-          "default": null,
-          "doc": "The timestampMillis field of the AssertionRunEvent which altered the anomaly status the anomaly (if applicable)."
-        },
-        {
-          "type": [
-            "null",
-            "long"
+            {
+              "type": "record",
+              "name": "AssertionMetric",
+              "namespace": "com.linkedin.pegasus2avro.assertion",
+              "fields": [
+                {
+                  "type": "long",
+                  "name": "timestampMs",
+                  "doc": "The timestamp associated with the metric sampling time in milliseconds since epoch"
+                },
+                {
+                  "type": "float",
+                  "name": "value",
+                  "doc": "The value of the metric that was sampled"
+                }
+              ]
+            }
           ],
-          "name": "metricCubeTimestamp",
+          "name": "assertionMetric",
           "default": null,
-          "doc": "The timestamp associated with the metric cube value that triggered the anomaly."
+          "doc": "The monitor metric associated with the anomaly, if generated from an assertion monitor (the norm)."
         }
       ],
       "doc": "Ad-hoc properties about an anomaly source."
@@ -3970,12 +3980,14 @@
       "type": {
         "type": "enum",
         "symbolDocs": {
-          "SQL": "A SQL Query"
+          "SQL": "A SQL Query",
+          "UNKNOWN": "Unknown query language"
         },
         "name": "QueryLanguage",
         "namespace": "com.linkedin.pegasus2avro.query",
         "symbols": [
-          "SQL"
+          "SQL",
+          "UNKNOWN"
         ]
       },
       "name": "language",
@@ -7757,6 +7769,9 @@
       "doc": "Version of the MLModelDeployment"
     },
     {
+      "Searchable": {
+        "fieldName": "deploymentStatus"
+      },
       "type": [
         "null",
         {
@@ -15901,6 +15916,9 @@
       "null",
       {
         "type": "record",
+        "Aspect": {
+          "name": "systemMetadata"
+        },
         "name": "SystemMetadata",
         "namespace": "com.linkedin.pegasus2avro.mxe",
         "fields": [
@@ -16118,6 +16136,7 @@
     ],
     "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataChangeLog is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeProposal will be emitted instead."
   },
+  "com.linkedin.pegasus2avro.mxe.SystemMetadata",
   {
     "type": "record",
     "name": "MetadataChangeLog",
@@ -19942,6 +19961,64 @@
       "name": "settings",
       "default": null,
       "doc": "Specific settings for an assertion monitor"
+    },
+    {
+      "type": [
+        "null",
+        {
+          "type": "record",
+          "name": "AssertionMonitorBootstrapStatus",
+          "namespace": "com.linkedin.pegasus2avro.monitor",
+          "fields": [
+            {
+              "type": [
+                "null",
+                {
+                  "type": "record",
+                  "name": "AssertionMonitorMetricsCubeBootstrapStatus",
+                  "namespace": "com.linkedin.pegasus2avro.monitor",
+                  "fields": [
+                    {
+                      "type": {
+                        "type": "enum",
+                        "symbolDocs": {
+                          "COMPLETED": "The metrics cube for this monitor has been bootstrapped.",
+                          "FAILED": "The metrics cube for this monitor has failed to bootstrap.",
+                          "PENDING": "The metrics cube for this monitor has not been bootstrapped."
+                        },
+                        "name": "AssertionMonitorMetricsCubeBootstrapState",
+                        "namespace": "com.linkedin.pegasus2avro.monitor",
+                        "symbols": [
+                          "PENDING",
+                          "FAILED",
+                          "COMPLETED"
+                        ]
+                      },
+                      "name": "state",
+                      "doc": "Whether the metrics cube for this monitor has been bootstrapped."
+                    },
+                    {
+                      "type": [
+                        "null",
+                        "string"
+                      ],
+                      "name": "message",
+                      "default": null,
+                      "doc": "The message associated with the bootstrap status.\nI.e., an error message if the bootstrap failed."
+                    }
+                  ]
+                }
+              ],
+              "name": "metricsCubeBootstrapStatus",
+              "default": null,
+              "doc": "Whether the metrics cube for this monitor has been bootstrapped."
+            }
+          ]
+        }
+      ],
+      "name": "bootstrapStatus",
+      "default": null,
+      "doc": "The status of the bootstrap actions performed on the assertion."
     }
   ],
   "doc": "Information about an Assertion monitor."
@@ -20765,6 +20842,15 @@
       "name": "assertionInferenceDetails",
       "default": null,
       "doc": "The optional AssertionInferenceDetails which contains the settings used for the inferred assertion.\nThis field is used to store the settings used for the smart assertion.\nIt is optional and may not always be present in the AssertionRunEvent record.\nWhen present, it provides additional context about the settings used for the smart assertion."
+    },
+    {
+      "type": [
+        "null",
+        "com.linkedin.pegasus2avro.assertion.AssertionMetric"
+      ],
+      "name": "metric",
+      "default": null,
+      "doc": "Information about the metric that was sampled & used when evaluating the assertion.\n\nCurrently, this is only populated for Volume & Field Metric Assertions and used for anomaly\nlogging for Smart Assertions."
     }
   ],
   "doc": "The result of running an assertion"
@@ -26605,7 +26691,8 @@
     "entityAspects": [
       "dataContractProperties",
       "dataContractStatus",
-      "status"
+      "status",
+      "structuredProperties"
     ]
   },
   "name": "DataContractKey",
@@ -27350,6 +27437,28 @@
     ],
     "doc": "Key for a Form"
   },
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "dataHubOpenAPISchemaKey",
+      "keyForEntity": "dataHubOpenAPISchema",
+      "entityCategory": "internal",
+      "entityAspects": [
+        "systemMetadata"
+      ],
+      "entityDoc": "Contains aspects which are used in OpenAPI requests/responses which are not otherwise present in the data model."
+    },
+    "name": "DataHubOpenAPISchemaKey",
+    "namespace": "com.linkedin.pegasus2avro.metadata.key",
+    "fields": [
+      {
+        "type": "string",
+        "name": "id",
+        "doc": "A unique id for the DataHub OpenAPI schema."
+      }
+    ],
+    "doc": "Key for a Query"
+  },
   "com.linkedin.pegasus2avro.metadata.key.CorpUserKey",
   "com.linkedin.pegasus2avro.metadata.key.DataPlatformKey",
   {