acryl-datahub-cloud 0.3.11rc0__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +37 -13
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +55 -24
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +4 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +242 -51
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +518 -77
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2296 -1900
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +4 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +12 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +25091 -20557
- acryl_datahub_cloud/metadata/schema_classes.py +29269 -23863
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +353 -215
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +166 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +418 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +62 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +54 -9
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +163 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +61 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +55 -49
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +235 -142
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
- acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
|
+
AssertionMode,
|
|
9
|
+
_AssertionPublic,
|
|
10
|
+
_HasSchedule,
|
|
11
|
+
)
|
|
12
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
13
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
14
|
+
AssertionIncidentBehavior,
|
|
15
|
+
DetectionMechanism,
|
|
16
|
+
TimeWindowSizeInputTypes,
|
|
17
|
+
_DetectionMechanismTypes,
|
|
18
|
+
)
|
|
19
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
20
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
21
|
+
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError
|
|
22
|
+
from datahub.metadata import schema_classes as models
|
|
23
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FreshnessAssertion(_HasSchedule, _AssertionPublic):
    """
    A class that represents a freshness assertion.

    The constructor stores the supplied values and the properties expose them
    read-only. Use `_from_entities` to build an instance from an `Assertion`
    entity and its `Monitor` entity.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        freshness_schedule_check_type: Union[
            str, models.FreshnessAssertionScheduleTypeClass
        ],
        lookback_window: Optional[TimeWindowSizeInputTypes],
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a freshness assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.

        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            freshness_schedule_check_type: The type of freshness schedule check to be used for the assertion.
            lookback_window: The lookback window to be used for the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Both bases are initialised explicitly (not via super()) so each one
        # receives exactly the keyword arguments it expects.
        _HasSchedule.__init__(self, schedule=schedule)
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        self._freshness_schedule_check_type = freshness_schedule_check_type
        self._lookback_window = lookback_window

    @property
    def freshness_schedule_check_type(
        self,
    ) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
        """The freshness schedule check type passed to the constructor."""
        return self._freshness_schedule_check_type

    @property
    def lookback_window(self) -> Optional[TimeWindowSizeInputTypes]:
        """The lookback window passed to the constructor (may be None)."""
        return self._lookback_window

    @staticmethod
    def _get_freshness_schedule(
        assertion: Assertion,
    ) -> "models.FreshnessAssertionScheduleClass":
        """
        Return the freshness schedule stored on the assertion's info.

        Shared validation for `_get_freshness_schedule_check_type` and
        `_get_lookback_window`, so both raise the same errors for the same
        conditions.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, if the info
                is not a `FreshnessAssertionInfoClass`, or if the info has no
                schedule.
        """
        info = assertion.info
        if info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
            )
        if not isinstance(info, models.FreshnessAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a freshness assertion"
            )
        if info.schedule is None:
            raise SDKNotYetSupportedError(
                f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
            )
        return info.schedule

    @staticmethod
    def _get_freshness_schedule_check_type(
        assertion: Assertion,
    ) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
        """
        Return the `type` of the assertion's freshness schedule.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, is not a
                freshness assertion, or has no schedule.
        """
        return FreshnessAssertion._get_freshness_schedule(assertion).type

    @staticmethod
    def _get_lookback_window(
        assertion: Assertion,
    ) -> Optional[models.FixedIntervalScheduleClass]:
        """
        Return the `fixedInterval` of the assertion's freshness schedule.

        The value may be None when the schedule carries no fixed interval.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, is not a
                freshness assertion, or has no schedule.
        """
        return FreshnessAssertion._get_freshness_schedule(assertion).fixedInterval

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Get the detection mechanism for freshness assertions.

        The mechanism is derived from the `sourceType` of the monitor's
        dataset freshness parameters. `default` is returned when the validated
        detection context yields no parameters, or when the parameters carry
        no `datasetFreshnessParameters`.

        Raises:
            SDKNotYetSupportedError: For the FILE_METADATA source type and for
                any source type not handled below.
        """
        # NOTE(review): the validation rules live in
        # _AssertionPublic._get_validated_detection_context, which is not
        # visible here; only its None / non-None result is relied on.
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            models.FreshnessAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetFreshnessParameters is None:
            # Report the value actually returned: callers may pass a `default`
            # other than DEFAULT_DETECTION_MECHANISM.
            logger.warning(
                f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
            )
            return default
        source_type = parameters.datasetFreshnessParameters.sourceType
        if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
            return DetectionMechanism.AUDIT_LOG
        elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
            return _AssertionPublic._get_field_value_detection_mechanism(
                assertion, parameters
            )
        elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
            return DetectionMechanism.DATAHUB_OPERATION
        elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
            raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
        else:
            raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a freshness assertion from the assertion and monitor entities.

        Mode and schedule are read from the monitor; the remaining values are
        read from the assertion. The `_get_*` helpers not defined in this
        class are presumably inherited from the base classes (not visible
        here).
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            # A missing description becomes an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            freshness_schedule_check_type=cls._get_freshness_schedule_check_type(
                assertion
            ),
            lookback_window=cls._get_lookback_window(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema assertion module.
|
|
3
|
+
|
|
4
|
+
This module provides the SchemaAssertion class for representing and working with
|
|
5
|
+
schema assertions that validate dataset schemas match expected field definitions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Optional, Union
|
|
11
|
+
|
|
12
|
+
from typing_extensions import Self
|
|
13
|
+
|
|
14
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
15
|
+
AssertionMode,
|
|
16
|
+
_AssertionPublic,
|
|
17
|
+
_HasSchedule,
|
|
18
|
+
)
|
|
19
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
20
|
+
AssertionIncidentBehavior,
|
|
21
|
+
_DetectionMechanismTypes,
|
|
22
|
+
)
|
|
23
|
+
from acryl_datahub_cloud.sdk.assertion_input.schema_assertion_input import (
|
|
24
|
+
DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY,
|
|
25
|
+
SCHEMA_FIELD_CLASS_TO_TYPE_MAP,
|
|
26
|
+
SchemaAssertionCompatibility,
|
|
27
|
+
SchemaAssertionField,
|
|
28
|
+
)
|
|
29
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
30
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
31
|
+
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError
|
|
32
|
+
from datahub.metadata import schema_classes as models
|
|
33
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SchemaAssertion(_HasSchedule, _AssertionPublic):
|
|
39
|
+
"""
|
|
40
|
+
A class that represents a schema assertion.
|
|
41
|
+
|
|
42
|
+
Schema assertions validate that a dataset's schema matches expected
|
|
43
|
+
field definitions with configurable compatibility modes.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(
    self,
    *,
    urn: AssertionUrn,
    dataset_urn: DatasetUrn,
    display_name: str,
    mode: AssertionMode,
    schedule: models.CronScheduleClass,
    compatibility: SchemaAssertionCompatibility,
    fields: list[SchemaAssertionField],
    tags: list[TagUrn],
    incident_behavior: list[AssertionIncidentBehavior],
    created_by: Optional[CorpUserUrn] = None,
    created_at: Union[datetime, None] = None,
    updated_by: Optional[CorpUserUrn] = None,
    updated_at: Optional[datetime] = None,
):
    """
    Build a schema assertion from already-resolved values.

    The object is a read-only view: values are exposed through properties
    and are not meant to be assigned. To change an assertion, use the
    `sync_schema_assertion` method.

    Args:
        urn: URN identifying the assertion.
        dataset_urn: URN of the dataset the assertion validates.
        display_name: Human-readable name of the assertion.
        mode: Assertion mode (active, inactive).
        schedule: Evaluation schedule of the assertion.
        compatibility: Compatibility mode (EXACT_MATCH, SUPERSET, SUBSET).
        fields: Expected schema fields to validate.
        tags: Tags applied to the assertion.
        incident_behavior: Actions to take on assertion pass/fail.
        created_by: User who created the assertion.
        created_at: Timestamp when the assertion was created.
        updated_by: User who last updated the assertion.
        updated_at: Timestamp when the assertion was last updated.
    """
    # Each base is initialised explicitly with the arguments it owns.
    _HasSchedule.__init__(self, schedule=schedule)
    _AssertionPublic.__init__(
        self,
        urn=urn,
        dataset_urn=dataset_urn,
        display_name=display_name,
        mode=mode,
        tags=tags,
        incident_behavior=incident_behavior,
        # Schema assertions don't use a detection mechanism.
        detection_mechanism=None,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
    )
    self._compatibility = compatibility
    self._fields = fields
|
|
101
|
+
|
|
102
|
+
@property
def compatibility(self) -> SchemaAssertionCompatibility:
    """Return the compatibility mode this assertion was constructed with."""
    return self._compatibility
|
|
106
|
+
|
|
107
|
+
@property
def fields(self) -> list[SchemaAssertionField]:
    """Return the expected schema fields this assertion was constructed with."""
    return self._fields
|
|
111
|
+
|
|
112
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = None,
) -> Optional[_DetectionMechanismTypes]:
    """
    Return None: schema assertions have no detection mechanism.

    They always use DATAHUB_SCHEMA as the source type, so all three
    arguments are accepted for signature compatibility and ignored.
    """
    return None
|
|
123
|
+
|
|
124
|
+
@staticmethod
def _get_compatibility(assertion: Assertion) -> SchemaAssertionCompatibility:
    """
    Read the compatibility mode from the assertion info.

    Falls back to DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY (logging a warning,
    except when the stored value is simply None) if the assertion has no
    info, the stored value is None, has an unexpected type, or does not map
    to a SchemaAssertionCompatibility member.

    Raises:
        SDKNotYetSupportedError: If the info is present but is not a
            SchemaAssertionInfoClass.
    """
    info = assertion.info
    if info is None:
        logger.warning(
            f"Assertion {assertion.urn} does not have info, "
            f"defaulting to {DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY}"
        )
        return DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY

    if not isinstance(info, models.SchemaAssertionInfoClass):
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} is not a schema assertion"
        )

    compatibility_value = info.compatibility
    if compatibility_value is None:
        return DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY

    try:
        # Enum-like objects expose the member name; plain strings are
        # upper-cased before lookup.
        if hasattr(compatibility_value, "name"):
            return SchemaAssertionCompatibility(compatibility_value.name)
        if isinstance(compatibility_value, str):
            return SchemaAssertionCompatibility(compatibility_value.upper())
        logger.warning(
            f"Unknown compatibility value type {type(compatibility_value)}, "
            f"defaulting to {DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY}"
        )
        return DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY
    except ValueError:
        # The value did not match any SchemaAssertionCompatibility member.
        logger.warning(
            f"Unknown compatibility value '{compatibility_value}', "
            f"defaulting to {DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY}"
        )
        return DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY
|
|
162
|
+
|
|
163
|
+
@staticmethod
def _get_fields(assertion: Assertion) -> list[SchemaAssertionField]:
    """
    Extract the expected schema fields from the assertion info.

    Each entry of ``assertion.info.schema.fields`` is converted with
    ``SchemaAssertion._parse_schema_field``. Entries that cannot be parsed
    are left out of the result and reported together in one warning.

    Args:
        assertion: The assertion entity to inspect.

    Returns:
        The successfully parsed fields. Empty when the assertion has no
        info, no schema metadata, or no parseable fields.

    Raises:
        SDKNotYetSupportedError: If the info aspect is present but is not a
            schema assertion info.
    """
    if assertion.info is None:
        logger.warning(
            f"Assertion {assertion.urn} does not have info, returning empty fields"
        )
        return []

    if not isinstance(assertion.info, models.SchemaAssertionInfoClass):
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} is not a schema assertion"
        )

    schema = assertion.info.schema
    if schema is None:
        logger.warning(
            f"Assertion {assertion.urn} does not have schema metadata, "
            "returning empty fields"
        )
        return []

    fields: list[SchemaAssertionField] = []
    failed_fields: list[str] = []
    for schema_field in schema.fields or []:
        field = SchemaAssertion._parse_schema_field(schema_field)
        if field is not None:
            fields.append(field)
            continue
        # _parse_schema_field rejects objects that lack ``fieldPath``, so the
        # attribute cannot be read unconditionally here. The path is also
        # coerced to str so the join below never sees a non-string.
        path = getattr(schema_field, "fieldPath", None)
        failed_fields.append("<unknown>" if path is None else str(path))

    if failed_fields:
        logger.warning(
            f"Failed to parse {len(failed_fields)} field(s) from assertion {assertion.urn}: "
            f"{', '.join(failed_fields)}. These fields will be excluded from the assertion object. "
            f"This may indicate unsupported field types or corrupted data."
        )

    return fields
|
|
202
|
+
|
|
203
|
+
@staticmethod
def _parse_schema_field(
    schema_field: models.SchemaFieldClass,
) -> Optional[SchemaAssertionField]:
    """
    Convert one SchemaFieldClass into a SchemaAssertionField.

    Args:
        schema_field: The schema field to convert.

    Returns:
        The converted field, or None (after logging a warning) when the input
        lacks a ``fieldPath`` or ``type`` attribute, has no type set, or has a
        type class that is not in SCHEMA_FIELD_CLASS_TO_TYPE_MAP.
    """
    # Fail fast when either required attribute is absent.
    has_required_attrs = hasattr(schema_field, "fieldPath") and hasattr(
        schema_field, "type"
    )
    if not has_required_attrs:
        logger.warning(
            "Schema field missing required attributes: fieldPath or type, skipping"
        )
        return None

    path = schema_field.fieldPath

    # The concrete type instance is wrapped in a SchemaFieldDataTypeClass.
    data_type_wrapper = schema_field.type
    if data_type_wrapper is None or data_type_wrapper.type is None:
        logger.warning(f"Schema field {path} has no type, skipping")
        return None

    # The map is keyed by the exact class of the wrapped type instance.
    concrete_class = type(data_type_wrapper.type)
    mapped_type = SCHEMA_FIELD_CLASS_TO_TYPE_MAP.get(concrete_class)
    if mapped_type is None:
        logger.warning(
            f"Unknown schema field type {concrete_class.__name__} "
            f"for field {path}, skipping"
        )
        return None

    return SchemaAssertionField(
        path=path,
        type=mapped_type,
        native_type=getattr(schema_field, "nativeDataType", None),
    )
|
|
241
|
+
|
|
242
|
+
@classmethod
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
    """
    Build a SchemaAssertion from its backend assertion and monitor entities.

    Identity, compatibility, fields, incident behaviour, audit stamps and
    tags are read from ``assertion``; mode and schedule are read from
    ``monitor``. A missing description becomes an empty display name.

    Args:
        assertion: The assertion entity from the backend.
        monitor: The monitor entity from the backend.

    Returns:
        A SchemaAssertion instance populated from the entities.
    """
    # The dict literal evaluates its values top to bottom, so the helper
    # calls run in the same order as the keyword arguments would.
    init_kwargs = {
        "urn": assertion.urn,
        "dataset_urn": assertion.dataset,
        "display_name": assertion.description or "",
        "mode": cls._get_mode(monitor),
        "schedule": cls._get_schedule(monitor),
        "compatibility": cls._get_compatibility(assertion),
        "fields": cls._get_fields(assertion),
        "incident_behavior": cls._get_incident_behavior(assertion),
        "created_by": cls._get_created_by(assertion),
        "created_at": cls._get_created_at(assertion),
        "updated_by": cls._get_updated_by(assertion),
        "updated_at": cls._get_updated_at(assertion),
        "tags": cls._get_tags(assertion),
    }
    return cls(**init_kwargs)
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
|
+
AssertionMode,
|
|
9
|
+
_AssertionPublic,
|
|
10
|
+
_HasColumnMetricFunctionality,
|
|
11
|
+
_HasSchedule,
|
|
12
|
+
_HasSmartFunctionality,
|
|
13
|
+
)
|
|
14
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
15
|
+
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
16
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
17
|
+
DEFAULT_SCHEDULE,
|
|
18
|
+
DEFAULT_SENSITIVITY,
|
|
19
|
+
AssertionIncidentBehavior,
|
|
20
|
+
DetectionMechanism,
|
|
21
|
+
ExclusionWindowTypes,
|
|
22
|
+
InferenceSensitivity,
|
|
23
|
+
_DetectionMechanismTypes,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
26
|
+
OperatorType,
|
|
27
|
+
)
|
|
28
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
29
|
+
MetricInputType,
|
|
30
|
+
)
|
|
31
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
32
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
33
|
+
from datahub.metadata import schema_classes as models
|
|
34
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SmartColumnMetricAssertion(
    _HasColumnMetricFunctionality,
    _HasSmartFunctionality,
    _HasSchedule,
    _AssertionPublic,
):
    """
    Smart assertion on a column (field) metric.

    Validates the value of a common column metric (e.g. an aggregation) such
    as null count or percentage, min, max, median, and more. The assertion
    parameters are inferred by AI, so the operator is fixed to BETWEEN and
    criteria_parameters to the placeholder (0, 0).
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        column_name: str,
        metric_type: MetricInputType,
        # TODO: Evaluate these params:
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart column metric assertion.

        Args:
            urn: The URN of the assertion.
            dataset_urn: The URN of the dataset to monitor.
            column_name: The name of the column the metric is computed on.
            metric_type: The column metric to evaluate.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active/inactive).
            schedule: The cron schedule of the assertion.
            sensitivity: The sensitivity of the assertion (low/medium/high).
            exclusion_windows: The exclusion windows to apply to the assertion.
            training_data_lookback_days: The number of days of data to use for training.
            incident_behavior: The behavior when incidents occur.
            detection_mechanism: The mechanism used to detect changes.
            tags: The tags to apply to the assertion.
            created_by: The URN of the user who created the assertion.
            created_at: The timestamp when the assertion was created.
            updated_by: The URN of the user who last updated the assertion.
            updated_at: The timestamp when the assertion was last updated.
        """
        # Each base class is initialised explicitly with only the keyword
        # arguments it owns.
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        _HasSchedule.__init__(self, schedule=schedule)

        # Smart assertions have no user-supplied criteria: the operator and
        # its bounds are fixed placeholders.
        fixed_operator = OperatorType.BETWEEN
        placeholder_bounds = (0, 0)
        _HasColumnMetricFunctionality.__init__(
            self,
            column_name=column_name,
            metric_type=metric_type,
            operator=fixed_operator,
            criteria_parameters=placeholder_bounds,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Build a SmartColumnMetricAssertion from its backend entities.

        Column, metric, incident behaviour, tags and audit stamps are read
        from ``assertion``; mode, schedule and the smart-inference settings
        are read from ``monitor``. A missing description becomes an empty
        display name.

        Args:
            assertion: The Assertion entity.
            monitor: The Monitor entity.

        Returns:
            A SmartColumnMetricAssertion instance.
        """
        # The dict literal evaluates its values top to bottom, so the helper
        # calls run in the same order as the keyword arguments would.
        init_kwargs = {
            "urn": assertion.urn,
            "dataset_urn": assertion.dataset,
            "column_name": cls._get_column_name(assertion),
            "metric_type": cls._get_metric_type(assertion),
            "display_name": assertion.description or "",
            "mode": cls._get_mode(monitor),
            "schedule": cls._get_schedule(monitor),
            "sensitivity": cls._get_sensitivity(monitor),
            "exclusion_windows": cls._get_exclusion_windows(monitor),
            "training_data_lookback_days": cls._get_training_data_lookback_days(
                monitor
            ),
            "incident_behavior": cls._get_incident_behavior(assertion),
            "detection_mechanism": cls._get_detection_mechanism(assertion, monitor),
            "tags": cls._get_tags(assertion),
            "created_by": cls._get_created_by(assertion),
            "created_at": cls._get_created_at(assertion),
            "updated_by": cls._get_updated_by(assertion),
            "updated_at": cls._get_updated_at(assertion),
        }
        return cls(**init_kwargs)

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Get the detection mechanism for column metric assertions.

        Args:
            assertion: The Assertion entity, used for validation and for the
                additional filter of query-based mechanisms.
            monitor: The Monitor entity holding the evaluation parameters.
            default: Value returned whenever the mechanism cannot be determined.

        Returns:
            The detection mechanism matching the monitor's dataset-field
            source type, or ``default`` when the validated context, the
            dataset field parameters or the changed-rows field are missing,
            or when the source type is not supported.
        """
        context = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
            models.FieldAssertionInfoClass,
            default,
        )
        if context is None:
            return default

        field_params = context.datasetFieldParameters
        if field_params is None:
            logger.warning(
                f"Monitor does not have datasetFieldParameters, defaulting detection mechanism to {default}"
            )
            return default

        source_kinds = models.DatasetFieldAssertionSourceTypeClass
        source_type = field_params.sourceType

        if source_type == source_kinds.ALL_ROWS_QUERY:
            return DetectionMechanism.ALL_ROWS_QUERY(
                additional_filter=_AssertionPublic._get_additional_filter(assertion)
            )

        if source_type == source_kinds.CHANGED_ROWS_QUERY:
            changed_rows_field = field_params.changedRowsField
            if changed_rows_field is None:
                logger.warning(
                    f"Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, defaulting detection mechanism to {default}"
                )
                return default
            return DetectionMechanism.CHANGED_ROWS_QUERY(
                column_name=changed_rows_field.path,
                additional_filter=_AssertionPublic._get_additional_filter(assertion),
            )

        if source_type == source_kinds.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

        logger.warning(
            f"Unsupported DatasetFieldAssertionSourceType {source_type}, defaulting detection mechanism to {default}"
        )
        return default
|