acryl-datahub-cloud 0.3.11rc0__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub-cloud has been flagged as potentially problematic; see the registry listing for details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +37 -13
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +55 -24
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +4 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +242 -51
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +518 -77
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2296 -1900
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +4 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +12 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +25091 -20557
- acryl_datahub_cloud/metadata/schema_classes.py +29269 -23863
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +353 -215
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +166 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +418 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +62 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +54 -9
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +163 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +61 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +55 -49
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +235 -142
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
- acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
acryl_datahub_cloud/sdk/assertion_input/assertion_input.py (new file)
@@ -0,0 +1,1470 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This file contains the AssertionInput class and related classes, which are used to
|
|
3
|
+
validate and represent the input for creating an Assertion in DataHub.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import inspect
|
|
7
|
+
import random
|
|
8
|
+
import string
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import (
|
|
14
|
+
Callable,
|
|
15
|
+
Collection,
|
|
16
|
+
Literal,
|
|
17
|
+
Optional,
|
|
18
|
+
Type,
|
|
19
|
+
TypeAlias,
|
|
20
|
+
TypeVar,
|
|
21
|
+
Union,
|
|
22
|
+
cast,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
import pydantic
|
|
26
|
+
import pytz
|
|
27
|
+
import tzlocal
|
|
28
|
+
from avrogen.dict_wrapper import DictWrapper
|
|
29
|
+
from croniter import croniter
|
|
30
|
+
from pydantic import BaseModel, Extra, ValidationError
|
|
31
|
+
|
|
32
|
+
from acryl_datahub_cloud.sdk.entities.assertion import (
|
|
33
|
+
Assertion,
|
|
34
|
+
AssertionActionsInputType,
|
|
35
|
+
AssertionInfoInputType,
|
|
36
|
+
TagsInputType,
|
|
37
|
+
)
|
|
38
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
39
|
+
from acryl_datahub_cloud.sdk.errors import (
|
|
40
|
+
SDKUsageError,
|
|
41
|
+
SDKUsageErrorWithExamples,
|
|
42
|
+
)
|
|
43
|
+
from datahub.emitter.enum_helpers import get_enum_options
|
|
44
|
+
from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
|
|
45
|
+
from datahub.metadata import schema_classes as models
|
|
46
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
47
|
+
from datahub.sdk import Dataset
|
|
48
|
+
from datahub.sdk.entity_client import EntityClient
|
|
49
|
+
|
|
50
|
+
# TODO: Import ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS from datahub_executor.config
|
|
51
|
+
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
|
|
52
|
+
|
|
53
|
+
DEFAULT_NAME_PREFIX = "New Assertion"
|
|
54
|
+
DEFAULT_NAME_SUFFIX_LENGTH = 8
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
DEFAULT_DAILY_SCHEDULE: models.CronScheduleClass = models.CronScheduleClass(
|
|
58
|
+
cron="0 0 * * *", # Every day at midnight, matches the UI default
|
|
59
|
+
timezone=str(
|
|
60
|
+
tzlocal.get_localzone()
|
|
61
|
+
), # User local timezone, matches the UI default
|
|
62
|
+
)
|
|
63
|
+
DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_DAILY_SCHEDULE
|
|
64
|
+
|
|
65
|
+
# Legacy aliases - all now point to daily schedule for consistency
|
|
66
|
+
DEFAULT_HOURLY_SCHEDULE: models.CronScheduleClass = DEFAULT_DAILY_SCHEDULE
|
|
67
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE: models.CronScheduleClass = DEFAULT_DAILY_SCHEDULE
|
|
68
|
+
|
|
69
|
+
TYPE_CLASS_NAME_TO_TYPE_MAP = {
|
|
70
|
+
"StringTypeClass": "STRING",
|
|
71
|
+
"NumberTypeClass": "NUMBER",
|
|
72
|
+
"BooleanTypeClass": "BOOLEAN",
|
|
73
|
+
"DateTypeClass": "DATE",
|
|
74
|
+
"TimeTypeClass": "TIME",
|
|
75
|
+
"NullTypeClass": "NULL",
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class AbstractDetectionMechanism(BaseModel, ABC):
|
|
80
|
+
type: str
|
|
81
|
+
|
|
82
|
+
class Config:
|
|
83
|
+
extra = Extra.forbid
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class _InformationSchema(AbstractDetectionMechanism):
|
|
87
|
+
type: Literal["information_schema"] = "information_schema"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _AuditLog(AbstractDetectionMechanism):
|
|
91
|
+
type: Literal["audit_log"] = "audit_log"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Keep this in sync with the allowed field types in the UI, currently in
|
|
95
|
+
# datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertion/builder/constants.ts: LAST_MODIFIED_FIELD_TYPES
|
|
96
|
+
LAST_MODIFIED_ALLOWED_FIELD_TYPES = [models.DateTypeClass(), models.TimeTypeClass()]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class _LastModifiedColumn(AbstractDetectionMechanism):
|
|
100
|
+
type: Literal["last_modified_column"] = "last_modified_column"
|
|
101
|
+
column_name: str
|
|
102
|
+
additional_filter: Optional[str] = None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Keep this in sync with the allowed field types in the UI, currently in
|
|
106
|
+
# datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertion/builder/constants.ts: HIGH_WATERMARK_FIELD_TYPES
|
|
107
|
+
HIGH_WATERMARK_ALLOWED_FIELD_TYPES = [
|
|
108
|
+
models.NumberTypeClass(),
|
|
109
|
+
models.DateTypeClass(),
|
|
110
|
+
models.TimeTypeClass(),
|
|
111
|
+
]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class _HighWatermarkColumn(AbstractDetectionMechanism):
|
|
115
|
+
type: Literal["high_watermark_column"] = "high_watermark_column"
|
|
116
|
+
column_name: str
|
|
117
|
+
additional_filter: Optional[str] = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class _DataHubOperation(AbstractDetectionMechanism):
|
|
121
|
+
type: Literal["datahub_operation"] = "datahub_operation"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class _Query(AbstractDetectionMechanism):
|
|
125
|
+
# COUNT(*) query
|
|
126
|
+
type: Literal["query"] = "query"
|
|
127
|
+
additional_filter: Optional[str] = None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class _AllRowsQuery(AbstractDetectionMechanism):
|
|
131
|
+
# For column-based assertions, this is the default detection mechanism.
|
|
132
|
+
type: Literal["all_rows_query"] = "all_rows_query"
|
|
133
|
+
additional_filter: Optional[str] = None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
|
|
137
|
+
# Used for column-based assertions.
|
|
138
|
+
type: Literal["all_rows_query_datahub_dataset_profile"] = (
|
|
139
|
+
"all_rows_query_datahub_dataset_profile"
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class _ChangedRowsQuery(AbstractDetectionMechanism):
|
|
144
|
+
# Used for column-based assertions.
|
|
145
|
+
type: Literal["changed_rows_query"] = "changed_rows_query"
|
|
146
|
+
column_name: str
|
|
147
|
+
additional_filter: Optional[str] = None
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class _DatasetProfile(AbstractDetectionMechanism):
|
|
151
|
+
type: Literal["dataset_profile"] = "dataset_profile"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class _SchemaMetadata(AbstractDetectionMechanism):
|
|
155
|
+
type: Literal["schema_metadata"] = "schema_metadata"
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Operators that require a single value numeric parameter
|
|
159
|
+
SINGLE_VALUE_NUMERIC_OPERATORS = [
|
|
160
|
+
models.AssertionStdOperatorClass.EQUAL_TO,
|
|
161
|
+
models.AssertionStdOperatorClass.NOT_EQUAL_TO,
|
|
162
|
+
models.AssertionStdOperatorClass.GREATER_THAN,
|
|
163
|
+
models.AssertionStdOperatorClass.LESS_THAN,
|
|
164
|
+
models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
|
|
165
|
+
models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
|
|
166
|
+
]
|
|
167
|
+
|
|
168
|
+
# Operators that require a single value parameter
|
|
169
|
+
SINGLE_VALUE_OPERATORS = [
|
|
170
|
+
models.AssertionStdOperatorClass.CONTAIN,
|
|
171
|
+
models.AssertionStdOperatorClass.END_WITH,
|
|
172
|
+
models.AssertionStdOperatorClass.START_WITH,
|
|
173
|
+
models.AssertionStdOperatorClass.REGEX_MATCH,
|
|
174
|
+
models.AssertionStdOperatorClass.IN,
|
|
175
|
+
models.AssertionStdOperatorClass.NOT_IN,
|
|
176
|
+
] + SINGLE_VALUE_NUMERIC_OPERATORS
|
|
177
|
+
|
|
178
|
+
# Operators that require a numeric range parameter
|
|
179
|
+
RANGE_OPERATORS = [
|
|
180
|
+
models.AssertionStdOperatorClass.BETWEEN,
|
|
181
|
+
]
|
|
182
|
+
|
|
183
|
+
# Operators that require no parameters
|
|
184
|
+
NO_PARAMETER_OPERATORS = [
|
|
185
|
+
models.AssertionStdOperatorClass.NULL,
|
|
186
|
+
models.AssertionStdOperatorClass.NOT_NULL,
|
|
187
|
+
models.AssertionStdOperatorClass.IS_TRUE,
|
|
188
|
+
models.AssertionStdOperatorClass.IS_FALSE,
|
|
189
|
+
]
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# Keep these two lists in sync:
|
|
193
|
+
_DETECTION_MECHANISM_CONCRETE_TYPES = (
|
|
194
|
+
_InformationSchema,
|
|
195
|
+
_AuditLog,
|
|
196
|
+
_LastModifiedColumn,
|
|
197
|
+
_HighWatermarkColumn,
|
|
198
|
+
_DataHubOperation,
|
|
199
|
+
_Query,
|
|
200
|
+
_DatasetProfile,
|
|
201
|
+
_AllRowsQuery,
|
|
202
|
+
_ChangedRowsQuery,
|
|
203
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
204
|
+
_SchemaMetadata,
|
|
205
|
+
)
|
|
206
|
+
_DetectionMechanismTypes = Union[
|
|
207
|
+
_InformationSchema,
|
|
208
|
+
_AuditLog,
|
|
209
|
+
_LastModifiedColumn,
|
|
210
|
+
_HighWatermarkColumn,
|
|
211
|
+
_DataHubOperation,
|
|
212
|
+
_Query,
|
|
213
|
+
_DatasetProfile,
|
|
214
|
+
_AllRowsQuery,
|
|
215
|
+
_ChangedRowsQuery,
|
|
216
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
217
|
+
_SchemaMetadata,
|
|
218
|
+
]
|
|
219
|
+
|
|
220
|
+
_DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
|
|
221
|
+
_LastModifiedColumn,
|
|
222
|
+
_HighWatermarkColumn,
|
|
223
|
+
_Query,
|
|
224
|
+
_AllRowsQuery,
|
|
225
|
+
_ChangedRowsQuery,
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class DetectionMechanism:
|
|
232
|
+
# To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
|
|
233
|
+
# The options with sub parameters are the classes themselves so that parameters can be applied, and the rest are already instantiated instances of the classes.
|
|
234
|
+
INFORMATION_SCHEMA = _InformationSchema()
|
|
235
|
+
AUDIT_LOG = _AuditLog()
|
|
236
|
+
LAST_MODIFIED_COLUMN = _LastModifiedColumn
|
|
237
|
+
HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
|
|
238
|
+
DATAHUB_OPERATION = _DataHubOperation()
|
|
239
|
+
QUERY = _Query
|
|
240
|
+
ALL_ROWS_QUERY = _AllRowsQuery
|
|
241
|
+
CHANGED_ROWS_QUERY = _ChangedRowsQuery
|
|
242
|
+
ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
|
|
243
|
+
DATASET_PROFILE = _DatasetProfile()
|
|
244
|
+
|
|
245
|
+
_DETECTION_MECHANISM_EXAMPLES = {
|
|
246
|
+
"Information Schema from string": "information_schema",
|
|
247
|
+
"Information Schema from DetectionMechanism": "DetectionMechanism.INFORMATION_SCHEMA",
|
|
248
|
+
"Audit Log from string": "audit_log",
|
|
249
|
+
"Audit Log from DetectionMechanism": "DetectionMechanism.AUDIT_LOG",
|
|
250
|
+
"Last Modified Column from dict": {
|
|
251
|
+
"type": "last_modified_column",
|
|
252
|
+
"column_name": "last_modified",
|
|
253
|
+
"additional_filter": "last_modified > '2021-01-01'",
|
|
254
|
+
},
|
|
255
|
+
"Last Modified Column from DetectionMechanism": "DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter='last_modified > 2021-01-01')",
|
|
256
|
+
"High Watermark Column from dict": {
|
|
257
|
+
"type": "high_watermark_column",
|
|
258
|
+
"column_name": "id",
|
|
259
|
+
"additional_filter": "id > 1000",
|
|
260
|
+
},
|
|
261
|
+
"High Watermark Column from DetectionMechanism": "DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')",
|
|
262
|
+
"DataHub Operation from string": "datahub_operation",
|
|
263
|
+
"DataHub Operation from DetectionMechanism": "DetectionMechanism.DATAHUB_OPERATION",
|
|
264
|
+
"Query from string": "query",
|
|
265
|
+
"Query from dict": {
|
|
266
|
+
"type": "query",
|
|
267
|
+
"additional_filter": "id > 1000",
|
|
268
|
+
},
|
|
269
|
+
"Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
|
|
270
|
+
"Dataset Profile from string": "dataset_profile",
|
|
271
|
+
"Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
|
|
272
|
+
"All Rows Query from string": "all_rows_query",
|
|
273
|
+
"All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
|
|
274
|
+
"All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
|
|
275
|
+
"Changed Rows Query from dict (with optional additional filter)": {
|
|
276
|
+
"type": "changed_rows_query",
|
|
277
|
+
"column_name": "id",
|
|
278
|
+
"additional_filter": "id > 1000",
|
|
279
|
+
},
|
|
280
|
+
"Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
|
|
281
|
+
"Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
|
|
282
|
+
"All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
|
|
283
|
+
"All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
@staticmethod
|
|
287
|
+
def parse(
|
|
288
|
+
detection_mechanism_config: Optional[
|
|
289
|
+
Union[str, dict[str, str], _DetectionMechanismTypes]
|
|
290
|
+
] = None,
|
|
291
|
+
default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
|
|
292
|
+
) -> _DetectionMechanismTypes:
|
|
293
|
+
if detection_mechanism_config is None:
|
|
294
|
+
return default_detection_mechanism
|
|
295
|
+
if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
|
|
296
|
+
return detection_mechanism_config
|
|
297
|
+
elif isinstance(detection_mechanism_config, str):
|
|
298
|
+
return DetectionMechanism._try_parse_from_string(detection_mechanism_config)
|
|
299
|
+
elif isinstance(detection_mechanism_config, dict):
|
|
300
|
+
return DetectionMechanism._try_parse_from_dict(detection_mechanism_config)
|
|
301
|
+
else:
|
|
302
|
+
raise SDKUsageErrorWithExamples(
|
|
303
|
+
msg=f"Invalid detection mechanism: {detection_mechanism_config}",
|
|
304
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
@staticmethod
|
|
308
|
+
def _try_parse_from_string(
|
|
309
|
+
detection_mechanism_config: str,
|
|
310
|
+
) -> _DetectionMechanismTypes:
|
|
311
|
+
try:
|
|
312
|
+
return_value = getattr(
|
|
313
|
+
DetectionMechanism, detection_mechanism_config.upper()
|
|
314
|
+
)
|
|
315
|
+
if inspect.isclass(return_value) and issubclass(
|
|
316
|
+
return_value, pydantic.BaseModel
|
|
317
|
+
):
|
|
318
|
+
try:
|
|
319
|
+
# We try to instantiate here to let pydantic raise a helpful error
|
|
320
|
+
# about which parameters are missing
|
|
321
|
+
return_value = return_value()
|
|
322
|
+
except ValidationError as e:
|
|
323
|
+
raise SDKUsageErrorWithExamples(
|
|
324
|
+
msg=f"Detection mechanism type '{detection_mechanism_config}' requires additional parameters: {e}",
|
|
325
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
326
|
+
) from e
|
|
327
|
+
return return_value
|
|
328
|
+
except AttributeError as e:
|
|
329
|
+
raise SDKUsageErrorWithExamples(
|
|
330
|
+
msg=f"Invalid detection mechanism type: {detection_mechanism_config}",
|
|
331
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
332
|
+
) from e
|
|
333
|
+
|
|
334
|
+
@staticmethod
|
|
335
|
+
def _try_parse_from_dict(
|
|
336
|
+
detection_mechanism_config: dict[str, str],
|
|
337
|
+
) -> _DetectionMechanismTypes:
|
|
338
|
+
# Make a copy of the dictionary to avoid mutating the original
|
|
339
|
+
config_copy = detection_mechanism_config.copy()
|
|
340
|
+
|
|
341
|
+
try:
|
|
342
|
+
detection_mechanism_type = config_copy.pop("type")
|
|
343
|
+
except KeyError as e:
|
|
344
|
+
raise SDKUsageErrorWithExamples(
|
|
345
|
+
msg="Detection mechanism type is required if using a dict to create a DetectionMechanism",
|
|
346
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
347
|
+
) from e
|
|
348
|
+
try:
|
|
349
|
+
detection_mechanism_obj = getattr(
|
|
350
|
+
DetectionMechanism, detection_mechanism_type.upper()
|
|
351
|
+
)
|
|
352
|
+
except AttributeError as e:
|
|
353
|
+
raise SDKUsageErrorWithExamples(
|
|
354
|
+
msg=f"Invalid detection mechanism type: {detection_mechanism_type}",
|
|
355
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
356
|
+
) from e
|
|
357
|
+
|
|
358
|
+
try:
|
|
359
|
+
return detection_mechanism_obj(**config_copy)
|
|
360
|
+
except TypeError as e:
|
|
361
|
+
if "object is not callable" not in e.args[0]:
|
|
362
|
+
raise e
|
|
363
|
+
if config_copy:
|
|
364
|
+
# If we are here in the TypeError case, the detection mechanism is an instance of a class,
|
|
365
|
+
# not a class itself, so we can't instantiate it with the config dict.
|
|
366
|
+
# In this case, the config dict should be empty after the type is popped.
|
|
367
|
+
# If it is not empty, we raise an error.
|
|
368
|
+
raise SDKUsageErrorWithExamples(
|
|
369
|
+
msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {config_copy}",
|
|
370
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
371
|
+
) from e
|
|
372
|
+
return detection_mechanism_obj
|
|
373
|
+
except ValidationError as e:
|
|
374
|
+
raise SDKUsageErrorWithExamples(
|
|
375
|
+
msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {config_copy} {e}",
|
|
376
|
+
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
377
|
+
) from e
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
DetectionMechanismInputTypes: TypeAlias = Union[
|
|
381
|
+
str, dict[str, str], _DetectionMechanismTypes, None
|
|
382
|
+
]
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class InferenceSensitivity(Enum):
|
|
386
|
+
HIGH = "high"
|
|
387
|
+
MEDIUM = "medium"
|
|
388
|
+
LOW = "low"
|
|
389
|
+
|
|
390
|
+
@staticmethod
|
|
391
|
+
def parse(
|
|
392
|
+
sensitivity: Optional[
|
|
393
|
+
Union[
|
|
394
|
+
str,
|
|
395
|
+
int,
|
|
396
|
+
"InferenceSensitivity",
|
|
397
|
+
models.AssertionMonitorSensitivityClass,
|
|
398
|
+
]
|
|
399
|
+
],
|
|
400
|
+
) -> "InferenceSensitivity":
|
|
401
|
+
if sensitivity is None:
|
|
402
|
+
return DEFAULT_SENSITIVITY
|
|
403
|
+
EXAMPLES = {
|
|
404
|
+
"High sensitivity from string": "high",
|
|
405
|
+
"High sensitivity from enum": "InferenceSensitivity.HIGH",
|
|
406
|
+
"Medium sensitivity from string": "medium",
|
|
407
|
+
"Medium sensitivity from enum": "InferenceSensitivity.MEDIUM",
|
|
408
|
+
"Low sensitivity from string": "low",
|
|
409
|
+
"Low sensitivity from enum": "InferenceSensitivity.LOW",
|
|
410
|
+
"Sensitivity from int (1-3: low, 4-6: medium, 7-10: high)": "10",
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
if isinstance(sensitivity, InferenceSensitivity):
|
|
414
|
+
return sensitivity
|
|
415
|
+
if isinstance(sensitivity, models.AssertionMonitorSensitivityClass):
|
|
416
|
+
sensitivity = sensitivity.level
|
|
417
|
+
if isinstance(sensitivity, int):
|
|
418
|
+
if (sensitivity < 1) or (sensitivity > 10):
|
|
419
|
+
raise SDKUsageErrorWithExamples(
|
|
420
|
+
msg=f"Invalid inference sensitivity: {sensitivity}",
|
|
421
|
+
examples=EXAMPLES,
|
|
422
|
+
)
|
|
423
|
+
elif sensitivity < 4:
|
|
424
|
+
return InferenceSensitivity.LOW
|
|
425
|
+
elif sensitivity < 7:
|
|
426
|
+
return InferenceSensitivity.MEDIUM
|
|
427
|
+
else:
|
|
428
|
+
return InferenceSensitivity.HIGH
|
|
429
|
+
try:
|
|
430
|
+
return InferenceSensitivity(sensitivity)
|
|
431
|
+
except ValueError as e:
|
|
432
|
+
raise SDKUsageErrorWithExamples(
|
|
433
|
+
msg=f"Invalid inference sensitivity: {sensitivity}",
|
|
434
|
+
examples=EXAMPLES,
|
|
435
|
+
) from e
|
|
436
|
+
|
|
437
|
+
@staticmethod
|
|
438
|
+
def to_int(sensitivity: "InferenceSensitivity") -> int:
|
|
439
|
+
return {
|
|
440
|
+
InferenceSensitivity.HIGH: 10,
|
|
441
|
+
InferenceSensitivity.MEDIUM: 5,
|
|
442
|
+
InferenceSensitivity.LOW: 1,
|
|
443
|
+
}[sensitivity]
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
|
|
447
|
+
|
|
448
|
+
TIME_WINDOW_SIZE_EXAMPLES = {
|
|
449
|
+
"Recommended: Time window size from objects": "TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10)",
|
|
450
|
+
"Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
|
|
451
|
+
"Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
|
|
452
|
+
"Time window size from dict": '{"unit": "MINUTE", "multiple": 10}',
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class CalendarInterval(str, Enum):
|
|
457
|
+
MINUTE = "MINUTE"
|
|
458
|
+
HOUR = "HOUR"
|
|
459
|
+
DAY = "DAY"
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
class TimeWindowSize(BaseModel):
|
|
463
|
+
unit: Union[CalendarInterval, str]
|
|
464
|
+
multiple: int
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
TimeWindowSizeInputTypes: TypeAlias = Union[
|
|
468
|
+
models.TimeWindowSizeClass,
|
|
469
|
+
models.FixedIntervalScheduleClass,
|
|
470
|
+
TimeWindowSize,
|
|
471
|
+
dict[str, Union[str, int]], # {"unit": "MINUTE", "multiple": 10}
|
|
472
|
+
]
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _try_parse_calendar_interval(
|
|
476
|
+
config: Union[str, CalendarInterval],
|
|
477
|
+
) -> CalendarInterval:
|
|
478
|
+
if isinstance(config, CalendarInterval):
|
|
479
|
+
return config
|
|
480
|
+
try:
|
|
481
|
+
return CalendarInterval(config.upper())
|
|
482
|
+
except ValueError as e:
|
|
483
|
+
raise SDKUsageErrorWithExamples(
|
|
484
|
+
msg=f"Invalid calendar interval: {config}",
|
|
485
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
486
|
+
) from e
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _try_parse_time_window_size(
|
|
490
|
+
config: TimeWindowSizeInputTypes,
|
|
491
|
+
) -> models.TimeWindowSizeClass:
|
|
492
|
+
if isinstance(config, models.TimeWindowSizeClass):
|
|
493
|
+
return config
|
|
494
|
+
elif isinstance(config, models.FixedIntervalScheduleClass):
|
|
495
|
+
return models.TimeWindowSizeClass(
|
|
496
|
+
unit=_try_parse_and_validate_schema_classes_enum(
|
|
497
|
+
config.unit, models.CalendarIntervalClass
|
|
498
|
+
),
|
|
499
|
+
multiple=config.multiple,
|
|
500
|
+
)
|
|
501
|
+
elif isinstance(config, TimeWindowSize):
|
|
502
|
+
return models.TimeWindowSizeClass(
|
|
503
|
+
unit=_try_parse_and_validate_schema_classes_enum(
|
|
504
|
+
_try_parse_and_validate_schema_classes_enum(
|
|
505
|
+
config.unit, CalendarInterval
|
|
506
|
+
).value,
|
|
507
|
+
models.CalendarIntervalClass,
|
|
508
|
+
),
|
|
509
|
+
multiple=config.multiple,
|
|
510
|
+
)
|
|
511
|
+
elif isinstance(config, dict):
|
|
512
|
+
if "unit" not in config or "multiple" not in config:
|
|
513
|
+
raise SDKUsageErrorWithExamples(
|
|
514
|
+
msg=f"Invalid time window size: {config}",
|
|
515
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
516
|
+
)
|
|
517
|
+
try:
|
|
518
|
+
multiple = int(config["multiple"])
|
|
519
|
+
except ValueError as e:
|
|
520
|
+
raise SDKUsageErrorWithExamples(
|
|
521
|
+
msg=f"Invalid time window size: {config}",
|
|
522
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
523
|
+
) from e
|
|
524
|
+
return models.TimeWindowSizeClass(
|
|
525
|
+
unit=_try_parse_calendar_interval(str(config["unit"])),
|
|
526
|
+
multiple=multiple,
|
|
527
|
+
)
|
|
528
|
+
else:
|
|
529
|
+
raise SDKUsageErrorWithExamples(
|
|
530
|
+
msg=f"Invalid time window size: {config}",
|
|
531
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
class FixedRangeExclusionWindow(BaseModel):
|
|
536
|
+
type: Literal["fixed_range_exclusion_window"] = "fixed_range_exclusion_window"
|
|
537
|
+
start: datetime
|
|
538
|
+
end: datetime
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
ExclusionWindowTypes: TypeAlias = Union[
|
|
542
|
+
FixedRangeExclusionWindow,
|
|
543
|
+
# Add other exclusion window types here as they are added to the SDK.
|
|
544
|
+
]
|
|
545
|
+
|
|
546
|
+
FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES = {
|
|
547
|
+
"Exclusion Window from datetimes": {
|
|
548
|
+
"start": "datetime(2025, 1, 1, 0, 0, 0)",
|
|
549
|
+
"end": "datetime(2025, 1, 2, 0, 0, 0)",
|
|
550
|
+
},
|
|
551
|
+
"Exclusion Window from strings": {
|
|
552
|
+
"start": "2025-01-01T00:00:00",
|
|
553
|
+
"end": "2025-01-02T00:00:00",
|
|
554
|
+
},
|
|
555
|
+
"Exclusion Window from object": "ExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0))",
|
|
556
|
+
}
|
|
557
|
+
FixedRangeExclusionWindowInputTypes: TypeAlias = Union[
|
|
558
|
+
dict[str, datetime],
|
|
559
|
+
dict[str, str],
|
|
560
|
+
list[dict[str, datetime]],
|
|
561
|
+
list[dict[str, str]],
|
|
562
|
+
FixedRangeExclusionWindow,
|
|
563
|
+
list[FixedRangeExclusionWindow],
|
|
564
|
+
]
|
|
565
|
+
|
|
566
|
+
ExclusionWindowInputTypes: TypeAlias = Union[
|
|
567
|
+
models.AssertionExclusionWindowClass,
|
|
568
|
+
list[models.AssertionExclusionWindowClass],
|
|
569
|
+
FixedRangeExclusionWindowInputTypes,
|
|
570
|
+
# Add other exclusion window types here as they are added to the SDK.
|
|
571
|
+
]
|
|
572
|
+
|
|
573
|
+
IterableExclusionWindowInputTypes: TypeAlias = Union[
|
|
574
|
+
list[dict[str, datetime]],
|
|
575
|
+
list[dict[str, str]],
|
|
576
|
+
list[FixedRangeExclusionWindow],
|
|
577
|
+
list[models.AssertionExclusionWindowClass],
|
|
578
|
+
]
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def _try_parse_exclusion_window(
|
|
582
|
+
config: Optional[ExclusionWindowInputTypes],
|
|
583
|
+
) -> Union[FixedRangeExclusionWindow, list[FixedRangeExclusionWindow], None]:
|
|
584
|
+
if config is None:
|
|
585
|
+
return []
|
|
586
|
+
if isinstance(config, dict):
|
|
587
|
+
return [FixedRangeExclusionWindow(**config)]
|
|
588
|
+
if isinstance(config, FixedRangeExclusionWindow):
|
|
589
|
+
return [config]
|
|
590
|
+
elif isinstance(config, models.AssertionExclusionWindowClass):
|
|
591
|
+
assert config.fixedRange is not None
|
|
592
|
+
return [
|
|
593
|
+
FixedRangeExclusionWindow(
|
|
594
|
+
start=parse_ts_millis(config.fixedRange.startTimeMillis),
|
|
595
|
+
end=parse_ts_millis(config.fixedRange.endTimeMillis),
|
|
596
|
+
)
|
|
597
|
+
]
|
|
598
|
+
elif isinstance(config, list):
|
|
599
|
+
return _try_parse_list_of_exclusion_windows(config)
|
|
600
|
+
else:
|
|
601
|
+
raise SDKUsageErrorWithExamples(
|
|
602
|
+
msg=f"Invalid exclusion window: {config}",
|
|
603
|
+
examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def _try_parse_list_of_exclusion_windows(
|
|
608
|
+
config: IterableExclusionWindowInputTypes,
|
|
609
|
+
) -> Union[list[FixedRangeExclusionWindow], None]:
|
|
610
|
+
if all(isinstance(item, models.AssertionExclusionWindowClass) for item in config):
|
|
611
|
+
exclusion_windows = []
|
|
612
|
+
for item in config:
|
|
613
|
+
assert isinstance(item, models.AssertionExclusionWindowClass)
|
|
614
|
+
assert item.fixedRange is not None
|
|
615
|
+
exclusion_windows.append(
|
|
616
|
+
FixedRangeExclusionWindow(
|
|
617
|
+
start=parse_ts_millis(item.fixedRange.startTimeMillis),
|
|
618
|
+
end=parse_ts_millis(item.fixedRange.endTimeMillis),
|
|
619
|
+
)
|
|
620
|
+
)
|
|
621
|
+
return exclusion_windows
|
|
622
|
+
else:
|
|
623
|
+
exclusion_windows = []
|
|
624
|
+
for item in config:
|
|
625
|
+
if isinstance(item, dict):
|
|
626
|
+
try:
|
|
627
|
+
exclusion_windows.append(FixedRangeExclusionWindow(**item))
|
|
628
|
+
except ValidationError as e:
|
|
629
|
+
raise SDKUsageErrorWithExamples(
|
|
630
|
+
msg=f"Invalid exclusion window: {item}",
|
|
631
|
+
examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
|
|
632
|
+
) from e
|
|
633
|
+
elif isinstance(item, FixedRangeExclusionWindow):
|
|
634
|
+
exclusion_windows.append(item)
|
|
635
|
+
elif item is None:
|
|
636
|
+
pass
|
|
637
|
+
else:
|
|
638
|
+
raise SDKUsageErrorWithExamples(
|
|
639
|
+
msg=f"Invalid exclusion window: {item}",
|
|
640
|
+
examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
|
|
641
|
+
)
|
|
642
|
+
return exclusion_windows
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
class AssertionIncidentBehavior(Enum):
|
|
646
|
+
RAISE_ON_FAIL = "raise_on_fail"
|
|
647
|
+
RESOLVE_ON_PASS = "resolve_on_pass"
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES = {
|
|
651
|
+
"Raise on fail from string": "raise_on_fail",
|
|
652
|
+
"Raise on fail from enum": "AssertionIncidentBehavior.RAISE_ON_FAIL",
|
|
653
|
+
"Resolve on pass from string": "resolve_on_pass",
|
|
654
|
+
"Resolve on pass from enum": "AssertionIncidentBehavior.RESOLVE_ON_PASS",
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
AssertionIncidentBehaviorInputTypes: TypeAlias = Union[
|
|
658
|
+
str,
|
|
659
|
+
list[str],
|
|
660
|
+
AssertionIncidentBehavior,
|
|
661
|
+
list[AssertionIncidentBehavior],
|
|
662
|
+
None,
|
|
663
|
+
]
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def _try_parse_incident_behavior(
|
|
667
|
+
config: AssertionIncidentBehaviorInputTypes,
|
|
668
|
+
) -> Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior], None]:
|
|
669
|
+
if config is None:
|
|
670
|
+
return []
|
|
671
|
+
if isinstance(config, str):
|
|
672
|
+
try:
|
|
673
|
+
return [AssertionIncidentBehavior(config)]
|
|
674
|
+
except ValueError as e:
|
|
675
|
+
raise SDKUsageErrorWithExamples(
|
|
676
|
+
msg=f"Invalid incident behavior: {config}",
|
|
677
|
+
examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
|
|
678
|
+
) from e
|
|
679
|
+
if isinstance(config, AssertionIncidentBehavior):
|
|
680
|
+
return [config]
|
|
681
|
+
elif isinstance(config, list):
|
|
682
|
+
incident_behaviors = []
|
|
683
|
+
for item in config:
|
|
684
|
+
if isinstance(item, str):
|
|
685
|
+
try:
|
|
686
|
+
incident_behaviors.append(AssertionIncidentBehavior(item))
|
|
687
|
+
except ValueError as e:
|
|
688
|
+
raise SDKUsageErrorWithExamples(
|
|
689
|
+
msg=f"Invalid incident behavior: {item}",
|
|
690
|
+
examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
|
|
691
|
+
) from e
|
|
692
|
+
elif isinstance(item, AssertionIncidentBehavior):
|
|
693
|
+
incident_behaviors.append(item)
|
|
694
|
+
else:
|
|
695
|
+
raise SDKUsageErrorWithExamples(
|
|
696
|
+
msg=f"Invalid incident behavior: {item}",
|
|
697
|
+
examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
|
|
698
|
+
)
|
|
699
|
+
return incident_behaviors
|
|
700
|
+
else:
|
|
701
|
+
raise SDKUsageErrorWithExamples(
|
|
702
|
+
msg=f"Invalid incident behavior: {config}",
|
|
703
|
+
examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
|
|
704
|
+
)
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _generate_default_name(prefix: str, suffix_length: int) -> str:
|
|
708
|
+
return f"{prefix}-{''.join(random.choices(string.ascii_letters + string.digits, k=suffix_length))}"
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES = {
|
|
712
|
+
"Training data lookback days from int": ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
713
|
+
f"Training data lookback days from None (uses default of {ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS} days)": None,
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _try_parse_training_data_lookback_days(
|
|
718
|
+
training_data_lookback_days: Optional[int],
|
|
719
|
+
) -> int:
|
|
720
|
+
if training_data_lookback_days is None:
|
|
721
|
+
return ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
|
|
722
|
+
if isinstance(training_data_lookback_days, str):
|
|
723
|
+
try:
|
|
724
|
+
training_data_lookback_days = int(training_data_lookback_days)
|
|
725
|
+
except ValueError as e:
|
|
726
|
+
raise SDKUsageErrorWithExamples(
|
|
727
|
+
msg=f"Invalid training data lookback days: {training_data_lookback_days}",
|
|
728
|
+
examples=TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES,
|
|
729
|
+
) from e
|
|
730
|
+
if not isinstance(training_data_lookback_days, int):
|
|
731
|
+
raise SDKUsageErrorWithExamples(
|
|
732
|
+
msg=f"Invalid training data lookback days: {training_data_lookback_days}",
|
|
733
|
+
examples=TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES,
|
|
734
|
+
)
|
|
735
|
+
if training_data_lookback_days < 0:
|
|
736
|
+
raise SDKUsageError("Training data lookback days must be non-negative")
|
|
737
|
+
return training_data_lookback_days
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def _validate_cron_schedule(schedule: str, timezone: str) -> None:
|
|
741
|
+
"""We are using the POSIX.1-2017 standard for cron expressions.
|
|
742
|
+
|
|
743
|
+
Note: We are using the croniter library for cron parsing which is different from executor, which uses apscheduler, so there is a risk of mismatch here.
|
|
744
|
+
"""
|
|
745
|
+
try:
|
|
746
|
+
# Validate timezone - pytz.timezone() raises UnknownTimeZoneError for invalid timezones
|
|
747
|
+
# Skip timezone validation when empty
|
|
748
|
+
if timezone:
|
|
749
|
+
pytz.timezone(timezone)
|
|
750
|
+
|
|
751
|
+
# Validate 5-field cron expression only (POSIX.1-2017 standard)
|
|
752
|
+
fields = schedule.strip().split()
|
|
753
|
+
if len(fields) != 5:
|
|
754
|
+
raise ValueError("POSIX.1-2017 requires exactly 5 fields")
|
|
755
|
+
|
|
756
|
+
# POSIX.1-2017 specific validation: Sunday must be 0, not 7
|
|
757
|
+
# However croniter accepts 7 as Sunday, so custom check is needed here.
|
|
758
|
+
# Check the day-of-week field (5th field, index 4)
|
|
759
|
+
dow_field = fields[4]
|
|
760
|
+
if "7" in dow_field:
|
|
761
|
+
# Check if 7 appears as a standalone value or in ranges
|
|
762
|
+
import re
|
|
763
|
+
|
|
764
|
+
# Match 7 as standalone, in lists, or in ranges
|
|
765
|
+
if re.search(r"\b7\b|7-|,7,|^7,|,7$|-7\b", dow_field):
|
|
766
|
+
raise ValueError(
|
|
767
|
+
"POSIX.1-2017 standard: Sunday must be represented as 0, not 7"
|
|
768
|
+
)
|
|
769
|
+
|
|
770
|
+
# Validate cron expression - croniter constructor validates the expression
|
|
771
|
+
croniter(schedule)
|
|
772
|
+
|
|
773
|
+
except Exception as e:
|
|
774
|
+
raise SDKUsageError(
|
|
775
|
+
f"Invalid cron expression or timezone: {schedule} {timezone}, please use a POSIX.1-2017 compatible cron expression and timezone."
|
|
776
|
+
) from e
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
def _try_parse_schedule(
|
|
780
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
781
|
+
) -> Optional[models.CronScheduleClass]:
|
|
782
|
+
if schedule is None:
|
|
783
|
+
return None
|
|
784
|
+
if isinstance(schedule, str):
|
|
785
|
+
_validate_cron_schedule(schedule, "UTC")
|
|
786
|
+
return models.CronScheduleClass(
|
|
787
|
+
cron=schedule,
|
|
788
|
+
timezone="UTC",
|
|
789
|
+
)
|
|
790
|
+
if isinstance(schedule, models.CronScheduleClass):
|
|
791
|
+
_validate_cron_schedule(schedule.cron, schedule.timezone)
|
|
792
|
+
return schedule
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
T = TypeVar("T")
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def _try_parse_and_validate_schema_classes_enum(
|
|
802
|
+
value: Union[str, T],
|
|
803
|
+
enum_class: Type[T],
|
|
804
|
+
) -> T:
|
|
805
|
+
if isinstance(value, enum_class):
|
|
806
|
+
return value
|
|
807
|
+
assert isinstance(value, str)
|
|
808
|
+
if value.upper() not in get_enum_options(enum_class):
|
|
809
|
+
raise SDKUsageError(
|
|
810
|
+
f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
|
|
811
|
+
)
|
|
812
|
+
return getattr(enum_class, value.upper())
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
def get_gms_type_if_criteria_unchanged(
|
|
816
|
+
criteria_parameters: Union[str, int, float],
|
|
817
|
+
gms_type_info: Optional[Union[models.AssertionStdParameterTypeClass, tuple]],
|
|
818
|
+
) -> Optional[models.AssertionStdParameterTypeClass]:
|
|
819
|
+
"""
|
|
820
|
+
Get the GMS type for criteria, but only if the user hasn't changed the value.
|
|
821
|
+
|
|
822
|
+
When updating an assertion, we want to preserve the stored type metadata only
|
|
823
|
+
if the criteria value wasn't changed. If the user provided a new value, we
|
|
824
|
+
should infer the type from their input instead.
|
|
825
|
+
|
|
826
|
+
Args:
|
|
827
|
+
criteria_parameters: The criteria value (user-provided or extracted from GMS).
|
|
828
|
+
gms_type_info: Type info from GMS in format (value, type).
|
|
829
|
+
|
|
830
|
+
Returns:
|
|
831
|
+
The GMS type if criteria is unchanged, None if user provided new criteria.
|
|
832
|
+
"""
|
|
833
|
+
if gms_type_info is None:
|
|
834
|
+
return None
|
|
835
|
+
|
|
836
|
+
# Validate format: (value, type) where neither is a tuple
|
|
837
|
+
if not (
|
|
838
|
+
isinstance(gms_type_info, tuple)
|
|
839
|
+
and len(gms_type_info) >= 2
|
|
840
|
+
and not isinstance(gms_type_info[0], tuple)
|
|
841
|
+
and not isinstance(gms_type_info[1], tuple)
|
|
842
|
+
):
|
|
843
|
+
return None
|
|
844
|
+
|
|
845
|
+
gms_value, gms_type = gms_type_info[0], gms_type_info[1]
|
|
846
|
+
|
|
847
|
+
# If values match, criteria was extracted (not user-provided) - use GMS type
|
|
848
|
+
if criteria_parameters == gms_value:
|
|
849
|
+
return gms_type
|
|
850
|
+
|
|
851
|
+
# Values differ - user provided new criteria, caller should infer type
|
|
852
|
+
return None
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def get_gms_types_if_criteria_unchanged(
|
|
856
|
+
criteria_parameters: tuple,
|
|
857
|
+
gms_type_info: Optional[Union[models.AssertionStdParameterTypeClass, tuple]],
|
|
858
|
+
) -> Optional[tuple]:
|
|
859
|
+
"""
|
|
860
|
+
Get the GMS types for range criteria, but only if the user hasn't changed the values.
|
|
861
|
+
|
|
862
|
+
Args:
|
|
863
|
+
criteria_parameters: The range values (min, max).
|
|
864
|
+
gms_type_info: Type info from GMS in format ((min_val, max_val), (min_type, max_type)).
|
|
865
|
+
|
|
866
|
+
Returns:
|
|
867
|
+
The GMS types if criteria is unchanged, None if user provided new criteria.
|
|
868
|
+
"""
|
|
869
|
+
if gms_type_info is None:
|
|
870
|
+
return None
|
|
871
|
+
|
|
872
|
+
# Validate format: ((min_val, max_val), (min_type, max_type))
|
|
873
|
+
if not (
|
|
874
|
+
isinstance(gms_type_info, tuple)
|
|
875
|
+
and len(gms_type_info) == 2
|
|
876
|
+
and isinstance(gms_type_info[0], tuple)
|
|
877
|
+
and isinstance(gms_type_info[1], tuple)
|
|
878
|
+
):
|
|
879
|
+
return None
|
|
880
|
+
|
|
881
|
+
gms_values, gms_types = gms_type_info[0], gms_type_info[1]
|
|
882
|
+
|
|
883
|
+
# If values match, criteria was extracted (not user-provided) - use GMS types
|
|
884
|
+
if criteria_parameters == gms_values:
|
|
885
|
+
return gms_types
|
|
886
|
+
|
|
887
|
+
# Values differ - user provided new criteria, caller should infer types
|
|
888
|
+
return None
|
|
889
|
+
|
|
890
|
+
+@dataclass(frozen=True)
+class DatasetSourceType:
+    """
+    DatasetSourceType is used to represent a dataset source type.
+    It is used to check if a source type is valid for a dataset type and assertion type.
+
+    Args:
+        source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
+        platform: The platform of the dataset as a string OR "all" for all platforms.
+        assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.
+
+    Example:
+        DatasetSourceType(
+            source_type=_InformationSchema,
+            platform="databricks",
+            assertion_type="all",
+        )
+        This means that the source type _InformationSchema is invalid for the dataset type "databricks" and assertion type "all".
+        "all" in this example means that the source type is invalid for all assertion types.
+    """
+
+    source_type: Type[_DetectionMechanismTypes]
+    platform: str
+    assertion_type: Union[models.AssertionTypeClass, str]
+
+
+INVALID_SOURCE_TYPES = {
+    # Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
+    DatasetSourceType(
+        source_type=_InformationSchema,
+        platform="databricks",
+        assertion_type="all",
+    )
+}
+
+
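`INVALID_SOURCE_TYPES` is a `set`, which is why the dataclass is declared `frozen=True`: frozen dataclasses get a value-based `__hash__` and `__eq__`, so membership checks compare fields rather than identity. A standalone sketch of the same pattern (field names mirror `DatasetSourceType`; the values are illustrative):

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class _Rule:
    source_type: str
    platform: str
    assertion_type: str


rules = {_Rule("information_schema", "databricks", "all")}
# A separately constructed instance with equal fields is found in the set.
assert _Rule("information_schema", "databricks", "all") in rules
```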
+def _is_source_type_valid(
+    dataset_source_type: DatasetSourceType,
+    invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
+) -> bool:
+    for invalid in invalid_source_types:
+        if invalid.source_type == dataset_source_type.source_type:
+            # If both platform and assertion type are "all", the source type is invalid for all combinations
+            if invalid.platform == "all" and invalid.assertion_type == "all":
+                return False
+            # If platform matches and assertion type is "all", the source type is invalid for all assertion types on that platform
+            if (
+                invalid.platform == dataset_source_type.platform
+                and invalid.assertion_type == "all"
+            ):
+                return False
+            # If platform is "all" and assertion type matches, the source type is invalid for all platforms for that assertion type
+            if (
+                invalid.platform == "all"
+                and invalid.assertion_type == dataset_source_type.assertion_type
+            ):
+                return False
+            # If both platform and assertion type match exactly, the source type is invalid
+            if (
+                invalid.platform == dataset_source_type.platform
+                and invalid.assertion_type == dataset_source_type.assertion_type
+            ):
+                return False
+    return True
+
+
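A sketch of how the registry and the check combine, assuming `_InformationSchema` and the names above are in scope (the `"FRESHNESS"` string stands in for `models.AssertionTypeClass.FRESHNESS`):

```python
# The registry entry has platform="databricks" and assertion_type="all",
# so every assertion type on databricks is rejected for this mechanism.
assert not _is_source_type_valid(
    DatasetSourceType(
        source_type=_InformationSchema,
        platform="databricks",
        assertion_type="FRESHNESS",
    )
)

# The same mechanism on a different platform passes.
assert _is_source_type_valid(
    DatasetSourceType(
        source_type=_InformationSchema,
        platform="snowflake",
        assertion_type="FRESHNESS",
    )
)
```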
+class _HasSmartAssertionInputs:
+    """
+    A class that contains the common inputs for smart assertions.
+    This is used to avoid code duplication in the smart assertion inputs.
+
+    Args:
+        sensitivity: The sensitivity to be applied to the assertion.
+        exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
+        training_data_lookback_days: The training data lookback days to be applied to the assertion.
+    """
+
+    def __init__(
+        self,
+        *,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+    ):
+        self.sensitivity = InferenceSensitivity.parse(sensitivity)
+        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
+        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
+            training_data_lookback_days
+        )
+
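All three constructor arguments are keyword-only and funnel through parse helpers, so callers can pass raw strings or `None` and get typed values back. A hedged sketch (whether `"high"` is an accepted sensitivity string depends on `InferenceSensitivity.parse`, which is defined elsewhere in this package):

```python
# Illustrative only: the string/int forms are normalized by the parse helpers.
inputs = _HasSmartAssertionInputs(
    sensitivity="high",              # parsed via InferenceSensitivity.parse (assumed value)
    exclusion_windows=None,          # no windows applied, per the docstring above
    training_data_lookback_days=30,  # validated by _try_parse_training_data_lookback_days
)
```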
+    def _convert_exclusion_windows(
+        self,
+    ) -> list[models.AssertionExclusionWindowClass]:
+        """
+        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
+
+        Returns:
+            A list of AssertionExclusionWindowClass objects.
+
+        Raises:
+            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
+        """
+        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
+        if self.exclusion_windows:
+            for window in self.exclusion_windows:
+                if not isinstance(window, FixedRangeExclusionWindow):
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Invalid exclusion window type: {window}",
+                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+                    )
+                # To match the UI, we generate a display name for the exclusion window.
+                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
+                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
+                generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
+                exclusion_windows.append(
+                    models.AssertionExclusionWindowClass(
+                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
+                        displayName=generated_display_name,
+                        fixedRange=models.AbsoluteTimeWindowClass(
+                            startTimeMillis=make_ts_millis(window.start),
+                            endTimeMillis=make_ts_millis(window.end),
+                        ),
+                    )
+                )
+        return exclusion_windows
+
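One portability note on the display-name format: the `%-d` and `%-I` (no-leading-zero) `strftime` flags are a glibc extension, so this exact format string assumes a POSIX platform (Windows spells the same thing `%#d` / `%#I`). A standalone sketch of the format the method produces:

```python
from datetime import datetime

start = datetime(2025, 1, 5, 9, 30)
end = datetime(2025, 1, 7, 17, 0)
# Mirrors the dayjs 'MMM D, h:mm A' format referenced in the comments above.
name = f"{start.strftime('%b %-d, %-I:%M %p')} - {end.strftime('%b %-d, %-I:%M %p')}"
print(name)  # Jan 5, 9:30 AM - Jan 7, 5:00 PM (on glibc platforms)
```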
+    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
+        """
+        Convert sensitivity into an AssertionMonitorSensitivityClass.
+
+        Returns:
+            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
+        """
+        return models.AssertionMonitorSensitivityClass(
+            level=InferenceSensitivity.to_int(self.sensitivity),
+        )
+
+
+class _AssertionInput(ABC):
+    def __init__(
+        self,
+        *,
+        dataset_urn: Union[str, DatasetUrn],
+        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
+        urn: Optional[
+            Union[str, AssertionUrn]
+        ] = None,  # Can be None if the assertion is not yet created
+        display_name: Optional[str] = None,
+        enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
+        detection_mechanism: DetectionMechanismInputTypes = None,
+        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
+        tags: Optional[TagsInputType] = None,
+        source_type: str = models.AssertionSourceTypeClass.NATIVE,  # Verified on init to be a valid enum value
+        created_by: Union[str, CorpUserUrn],
+        created_at: datetime,
+        updated_by: Union[str, CorpUserUrn],
+        updated_at: datetime,
+        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
+    ):
+        """
+        Create an AssertionInput object.
+
+        Args:
+            dataset_urn: The urn of the dataset to be monitored.
+            entity_client: The entity client to be used for creating the assertion.
+            urn: The urn of the assertion. If not provided, a random urn will be generated.
+            display_name: The display name of the assertion. If not provided, a random display name will be generated.
+            enabled: Whether the assertion is enabled. Defaults to True.
+            detection_mechanism: The detection mechanism to be used for the assertion.
+            incident_behavior: The incident behavior to be applied to the assertion. Accepts:
+                - String values: "raise_on_fail", "resolve_on_pass"
+                - Enum values: AssertionIncidentBehavior.RAISE_ON_FAIL, AssertionIncidentBehavior.RESOLVE_ON_PASS
+                - Lists of any of the above values
+                - None (default behavior)
+            tags: The tags to be applied to the assertion.
+            source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
+            created_by: The actor that created the assertion.
+            created_at: The timestamp of the assertion creation.
+            updated_by: The actor that last updated the assertion.
+            updated_at: The timestamp of the assertion last update.
+        """
+        self.dataset_urn = DatasetUrn.from_string(dataset_urn)
+        self.entity_client = entity_client
+        self.urn = AssertionUrn(urn) if urn else None
+        self.display_name = (
+            display_name
+            if display_name is not None
+            else _generate_default_name(DEFAULT_NAME_PREFIX, DEFAULT_NAME_SUFFIX_LENGTH)
+        )
+        self.enabled = enabled
+        self.schedule = _try_parse_schedule(schedule)
+        self.detection_mechanism = DetectionMechanism.parse(
+            detection_mechanism, default_detection_mechanism
+        )
+        if not _is_source_type_valid(
+            DatasetSourceType(
+                source_type=type(self.detection_mechanism),
+                platform=self.dataset_urn.platform,
+                assertion_type=self._assertion_type(),
+            )
+        ):
+            raise SDKUsageError(
+                f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
+            )
+        self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
+        self.tags = tags
+        if source_type not in get_enum_options(models.AssertionSourceTypeClass):
+            raise SDKUsageError(
+                msg=f"Invalid source type: {source_type}, valid options are {get_enum_options(models.AssertionSourceTypeClass)}",
+            )
+        self.source_type = source_type
+        self.created_by = created_by
+        self.created_at = created_at
+        self.updated_by = updated_by
+        self.updated_at = updated_at
+        self.cached_dataset: Optional[Dataset] = None
+
+    def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
+        """
+        Convert the assertion input to an assertion and monitor entity.
+
+        Returns:
+            A tuple of (assertion, monitor) entities.
+        """
+        assertion = self.to_assertion_entity()
+        monitor = self.to_monitor_entity(assertion.urn)
+        return assertion, monitor
+
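`_AssertionInput` is abstract, so the concrete subclasses elsewhere in this package (freshness, volume, and so on) are what actually get instantiated; every one of them produces the same two-entity pair. A hypothetical helper showing the intended flow (`assertion_input` is a stand-in for such a subclass instance, and the `upsert` calls assume the DataHub SDK's `EntityClient` write API):

```python
def upsert_entities(assertion_input, entity_client) -> None:
    # The assertion carries the definition (actions, tags, source);
    # the monitor is keyed by (dataset_urn, assertion_urn) and carries
    # status + schedule, so the two are written together.
    assertion, monitor = assertion_input.to_assertion_and_monitor_entities()
    entity_client.upsert(assertion)
    entity_client.upsert(monitor)
```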
+    def to_assertion_entity(self) -> Assertion:
+        """
+        Convert the assertion input to an assertion entity.
+
+        Returns:
+            The created assertion entity.
+        """
+        on_success, on_failure = self._convert_incident_behavior()
+        filter = self._create_filter_from_detection_mechanism()
+
+        return Assertion(
+            id=self.urn,
+            info=self._create_assertion_info(filter),
+            description=self.display_name,
+            on_success=on_success,
+            on_failure=on_failure,
+            tags=self._convert_tags(),
+            source=self._convert_source(),
+            last_updated=self._convert_last_updated(),
+        )
+
+    def _convert_incident_behavior(
+        self,
+    ) -> tuple[
+        Optional[AssertionActionsInputType],
+        Optional[AssertionActionsInputType],
+    ]:
+        """
+        Convert incident behavior to on_success and on_failure actions.
+
+        Returns:
+            A tuple of (on_success, on_failure) actions.
+        """
+        if not self.incident_behavior:
+            return None, None
+
+        behaviors = (
+            [self.incident_behavior]
+            if isinstance(self.incident_behavior, AssertionIncidentBehavior)
+            else self.incident_behavior
+        )
+
+        on_success: Optional[AssertionActionsInputType] = [
+            models.AssertionActionClass(
+                type=models.AssertionActionTypeClass.RESOLVE_INCIDENT
+            )
+            for behavior in behaviors
+            if behavior == AssertionIncidentBehavior.RESOLVE_ON_PASS
+        ] or None
+
+        on_failure: Optional[AssertionActionsInputType] = [
+            models.AssertionActionClass(
+                type=models.AssertionActionTypeClass.RAISE_INCIDENT
+            )
+            for behavior in behaviors
+            if behavior == AssertionIncidentBehavior.RAISE_ON_FAIL
+        ] or None
+
+        return on_success, on_failure
+
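Each behavior flag independently produces an action list, and the `[...] or None` idiom collapses an empty list to `None` so that "no actions" is stored as an absent value rather than `[]`. A standalone sketch of the same mapping with plain strings in place of the enum and model classes:

```python
behaviors = ["raise_on_fail"]  # illustrative; the real code uses enum members

on_failure = [
    "RAISE_INCIDENT" for b in behaviors if b == "raise_on_fail"
] or None
on_success = [
    "RESOLVE_INCIDENT" for b in behaviors if b == "resolve_on_pass"
] or None

assert on_failure == ["RAISE_INCIDENT"]
assert on_success is None  # empty list collapsed to None
```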
+    def _create_filter_from_detection_mechanism(
+        self,
+    ) -> Optional[models.DatasetFilterClass]:
+        """
+        Create a filter from the detection mechanism if it has an additional filter.
+
+        Returns:
+            A DatasetFilterClass if the detection mechanism has an additional filter, None otherwise.
+        """
+        if not isinstance(
+            self.detection_mechanism,
+            _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER,
+        ):
+            return None
+
+        additional_filter = self.detection_mechanism.additional_filter
+        if not additional_filter:
+            return None
+
+        return models.DatasetFilterClass(
+            type=models.DatasetFilterTypeClass.SQL,
+            sql=additional_filter,
+        )
+
+    def _convert_tags(self) -> Optional[TagsInputType]:
+        """
+        Convert the tags input into a standardized format.
+
+        Tag names are automatically converted to tag URNs using TagUrn constructor. For example:
+        - "my_tag" becomes "urn:li:tag:my_tag"
+        - "urn:li:tag:my_tag" remains unchanged
+
+        Returns:
+            A list of tags or None if no tags are provided.
+
+        Raises:
+            SDKUsageErrorWithExamples: If the tags input is invalid.
+        """
+        if not self.tags:
+            return None
+
+        if isinstance(self.tags, str):
+            return [str(TagUrn(self.tags))]
+        elif isinstance(self.tags, list):
+            return [
+                str(TagUrn(tag)) if isinstance(tag, str) else tag for tag in self.tags
+            ]
+        else:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid tags: {self.tags}",
+                examples={
+                    "Tags from string (tag name)": "my_tag_1",
+                    "Tags from string (tag URN)": "urn:li:tag:my_tag_1",
+                    "Tags from list (mixed)": cast(
+                        Collection[str],
+                        [
+                            "my_tag_1",
+                            "urn:li:tag:my_tag_2",
+                        ],
+                    ),
+                },
+            )
+
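A sketch of the coercion rules the docstring describes, assuming `TagUrn` is importable as in the rest of this module (both behaviors below follow the docstring's own examples):

```python
from datahub.metadata.urns import TagUrn

assert str(TagUrn("my_tag")) == "urn:li:tag:my_tag"             # bare name -> URN
assert str(TagUrn("urn:li:tag:my_tag")) == "urn:li:tag:my_tag"  # URN passes through
```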
+    def _convert_source(self) -> models.AssertionSourceClass:
+        """
+        Convert the source input into a models.AssertionSourceClass.
+        """
+        return models.AssertionSourceClass(
+            type=self.source_type,
+            created=models.AuditStampClass(
+                time=make_ts_millis(self.created_at),
+                actor=str(self.created_by),
+            ),
+        )
+
+    def _convert_last_updated(self) -> tuple[datetime, str]:
+        """
+        Convert the last updated input into a tuple of (datetime, str).
+
+        Validation is handled in the Assertion entity constructor.
+        """
+        return (self.updated_at, str(self.updated_by))
+
+    def to_monitor_entity(self, assertion_urn: AssertionUrn) -> Monitor:
+        """
+        Convert the assertion input to a monitor entity.
+
+        Args:
+            assertion_urn: The URN of the assertion to monitor.
+
+        Returns:
+            A Monitor entity configured with the assertion input parameters.
+        """
+        return Monitor(
+            id=(self.dataset_urn, assertion_urn),
+            info=self._create_monitor_info(
+                assertion_urn=assertion_urn,
+                status=self._convert_monitor_status(),
+                schedule=self._convert_schedule(),
+            ),
+        )
+
+    def _convert_monitor_status(self) -> models.MonitorStatusClass:
+        """
+        Convert the enabled flag into a MonitorStatusClass.
+
+        Returns:
+            A MonitorStatusClass with ACTIVE or INACTIVE mode based on the enabled flag.
+        """
+        return models.MonitorStatusClass(
+            mode=models.MonitorModeClass.ACTIVE
+            if self.enabled
+            else models.MonitorModeClass.INACTIVE,
+        )
+
+    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
+        """
+        Get the schema field spec for the detection mechanism if needed.
+        """
+        # Only fetch the dataset if it's not already cached.
+        # Also we only fetch the dataset if it's needed for the detection mechanism.
+        if self.cached_dataset is None:
+            self.cached_dataset = self.entity_client.get(self.dataset_urn)
+
+        # Handle case where dataset doesn't exist
+        if self.cached_dataset is None:
+            raise SDKUsageError(
+                f"Dataset {self.dataset_urn} not found. Cannot validate column {column_name}."
+            )
+
+        # TODO: Make a public accessor for _schema_dict in the SDK
+        schema_fields = self.cached_dataset._schema_dict()
+        field = schema_fields.get(column_name)
+        if field:
+            return self._convert_schema_field_to_schema_field_spec(field)
+        else:
+            raise SDKUsageError(
+                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
+            )
+
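Note the caching shape: a successful fetch is reused for every subsequent column lookup on the same input object, while a missing dataset leaves `cached_dataset` as `None` and raises on every call. A standalone sketch of the same lazy fetch-and-cache pattern:

```python
from typing import Callable, Optional


class _LazyDataset:
    def __init__(self, fetch: Callable[[], Optional[object]]):
        self._fetch = fetch
        self._cached: Optional[object] = None

    def get(self) -> object:
        if self._cached is None:
            self._cached = self._fetch()  # fetched only on first use
        if self._cached is None:
            raise LookupError("dataset not found")  # mirrors the SDKUsageError above
        return self._cached
```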
+    def _convert_schema_field_to_schema_field_spec(
+        self, field: models.SchemaFieldClass
+    ) -> models.SchemaFieldSpecClass:
+        """
+        Convert a SchemaFieldClass to a SchemaFieldSpecClass.
+        """
+        type_class_name = field.type.type.__class__.__name__
+        try:
+            type = self._convert_schema_field_type_class_name_to_type(type_class_name)
+        except KeyError as e:
+            raise SDKUsageError(
+                msg=f"Invalid type: {type_class_name}. Must be one of {list(TYPE_CLASS_NAME_TO_TYPE_MAP.keys())}",
+            ) from e
+
+        return models.SchemaFieldSpecClass(
+            path=field.fieldPath,
+            type=type,
+            nativeType=field.nativeDataType,
+        )
+
+    def _convert_schema_field_type_class_name_to_type(
+        self, type_class_name: str
+    ) -> str:
+        """
+        Convert a type class name to a type.
+        """
+        return TYPE_CLASS_NAME_TO_TYPE_MAP[type_class_name]
+
+    def _validate_field_type(
+        self,
+        field_spec: models.SchemaFieldSpecClass,
+        column_name: str,
+        allowed_types: list[DictWrapper],
+        field_type_name: str,
+    ) -> None:
+        """
+        Validate that a field has an allowed type.
+
+        Args:
+            field_spec: The field specification to validate
+            column_name: The name of the column for error messages
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
+
+        Raises:
+            SDKUsageError: If the field has an invalid type
+        """
+        allowed_type_names = [
+            self._convert_schema_field_type_class_name_to_type(t.__class__.__name__)
+            for t in allowed_types
+        ]
+        if field_spec.type not in allowed_type_names:
+            raise SDKUsageError(
+                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
+                f"Allowed types are {allowed_type_names}.",
+            )
+
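A sketch of the validation flow (`assertion_input` is a stand-in for a concrete `_AssertionInput` subclass instance; the `"string"` type value and the exact mapped type names are illustrative, since `TYPE_CLASS_NAME_TO_TYPE_MAP` is defined elsewhere in this module):

```python
spec = models.SchemaFieldSpecClass(path="created_at", type="string", nativeType="VARCHAR")
try:
    assertion_input._validate_field_type(
        spec,
        "created_at",
        [models.DateTypeClass(), models.TimeTypeClass()],  # allowed types
        "last modified column",                            # used in the error message
    )
except SDKUsageError:
    pass  # "string" is not among the allowed type names, so this raises
```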
+    @abstractmethod
+    def _create_monitor_info(
+        self,
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+    ) -> models.MonitorInfoClass:
+        """
+        Create a MonitorInfoClass with all the necessary components.
+
+        Args:
+            status: The monitor status.
+            schedule: The monitor schedule.
+        Returns:
+            A MonitorInfoClass configured with all the provided components.
+        """
+        pass
+
+    @abstractmethod
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        pass
+
+    @abstractmethod
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """Create assertion info specific to the assertion type."""
+        pass
+
+    @abstractmethod
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Convert schedule to appropriate format for the assertion type."""
+        pass
+
+    @abstractmethod
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        """Get evaluation parameters specific to the assertion type."""
+        pass
+
+    @abstractmethod
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[FieldSpecType]]:
+        """Convert detection mechanism to source type and field spec."""
+        pass
+
+
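Together these six hooks are the full contract a concrete assertion type must satisfy. A hypothetical minimal subclass, purely to show the shape (the real subclasses live elsewhere in this package; the bodies below are elided stand-ins):

```python
class _ExampleFreshnessInput(_AssertionInput):
    def _assertion_type(self) -> str:
        return models.AssertionTypeClass.FRESHNESS

    def _convert_schedule(self) -> models.CronScheduleClass:
        return self.schedule  # already parsed by _try_parse_schedule in __init__

    def _create_assertion_info(self, filter):
        ...  # build the type-specific assertion info, consuming the SQL filter

    def _create_monitor_info(self, assertion_urn, status, schedule):
        ...  # wrap evaluation parameters + status + schedule into MonitorInfoClass

    def _get_assertion_evaluation_parameters(self, source_type, field):
        ...  # evaluation parameters for the chosen detection mechanism

    def _convert_assertion_source_type_and_field(self):
        ...  # map the detection mechanism to (source_type, field_spec)
```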
+class _HasFreshnessFeatures:
+    def _create_field_spec(
+        self,
+        column_name: str,
+        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
+        field_type_name: str,
+        kind: str,
+        get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
+        validate_field_type: Callable[
+            [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
+        ],
+    ) -> models.FreshnessFieldSpecClass:
+        """
+        Create a field specification for a column, validating its type.
+
+        Args:
+            column_name: The name of the column to create a spec for
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
+            kind: The kind of field to create
+
+        Returns:
+            A FreshnessFieldSpecClass for the column
+
+        Raises:
+            SDKUsageError: If the column is not found or has an invalid type
+        """
+        SUPPORTED_KINDS = [
+            models.FreshnessFieldKindClass.LAST_MODIFIED,
+            models.FreshnessFieldKindClass.HIGH_WATERMARK,
+        ]
+        if kind not in SUPPORTED_KINDS:
+            raise SDKUsageError(
+                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
+            )
+
+        field_spec = get_schema_field_spec(column_name)
+        validate_field_type(field_spec, column_name, allowed_types, field_type_name)
+        return models.FreshnessFieldSpecClass(
+            path=field_spec.path,
+            type=field_spec.type,
+            nativeType=field_spec.nativeType,
+            kind=kind,
+        )
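A sketch of how a freshness input would wire the `_AssertionInput` helpers into this mixin (`assertion_input` is a hypothetical object that mixes in both classes; the column name and allowed types are illustrative):

```python
field_spec = assertion_input._create_field_spec(
    column_name="updated_at",
    allowed_types=[models.DateTypeClass(), models.TimeTypeClass()],
    field_type_name="last modified column",
    kind=models.FreshnessFieldKindClass.LAST_MODIFIED,
    get_schema_field_spec=assertion_input._get_schema_field_spec,
    validate_field_type=assertion_input._validate_field_type,
)
# field_spec now carries path/type/nativeType from the dataset schema plus the kind.
```

Passing the two callables in rather than calling `self` directly keeps the mixin decoupled from `_AssertionInput`, so it can be composed with any class that provides schema lookup and validation.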