acryl-datahub-cloud 0.3.10rc4__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +37 -8
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +39 -19
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +60 -25
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +9 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +248 -52
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +532 -109
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2390 -1938
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +6 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +14 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +27843 -23200
- acryl_datahub_cloud/metadata/schema_classes.py +29901 -24310
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +375 -212
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +191 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +424 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +65 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +84 -29
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +221 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +147 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +49 -43
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +243 -145
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
|
+
AssertionMode,
|
|
9
|
+
_AssertionPublic,
|
|
10
|
+
_HasColumnMetricFunctionality,
|
|
11
|
+
_HasSchedule,
|
|
12
|
+
)
|
|
13
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
14
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
15
|
+
DEFAULT_SCHEDULE,
|
|
16
|
+
AssertionIncidentBehavior,
|
|
17
|
+
DetectionMechanism,
|
|
18
|
+
_DetectionMechanismTypes,
|
|
19
|
+
)
|
|
20
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
|
|
21
|
+
ColumnMetricAssertionParameters,
|
|
22
|
+
MetricInputType,
|
|
23
|
+
OperatorInputType,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
26
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
27
|
+
from datahub.metadata import schema_classes as models
|
|
28
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ColumnMetricAssertion(
    _HasColumnMetricFunctionality,
    _HasSchedule,
    _AssertionPublic,
):
    """
    Public representation of a column metric assertion.

    A column metric assertion validates the value of a common field / column
    metric (e.g. an aggregation) such as null count + percentage, min, max,
    median, and more. It uses native source types without AI inference.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        column_name: str,
        metric_type: MetricInputType,
        operator: OperatorInputType,
        # Criteria for the operator, bundled into a single object.
        criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
        # Parameters shared by all public assertion types.
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a column metric assertion.

        Each base class is initialized explicitly with the subset of keyword
        arguments it owns.

        Args:
            urn: The URN of the assertion.
            dataset_urn: The URN of the dataset to monitor.
            column_name: Forwarded to ``_HasColumnMetricFunctionality``.
            metric_type: Forwarded to ``_HasColumnMetricFunctionality``.
            operator: Forwarded to ``_HasColumnMetricFunctionality``.
            criteria_parameters: Forwarded to ``_HasColumnMetricFunctionality``.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active/inactive).
            schedule: Forwarded to ``_HasSchedule``; defaults to
                ``DEFAULT_SCHEDULE``.
            incident_behavior: The behavior when incidents occur.
            detection_mechanism: The mechanism used to detect changes.
            tags: The tags to apply to the assertion.
            created_by: The URN of the user who created the assertion.
            created_at: The timestamp when the assertion was created.
            updated_by: The URN of the user who last updated the assertion.
            updated_at: The timestamp when the assertion was last updated.
        """
        # Common assertion state first, then the schedule and column metric mixins.
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSchedule.__init__(self, schedule=schedule)
        _HasColumnMetricFunctionality.__init__(
            self,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            criteria_parameters=criteria_parameters,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Build a ColumnMetricAssertion from its Assertion and Monitor entities.

        Args:
            assertion: The Assertion entity.
            monitor: The Monitor entity.

        Returns:
            A ColumnMetricAssertion instance.
        """
        # Keyword arguments are evaluated in the order written below.
        instance = cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            column_name=cls._get_column_name(assertion),
            metric_type=cls._get_metric_type(assertion),
            operator=cls._get_operator(assertion),
            criteria_parameters=cls._get_criteria_parameters(assertion),
            # A missing description becomes an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            tags=cls._get_tags(assertion),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
        )
        return instance

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Resolve the detection mechanism of a column metric assertion.

        Args:
            assertion: The Assertion entity.
            monitor: The Monitor entity.
            default: Value returned whenever the mechanism cannot be resolved.

        Returns:
            The detection mechanism matching the monitor's dataset field
            source type, or ``default`` (after a warning is logged, except
            when the validated detection context is ``None``) if it cannot
            be resolved.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
            models.FieldAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default

        field_parameters = parameters.datasetFieldParameters
        if field_parameters is None:
            logger.warning(
                f"Monitor does not have datasetFieldParameters, defaulting detection mechanism to {default}"
            )
            return default

        source_types = models.DatasetFieldAssertionSourceTypeClass
        source_type = field_parameters.sourceType

        if source_type == source_types.ALL_ROWS_QUERY:
            return DetectionMechanism.ALL_ROWS_QUERY(
                additional_filter=_AssertionPublic._get_additional_filter(assertion)
            )

        if source_type == source_types.CHANGED_ROWS_QUERY:
            changed_rows_field = field_parameters.changedRowsField
            if changed_rows_field is None:
                logger.warning(
                    f"Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, defaulting detection mechanism to {default}"
                )
                return default
            return DetectionMechanism.CHANGED_ROWS_QUERY(
                column_name=changed_rows_field.path,
                additional_filter=_AssertionPublic._get_additional_filter(assertion),
            )

        if source_type == source_types.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

        # Any other source type is unsupported here.
        logger.warning(
            f"Unsupported DatasetFieldAssertionSourceType {source_type}, defaulting detection mechanism to {default}"
        )
        return default
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Column value assertion module.
|
|
3
|
+
|
|
4
|
+
This module provides the ColumnValueAssertion class for representing column value
|
|
5
|
+
assertions that validate individual row values against semantic constraints.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Optional, Union
|
|
11
|
+
|
|
12
|
+
from typing_extensions import Self
|
|
13
|
+
|
|
14
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
15
|
+
AssertionMode,
|
|
16
|
+
_AssertionPublic,
|
|
17
|
+
_HasSchedule,
|
|
18
|
+
)
|
|
19
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
20
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
21
|
+
DEFAULT_SCHEDULE,
|
|
22
|
+
AssertionIncidentBehavior,
|
|
23
|
+
DetectionMechanism,
|
|
24
|
+
_DetectionMechanismTypes,
|
|
25
|
+
)
|
|
26
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
27
|
+
OperatorInputType,
|
|
28
|
+
)
|
|
29
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_value_assertion_input import (
|
|
30
|
+
ColumnValueAssertionParameters,
|
|
31
|
+
FailThresholdType,
|
|
32
|
+
FieldTransformInputType,
|
|
33
|
+
)
|
|
34
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
35
|
+
from acryl_datahub_cloud.sdk.entities.monitor import (
|
|
36
|
+
Monitor,
|
|
37
|
+
_get_nested_field_for_entity_with_default,
|
|
38
|
+
)
|
|
39
|
+
from acryl_datahub_cloud.sdk.errors import SDKUsageError
|
|
40
|
+
from datahub.metadata import schema_classes as models
|
|
41
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ColumnValueAssertion(
|
|
47
|
+
_HasSchedule,
|
|
48
|
+
_AssertionPublic,
|
|
49
|
+
):
|
|
50
|
+
"""
|
|
51
|
+
A class that represents a column value assertion.
|
|
52
|
+
|
|
53
|
+
This assertion validates individual row values in a column against semantic
|
|
54
|
+
constraints (e.g., "all values in column X must match pattern Y" or
|
|
55
|
+
"no NULL values allowed").
|
|
56
|
+
|
|
57
|
+
Key differences from ColumnMetricAssertion (FIELD_METRIC):
|
|
58
|
+
- FIELD_METRIC: Validates aggregated metrics (NULL_COUNT, MEAN, MIN, etc.)
|
|
59
|
+
- FIELD_VALUES: Validates each individual row value against an operator/predicate
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
def __init__(
|
|
63
|
+
self,
|
|
64
|
+
*,
|
|
65
|
+
urn: AssertionUrn,
|
|
66
|
+
dataset_urn: DatasetUrn,
|
|
67
|
+
column_name: str,
|
|
68
|
+
operator: OperatorInputType,
|
|
69
|
+
criteria_parameters: Optional[ColumnValueAssertionParameters] = None,
|
|
70
|
+
transform: Optional[FieldTransformInputType] = None,
|
|
71
|
+
fail_threshold_type: FailThresholdType = FailThresholdType.COUNT,
|
|
72
|
+
fail_threshold_value: int = 0,
|
|
73
|
+
exclude_nulls: bool = True,
|
|
74
|
+
display_name: str,
|
|
75
|
+
mode: AssertionMode,
|
|
76
|
+
schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
|
|
77
|
+
incident_behavior: list[AssertionIncidentBehavior],
|
|
78
|
+
detection_mechanism: Optional[
|
|
79
|
+
_DetectionMechanismTypes
|
|
80
|
+
] = DEFAULT_DETECTION_MECHANISM,
|
|
81
|
+
tags: list[TagUrn],
|
|
82
|
+
created_by: Optional[CorpUserUrn] = None,
|
|
83
|
+
created_at: Union[datetime, None] = None,
|
|
84
|
+
updated_by: Optional[CorpUserUrn] = None,
|
|
85
|
+
updated_at: Optional[datetime] = None,
|
|
86
|
+
):
|
|
87
|
+
"""
|
|
88
|
+
Initialize a column value assertion.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
urn: The URN of the assertion.
|
|
92
|
+
dataset_urn: The URN of the dataset to monitor.
|
|
93
|
+
column_name: The name of the column to validate.
|
|
94
|
+
operator: The operator to use for validation.
|
|
95
|
+
criteria_parameters: The criteria parameters for the operator.
|
|
96
|
+
transform: Optional transform to apply to field values before evaluation.
|
|
97
|
+
fail_threshold_type: The type of failure threshold (COUNT or PERCENTAGE).
|
|
98
|
+
fail_threshold_value: The failure threshold value.
|
|
99
|
+
exclude_nulls: Whether to exclude nulls when evaluating.
|
|
100
|
+
display_name: The display name of the assertion.
|
|
101
|
+
mode: The mode of the assertion (active/inactive).
|
|
102
|
+
schedule: The evaluation schedule.
|
|
103
|
+
incident_behavior: The behavior when incidents occur.
|
|
104
|
+
detection_mechanism: The mechanism used to detect changes.
|
|
105
|
+
tags: The tags to apply to the assertion.
|
|
106
|
+
created_by: The URN of the user who created the assertion.
|
|
107
|
+
created_at: The timestamp when the assertion was created.
|
|
108
|
+
updated_by: The URN of the user who last updated the assertion.
|
|
109
|
+
updated_at: The timestamp when the assertion was last updated.
|
|
110
|
+
"""
|
|
111
|
+
_AssertionPublic.__init__(
|
|
112
|
+
self,
|
|
113
|
+
urn=urn,
|
|
114
|
+
dataset_urn=dataset_urn,
|
|
115
|
+
display_name=display_name,
|
|
116
|
+
mode=mode,
|
|
117
|
+
tags=tags,
|
|
118
|
+
incident_behavior=incident_behavior,
|
|
119
|
+
detection_mechanism=detection_mechanism,
|
|
120
|
+
created_by=created_by,
|
|
121
|
+
created_at=created_at,
|
|
122
|
+
updated_by=updated_by,
|
|
123
|
+
updated_at=updated_at,
|
|
124
|
+
)
|
|
125
|
+
_HasSchedule.__init__(
|
|
126
|
+
self,
|
|
127
|
+
schedule=schedule,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
self._column_name = column_name
|
|
131
|
+
self._operator = operator
|
|
132
|
+
self._criteria_parameters = criteria_parameters
|
|
133
|
+
self._transform = transform
|
|
134
|
+
self._fail_threshold_type = fail_threshold_type
|
|
135
|
+
self._fail_threshold_value = fail_threshold_value
|
|
136
|
+
self._exclude_nulls = exclude_nulls
|
|
137
|
+
|
|
138
|
+
@property
|
|
139
|
+
def column_name(self) -> str:
|
|
140
|
+
return self._column_name
|
|
141
|
+
|
|
142
|
+
@property
|
|
143
|
+
def operator(self) -> OperatorInputType:
|
|
144
|
+
return self._operator
|
|
145
|
+
|
|
146
|
+
@property
|
|
147
|
+
def criteria_parameters(self) -> Optional[ColumnValueAssertionParameters]:
|
|
148
|
+
return self._criteria_parameters
|
|
149
|
+
|
|
150
|
+
@property
def transform(self) -> Optional[FieldTransformInputType]:
    """Return the stored field transform, which may be ``None``."""
    stored_transform = self._transform
    return stored_transform
|
|
153
|
+
|
|
154
|
+
@property
def fail_threshold_type(self) -> FailThresholdType:
    """Return the fail threshold type stored on this assertion instance."""
    stored_type = self._fail_threshold_type
    return stored_type
|
|
157
|
+
|
|
158
|
+
@property
def fail_threshold_value(self) -> int:
    """Return the fail threshold value stored on this assertion instance."""
    stored_value = self._fail_threshold_value
    return stored_value
|
|
161
|
+
|
|
162
|
+
@property
def exclude_nulls(self) -> bool:
    """Return the exclude-nulls flag stored on this assertion instance."""
    stored_flag = self._exclude_nulls
    return stored_flag
|
|
165
|
+
|
|
166
|
+
@classmethod
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
    """
    Build an instance of this class from an Assertion and a Monitor entity.

    Args:
        assertion: The Assertion entity; supplies the URNs, the column
            criteria, the description, incident behavior, tags and the
            created/updated audit fields.
        monitor: The Monitor entity; supplies the mode and schedule and is
            used together with the assertion for the detection mechanism.

    Returns:
        A new instance constructed through ``cls(...)``.
    """
    # Values are resolved one by one, in the same order the constructor
    # keywords are listed, so the first extractor that raises is the one
    # whose error surfaces.
    urn = assertion.urn
    dataset_urn = assertion.dataset

    # Column-value specific criteria.
    column_name = cls._get_column_name(assertion)
    operator = cls._get_operator(assertion)
    criteria_parameters = cls._get_criteria_parameters(assertion)
    transform = cls._get_transform(assertion)
    fail_threshold_type = cls._get_fail_threshold_type(assertion)
    fail_threshold_value = cls._get_fail_threshold_value(assertion)
    exclude_nulls = cls._get_exclude_nulls(assertion)

    # Settings shared with the other assertion types.
    display_name = assertion.description or ""
    mode = cls._get_mode(monitor)
    schedule = cls._get_schedule(monitor)
    incident_behavior = cls._get_incident_behavior(assertion)
    detection_mechanism = cls._get_detection_mechanism(assertion, monitor)
    tags = cls._get_tags(assertion)

    # Audit information.
    created_by = cls._get_created_by(assertion)
    created_at = cls._get_created_at(assertion)
    updated_by = cls._get_updated_by(assertion)
    updated_at = cls._get_updated_at(assertion)

    return cls(
        urn=urn,
        dataset_urn=dataset_urn,
        column_name=column_name,
        operator=operator,
        criteria_parameters=criteria_parameters,
        transform=transform,
        fail_threshold_type=fail_threshold_type,
        fail_threshold_value=fail_threshold_value,
        exclude_nulls=exclude_nulls,
        display_name=display_name,
        mode=mode,
        schedule=schedule,
        incident_behavior=incident_behavior,
        detection_mechanism=detection_mechanism,
        tags=tags,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
    )
|
|
199
|
+
|
|
200
|
+
@staticmethod
def _get_column_name(assertion: Assertion) -> str:
    """
    Read the column name from ``info.fieldValuesAssertion.field.path``.

    Args:
        assertion: The Assertion entity to inspect.

    Returns:
        The value found at that field path.

    Raises:
        SDKUsageError: If the lookup yields ``None``.
    """
    field_path_value = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.field.path",
        default=None,
    )
    if field_path_value is not None:
        return field_path_value

    raise SDKUsageError(
        "Column name is required for column value assertions. "
        f"Assertion {assertion.urn} does not have a column name"
    )
|
|
214
|
+
|
|
215
|
+
@staticmethod
def _get_operator(assertion: Assertion) -> OperatorInputType:
    """
    Read the operator from ``info.fieldValuesAssertion.operator``.

    Args:
        assertion: The Assertion entity to inspect.

    Returns:
        The value found at that field path.

    Raises:
        SDKUsageError: If the lookup yields ``None``.
    """
    found_operator = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.operator",
        default=None,
    )
    if found_operator is not None:
        return found_operator

    raise SDKUsageError(
        "Operator is required for column value assertions. "
        f"Assertion {assertion.urn} does not have an operator"
    )
|
|
229
|
+
|
|
230
|
+
@staticmethod
def _convert_param_value(
    value: str, param_type: Optional[str]
) -> Union[str, int, float]:
    """
    Convert a string value to the appropriate type based on param_type.

    For the NUMBER parameter type the string is parsed as an ``int`` when
    possible and as a ``float`` otherwise. Any other parameter type returns
    the value untouched.

    Args:
        value: The raw parameter value (expected to be a string).
        param_type: The parameter type; only
            ``models.AssertionStdParameterTypeClass.NUMBER`` triggers a
            conversion.

    Returns:
        The parsed ``int`` or ``float`` for NUMBER parameters, or the
        original value when no conversion applies or parsing fails (a
        warning is logged in the failure case).
    """
    if param_type == models.AssertionStdParameterTypeClass.NUMBER:
        try:
            if not isinstance(value, str):
                # Only strings are parsed; anything else takes the
                # warn-and-return path below (and is never truncated
                # by int()).
                raise TypeError(
                    f"expected a string, got {type(value).__name__}"
                )
            try:
                return int(value)
            except ValueError:
                # Not an integer literal. Falling back to float() instead of
                # testing for "." also handles scientific notation such as
                # "1e-07", which contains no dot.
                return float(value)
        except (ValueError, TypeError) as e:
            logger.warning(
                f"Failed to convert value '{value}' to number (type: {param_type}). "
                f"Returning as string. Error: {e}"
            )
            return value
    # For STRING and other types, return as-is
    return value
|
|
249
|
+
|
|
250
|
+
@staticmethod
def _get_criteria_parameters(
    assertion: Assertion,
) -> Optional[ColumnValueAssertionParameters]:
    """
    Extract the criteria parameters from the assertion.

    A single ``parameters.value`` takes precedence. Otherwise a
    ``(min, max)`` tuple is returned when both ``parameters.minValue`` and
    ``parameters.maxValue`` are present. Each value goes through
    ``_convert_param_value`` together with its ``type`` attribute (or
    ``None`` when the parameter has no such attribute).

    Returns:
        The converted single value, the converted ``(min, max)`` tuple, or
        ``None`` when neither form is available.
    """
    convert = ColumnValueAssertion._convert_param_value

    # Single-value form.
    single = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.parameters.value",
        default=None,
    )
    if single is not None:
        return convert(single.value, getattr(single, "type", None))

    # Range form: both bounds are required.
    lower = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.parameters.minValue",
        default=None,
    )
    upper = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.parameters.maxValue",
        default=None,
    )
    if lower is None or upper is None:
        return None

    # A bound without a ``value`` attribute is passed through as-is.
    lower_converted = convert(
        getattr(lower, "value", lower),
        getattr(lower, "type", None),
    )
    upper_converted = convert(
        getattr(upper, "value", upper),
        getattr(upper, "type", None),
    )
    return (lower_converted, upper_converted)
|
|
290
|
+
|
|
291
|
+
@staticmethod
def _get_criteria_parameters_with_type(
    assertion: Assertion,
) -> Optional[tuple]:
    """
    Extract the raw criteria parameters together with their type fields.

    Unlike ``_get_criteria_parameters`` the values are returned exactly as
    stored on the parameter objects; no conversion is applied.

    Returns:
        For single values: (value, type)
        For ranges: ((min_value, max_value), (min_type, max_type))
        None if no parameters found
    """
    # Single-value form takes precedence over the range form.
    single = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.parameters.value",
        default=None,
    )
    if single is not None:
        return (single.value, single.type)

    # Range form: both bounds are required.
    lower = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.parameters.minValue",
        default=None,
    )
    upper = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.parameters.maxValue",
        default=None,
    )
    if lower is None or upper is None:
        return None

    bounds = (lower.value, upper.value)
    bound_types = (lower.type, upper.type)
    return (bounds, bound_types)
|
|
331
|
+
|
|
332
|
+
@staticmethod
def _get_transform(assertion: Assertion) -> Optional[FieldTransformInputType]:
    """
    Read the transform type from ``info.fieldValuesAssertion.transform``.

    Returns:
        The ``type`` attribute of the transform, or ``None`` when the
        assertion has no transform or the transform has no ``type``.
    """
    transform_info = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.transform",
        default=None,
    )
    if transform_info is None:
        return None
    # Only the transform's type is exposed; a missing attribute maps to None.
    return getattr(transform_info, "type", None)
|
|
346
|
+
|
|
347
|
+
@staticmethod
def _get_fail_threshold_type(assertion: Assertion) -> FailThresholdType:
    """
    Map the assertion's fail threshold type to ``FailThresholdType``.

    The lookup defaults to the COUNT model value, and anything other than
    the PERCENTAGE model value is reported as ``FailThresholdType.COUNT``.
    """
    raw_type = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.failThreshold.type",
        default=models.FieldValuesFailThresholdTypeClass.COUNT,
    )
    return (
        FailThresholdType.PERCENTAGE
        if raw_type == models.FieldValuesFailThresholdTypeClass.PERCENTAGE
        else FailThresholdType.COUNT
    )
|
|
358
|
+
|
|
359
|
+
@staticmethod
def _get_fail_threshold_value(assertion: Assertion) -> int:
    """
    Read the fail threshold value from the assertion as an ``int``.

    The lookup of ``info.fieldValuesAssertion.failThreshold.value`` uses
    ``0`` as its default, and the result is coerced with ``int()``.
    """
    raw_threshold = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.failThreshold.value",
        default=0,
    )
    threshold = int(raw_threshold)
    return threshold
|
|
368
|
+
|
|
369
|
+
@staticmethod
def _get_exclude_nulls(assertion: Assertion) -> bool:
    """
    Read the exclude-nulls flag from the assertion.

    The lookup of ``info.fieldValuesAssertion.excludeNulls`` uses ``True``
    as its default.
    """
    exclude_flag = _get_nested_field_for_entity_with_default(
        assertion,
        field_path="info.fieldValuesAssertion.excludeNulls",
        default=True,
    )
    return exclude_flag
|
|
377
|
+
|
|
378
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """
    Derive the detection mechanism for a column value assertion.

    Args:
        assertion: The Assertion entity (used for validation and for the
            additional filter).
        monitor: The Monitor entity whose evaluation parameters are read.
        default: Value returned whenever no mechanism can be derived.

    Returns:
        A detection mechanism built from the monitor's
        ``datasetFieldParameters.sourceType``, or ``default`` when the
        validated context is ``None``, the dataset field parameters are
        missing, a CHANGED_ROWS_QUERY source has no ``changedRowsField``,
        or the source type is not one of the handled values.
    """
    context = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
        models.FieldAssertionInfoClass,
        default,
    )
    if context is None:
        return default

    field_parameters = context.datasetFieldParameters
    if field_parameters is None:
        logger.warning(
            "Monitor does not have datasetFieldParameters, "
            f"defaulting detection mechanism to {default}"
        )
        return default

    source_type = field_parameters.sourceType

    # Query over all rows, optionally narrowed by an additional filter.
    if source_type == models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY:
        row_filter = _AssertionPublic._get_additional_filter(assertion)
        return DetectionMechanism.ALL_ROWS_QUERY(additional_filter=row_filter)

    # Query over changed rows only; requires the column that tracks changes.
    if source_type == models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY:
        changed_rows_field = field_parameters.changedRowsField
        if changed_rows_field is None:
            logger.warning(
                "Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, "
                f"defaulting detection mechanism to {default}"
            )
            return default
        changed_column = changed_rows_field.path
        row_filter = _AssertionPublic._get_additional_filter(assertion)
        return DetectionMechanism.CHANGED_ROWS_QUERY(
            column_name=changed_column, additional_filter=row_filter
        )

    # Dataset-profile source type maps to a fixed mechanism.
    if (
        source_type
        == models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE
    ):
        return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

    logger.warning(
        f"Unsupported DatasetFieldAssertionSourceType {source_type}, "
        f"defaulting detection mechanism to {default}"
    )
    return default
|