acryl-datahub-cloud 0.3.10rc4__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +37 -8
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +39 -19
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +60 -25
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +9 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +248 -52
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +532 -109
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2390 -1938
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +6 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +14 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +27843 -23200
- acryl_datahub_cloud/metadata/schema_classes.py +29901 -24310
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +375 -212
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +191 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +424 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +65 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +84 -29
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +221 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +147 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +49 -43
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +243 -145
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py
@@ -0,0 +1,810 @@
+"""
+Column value assertion input module.
+
+This module provides the input class for creating column value assertions that validate
+individual row values against semantic constraints (e.g., "all values in column X must match pattern Y"
+or "no NULL values allowed").
+
+Key differences from column_metric_assertion (FIELD_METRIC):
+- FIELD_METRIC: Validates aggregated metrics (NULL_COUNT, MEAN, MIN, etc.)
+- FIELD_VALUES: Validates each individual row value against an operator/predicate
+"""
+
+from datetime import datetime
+from enum import Enum
+from typing import Optional, Union
+
+from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
+    DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
+    NO_PARAMETER_OPERATORS,
+    RANGE_OPERATORS,
+    SINGLE_VALUE_OPERATORS,
+    AssertionIncidentBehaviorInputTypes,
+    AssertionInfoInputType,
+    DetectionMechanismInputTypes,
+    FieldSpecType,
+    _AllRowsQuery,
+    _AllRowsQueryDataHubDatasetProfile,
+    _AssertionInput,
+    _ChangedRowsQuery,
+    _DatasetProfile,
+    _try_parse_and_validate_schema_classes_enum,
+)
+from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
+    _try_parse_and_validate_range,
+    _try_parse_and_validate_range_type,
+    _try_parse_and_validate_value,
+    _try_parse_and_validate_value_type,
+)
+from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
+    ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION,
+    FIELD_VALUES_OPERATOR_CONFIG,
+    OperatorInputType,
+    RangeInputType,
+    RangeTypeInputType,
+    RangeTypeParsedType,
+    ValueInputType,
+    ValueType,
+    ValueTypeInputType,
+)
+from acryl_datahub_cloud.sdk.entities.assertion import TagsInputType
+from acryl_datahub_cloud.sdk.errors import (
+    SDKNotYetSupportedError,
+    SDKUsageError,
+)
+from datahub.metadata import schema_classes as models
+from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
+from datahub.sdk.entity_client import EntityClient
+
+
+class FailThresholdType(str, Enum):
+    """Enum for fail threshold types in column value assertions."""
+
+    COUNT = "COUNT"
+    PERCENTAGE = "PERCENTAGE"
+
+
+class FieldTransformType(str, Enum):
+    """Enum for field transform types in column value assertions."""
+
+    LENGTH = "LENGTH"
+
+
+FailThresholdInputType = Union[FailThresholdType, str]
+FieldTransformInputType = Union[FieldTransformType, str, None]
+ColumnValueAssertionParameters = Union[
+    None,  # For operators that don't require parameters (NULL, NOT_NULL)
+    ValueInputType,  # Single value
+    RangeInputType,  # Range as tuple
+]
+
+# This represents the type information from existing GMS assertions:
+# - Single value: (value, type)
+# - Range: ((min_value, max_value), (min_type, max_type))
+GmsCriteriaTypeInfo = Union[
+    tuple[ValueInputType, models.AssertionStdParameterTypeClass],  # Single value
+    tuple[
+        tuple[ValueInputType, ValueInputType],  # (min, max) values
+        tuple[
+            models.AssertionStdParameterTypeClass,
+            models.AssertionStdParameterTypeClass,
+        ],  # (min_type, max_type)
+    ],  # Range
+]
+
+
+def _get_default_detection_mechanism_column_value_assertion() -> _AllRowsQuery:
+    """Factory function for creating default detection mechanism instances.
+
+    Returns a new instance each time to avoid shared mutable state.
+    """
+    return _AllRowsQuery()
+
+
+# This is used to validate that operators are compatible with transform outputs
+FIELD_TRANSFORM_OUTPUT_TYPE: dict[str, str] = {
+    models.FieldTransformTypeClass.LENGTH: "NUMBER",  # LENGTH(string) -> number
+}
+
+
+def _try_parse_fail_threshold_type(
+    fail_threshold_type: Optional[FailThresholdInputType],
+) -> FailThresholdType:
+    """Parse and validate fail threshold type.
+
+    Args:
+        fail_threshold_type: The fail threshold type to parse.
+
+    Returns:
+        The parsed FailThresholdType.
+
+    Raises:
+        SDKUsageError: If the fail threshold type is invalid.
+    """
+    if fail_threshold_type is None:
+        return FailThresholdType.COUNT
+
+    if isinstance(fail_threshold_type, FailThresholdType):
+        return fail_threshold_type
+
+    if isinstance(fail_threshold_type, str):
+        try:
+            return FailThresholdType(fail_threshold_type.upper())
+        except ValueError as e:
+            raise SDKUsageError(
+                f"Invalid fail threshold type: {fail_threshold_type}. "
+                f"Valid options are: {[t.value for t in FailThresholdType]}"
+            ) from e
+
+    raise SDKUsageError(
+        f"Invalid fail threshold type: {fail_threshold_type}. "
+        f"Valid options are: {[t.value for t in FailThresholdType]}"
+    )
+
+
+def _try_parse_field_transform_type(
+    field_transform: Optional[FieldTransformInputType],
+) -> Optional[str]:
+    """Parse and validate field transform type.
+
+    Args:
+        field_transform: The field transform type to parse.
+
+    Returns:
+        The parsed FieldTransformTypeClass string constant or None.
+
+    Raises:
+        SDKUsageError: If the field transform type is invalid.
+    """
+    if field_transform is None:
+        return None
+
+    if isinstance(field_transform, FieldTransformType):
+        return models.FieldTransformTypeClass.LENGTH
+
+    if isinstance(field_transform, str):
+        if field_transform.upper() == "LENGTH":
+            return models.FieldTransformTypeClass.LENGTH
+        else:
+            raise SDKUsageError(
+                f"Invalid field transform type: {field_transform}. "
+                f"Valid options are: {[t.value for t in FieldTransformType]}"
+            )
+
+    raise SDKUsageError(
+        f"Invalid field transform type: {field_transform}. "
+        f"Valid options are: {[t.value for t in FieldTransformType]}"
+    )
+
+
+def _validate_fail_threshold_value(
+    fail_threshold_type: FailThresholdType,
+    fail_threshold_value: int,
+) -> None:
+    """Validate fail threshold value based on the type.
+
+    Args:
+        fail_threshold_type: The type of fail threshold.
+        fail_threshold_value: The value to validate.
+
+    Raises:
+        SDKUsageError: If the fail threshold value is invalid.
+    """
+    if fail_threshold_value < 0:
+        raise SDKUsageError(
+            f"Fail threshold value must be non-negative, got {fail_threshold_value}"
+        )
+
+    if (
+        fail_threshold_type == FailThresholdType.PERCENTAGE
+        and fail_threshold_value > 100
+    ):
+        raise SDKUsageError(
+            f"Fail threshold value for PERCENTAGE must be between 0 and 100, "
+            f"got {fail_threshold_value}"
+        )
+
+
+class _ColumnValueAssertionInput(_AssertionInput):
+    """
+    Input used to create a column value assertion.
+
+    This assertion is used to validate individual row values in a column against
+    semantic constraints (e.g., "all values in column X must match pattern Y" or
+    "no NULL values allowed").
+
+    Key differences from column_metric_assertion (FIELD_METRIC):
+    - FIELD_METRIC: Validates aggregated metrics (NULL_COUNT, MEAN, MIN, etc.)
+    - FIELD_VALUES: Validates each individual row value against an operator/predicate
+    """
+
+    def __init__(
+        self,
+        *,
+        dataset_urn: Union[str, DatasetUrn],
+        entity_client: EntityClient,
+        column_name: str,
+        operator: OperatorInputType,
+        criteria_parameters: Optional[ColumnValueAssertionParameters] = None,
+        transform: Optional[FieldTransformInputType] = None,
+        fail_threshold_type: Optional[FailThresholdInputType] = None,
+        fail_threshold_value: int = 0,
+        exclude_nulls: bool = True,
+        urn: Optional[Union[str, AssertionUrn]] = None,
+        display_name: Optional[str] = None,
+        enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
+        detection_mechanism: DetectionMechanismInputTypes = None,
+        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
+        tags: Optional[TagsInputType] = None,
+        created_by: Union[str, CorpUserUrn],
+        created_at: datetime,
+        updated_by: Union[str, CorpUserUrn],
+        updated_at: datetime,
+        gms_criteria_type_info: Optional[GmsCriteriaTypeInfo] = None,
+    ):
+        """
+        Initialize a column value assertion input.
+
+        Args:
+            dataset_urn: The dataset urn.
+            entity_client: The entity client.
+            column_name: The name of the column to validate.
+            operator: The operator to use for the assertion.
+            criteria_parameters: The criteria parameters (single value, range tuple, or None).
+            transform: Optional transform to apply to field values before evaluation.
+            fail_threshold_type: The type of failure threshold (COUNT or PERCENTAGE).
+            fail_threshold_value: The failure threshold value (default 0 = all rows must pass).
+            exclude_nulls: Whether to exclude nulls when evaluating the assertion.
+            urn: The urn of the assertion.
+            display_name: The display name of the assertion.
+            enabled: Whether the assertion is enabled.
+            schedule: The schedule of the assertion.
+            detection_mechanism: The detection mechanism of the assertion.
+            incident_behavior: The incident behavior of the assertion.
+            tags: The tags of the assertion.
+            created_by: The creator of the assertion.
+            created_at: The creation time of the assertion.
+            updated_by: The updater of the assertion.
+            updated_at: The update time of the assertion.
+            gms_criteria_type_info: Type info from existing GMS assertion for updates.
+                Format: (value, type) for single values, or ((min, max), (min_type, max_type)) for ranges.
+        """
+        _AssertionInput.__init__(
+            self,
+            dataset_urn=dataset_urn,
+            entity_client=entity_client,
+            urn=urn,
+            display_name=display_name,
+            enabled=enabled,
+            schedule=schedule,
+            detection_mechanism=detection_mechanism,
+            incident_behavior=incident_behavior,
+            tags=tags,
+            source_type=models.AssertionSourceTypeClass.NATIVE,
+            created_by=created_by,
+            created_at=created_at,
+            updated_by=updated_by,
+            updated_at=updated_at,
+            default_detection_mechanism=_get_default_detection_mechanism_column_value_assertion(),
+        )
+
+        self.column_name = self._try_parse_and_validate_column_name_is_valid_type(
+            column_name
+        )
+        self.operator = _try_parse_and_validate_schema_classes_enum(
+            operator, models.AssertionStdOperatorClass
+        )
+        self.transform = _try_parse_field_transform_type(transform)
+        self.fail_threshold_type = _try_parse_fail_threshold_type(fail_threshold_type)
+        self.fail_threshold_value = fail_threshold_value
+        self.exclude_nulls = exclude_nulls
+
+        _validate_fail_threshold_value(
+            self.fail_threshold_type, self.fail_threshold_value
+        )
+
+        if self.transform is not None:
+            self._validate_transform_for_column_type()
+            self._validate_operator_for_transform_output_type()
+
+        self.criteria_parameters: Optional[ColumnValueAssertionParameters] = None
+        self.criteria_type: Optional[Union[ValueTypeInputType, RangeTypeInputType]] = (
+            None
+        )
+
+        if gms_criteria_type_info is not None:
+            self._process_criteria_parameters_with_gms_type(
+                criteria_parameters, gms_criteria_type_info
+            )
+        else:
+            self._process_criteria_parameters(criteria_parameters)
+
+        if self.transform is None:
+            self._validate_field_type_and_operator_compatibility(
+                self.column_name, self.operator
+            )
+
+    def _validate_transform_for_column_type(self) -> None:
+        """Validate that the transform is compatible with the column type.
+
+        LENGTH transform is only valid for STRING columns.
+
+        Raises:
+            SDKUsageError: If the transform is not compatible with the column type.
+        """
+        field_spec = self._get_schema_field_spec(self.column_name)
+        if (
+            self.transform == models.FieldTransformTypeClass.LENGTH
+            and field_spec.type != "STRING"
+        ):
+            raise SDKUsageError(
+                f"LENGTH transform is only valid for STRING columns, "
+                f"but column '{self.column_name}' is of type {field_spec.type}"
+            )
+
+    def _validate_operator_for_transform_output_type(self) -> None:
+        """Validate that the operator is compatible with the transform output type.
+
+        When a transform is applied, the operator must be compatible with the
+        transform's output type, not the original column type.
+        For example: LENGTH(string_column) produces a NUMBER, so operators like
+        REGEX_MATCH (which expect STRING) should be rejected.
+
+        Raises:
+            SDKUsageError: If the operator is not compatible with transform output.
+        """
+        if self.transform is None:
+            return
+
+        # Get the output type for this transform
+        transform_output_type = FIELD_TRANSFORM_OUTPUT_TYPE.get(self.transform)
+        if transform_output_type is None:
+            raise SDKNotYetSupportedError(
+                f"Transform {self.transform} is not yet supported for operator validation. "
+                f"Please update FIELD_TRANSFORM_OUTPUT_TYPE mapping."
+            )
+
+        # Check if operator is allowed for the transform output type
+        allowed_operators = FIELD_VALUES_OPERATOR_CONFIG.get(transform_output_type, [])
+        if self.operator not in allowed_operators:
+            raise SDKUsageError(
+                f"Operator {self.operator} is not compatible with transform {self.transform}. "
+                f"Transform {self.transform} produces {transform_output_type} values, "
+                f"but operator {self.operator} is not valid for {transform_output_type} types. "
+                f"Allowed operators for {transform_output_type}: "
+                f"{', '.join(str(op) for op in allowed_operators)}"
+            )
+
+    def _infer_criteria_type_from_parameters(
+        self,
+        criteria_parameters: Optional[ColumnValueAssertionParameters],
+    ) -> Optional[Union[ValueTypeInputType, RangeTypeInputType]]:
+        """
+        Infer the criteria type from the parameters based on Python types.
+        """
+        if criteria_parameters is None:
+            return None
+
+        if isinstance(criteria_parameters, tuple):
+            if len(criteria_parameters) != 2:
+                raise SDKUsageError(
+                    "Range parameters must be a tuple of exactly 2 values"
+                )
+            inferred_min_type = self._infer_single_value_type(criteria_parameters[0])
+            inferred_max_type = self._infer_single_value_type(criteria_parameters[1])
+            return (inferred_min_type, inferred_max_type)
+        else:
+            return self._infer_single_value_type(criteria_parameters)
+
+    def _infer_single_value_type(self, value: ValueInputType) -> ValueTypeInputType:
+        """Infer the type of a single value based on its Python type."""
+        if isinstance(value, (int, float)):
+            return ValueType.NUMBER
+        elif isinstance(value, str):
+            return ValueType.STRING
+        else:
+            return ValueType.UNKNOWN
+
+    def _process_criteria_parameters_with_gms_type(
+        self,
+        criteria_parameters: Optional[ColumnValueAssertionParameters],
+        gms_type_info: Optional[Union[models.AssertionStdParameterTypeClass, tuple]],
+    ) -> None:
+        """Process criteria_parameters using explicit type information from GMS."""
+        if criteria_parameters is None:
+            self._process_none_parameters()
+        elif isinstance(criteria_parameters, tuple):
+            # For range parameters, pass explicit types if available
+            # gms_type_info format: ((min_val, max_val), (min_type, max_type))
+            explicit_types = None
+            if (
+                isinstance(gms_type_info, tuple)
+                and len(gms_type_info) == 2
+                and isinstance(gms_type_info[0], tuple)
+            ):
+                # Extract types from second element (should be tuple of types)
+                explicit_types = (
+                    gms_type_info[1] if isinstance(gms_type_info[1], tuple) else None
+                )
+            self._process_range_parameters(criteria_parameters, explicit_types)
+        else:
+            # For single value parameters, pass explicit type if available
+            # gms_type_info format: (value, type)
+            explicit_type = None
+            if (
+                isinstance(gms_type_info, tuple)
+                and len(gms_type_info) >= 2
+                and not isinstance(gms_type_info[0], tuple)
+                and not isinstance(gms_type_info[1], tuple)
+            ):
+                # Single value format: extract type from second element
+                explicit_type = gms_type_info[1]
+            self._process_single_value_parameters(criteria_parameters, explicit_type)
+
+    def _process_criteria_parameters(
+        self,
+        criteria_parameters: Optional[ColumnValueAssertionParameters],
+    ) -> None:
+        """Process the criteria_parameters with automatic type inference."""
+        if criteria_parameters is None:
+            self._process_none_parameters()
+        elif isinstance(criteria_parameters, tuple):
+            self._process_range_parameters(criteria_parameters)
+        else:
+            self._process_single_value_parameters(criteria_parameters)
+
+    def _process_none_parameters(self) -> None:
+        """Process None criteria_parameters.
+
+        Raises:
+            SDKUsageError: If the operator requires parameters but none are provided.
+        """
+        if self.operator in SINGLE_VALUE_OPERATORS:
+            raise SDKUsageError(
+                f"Single value is required for operator {self.operator}. "
+                "Provide a criteria_parameters value."
+            )
+        if self.operator in RANGE_OPERATORS:
+            raise SDKUsageError(
+                f"Range parameters are required for operator {self.operator}. "
+                "Provide a tuple of (min_value, max_value) as criteria_parameters."
+            )
+        self.criteria_parameters = None
+        self.criteria_type = None
+
+    def _process_range_parameters(
+        self,
+        criteria_parameters: tuple,
+        explicit_types: Optional[
+            tuple[
+                models.AssertionStdParameterTypeClass,
+                models.AssertionStdParameterTypeClass,
+            ]
+        ] = None,
+    ) -> None:
+        """Process tuple criteria_parameters for range operators.
+
+        Args:
+            criteria_parameters: The range parameters (min, max).
+            explicit_types: Optional explicit types from GMS (min_type, max_type).
+                If provided, these types are used directly. Otherwise, types are
+                inferred from the parameters.
+        """
+        if self.operator not in RANGE_OPERATORS:
+            raise SDKUsageError(
+                f"Operator {self.operator} does not support range parameters. "
+                "Provide a single value instead of a tuple."
+            )
+
+        if len(criteria_parameters) != 2:
+            raise SDKUsageError("Range parameters must be a tuple of exactly 2 values")
+
+        # Declare validated_range_type with explicit type annotation
+        validated_range_type: RangeTypeParsedType
+
+        # Use explicit types if provided, otherwise infer from parameters
+        if explicit_types is not None:
+            min_type, max_type = explicit_types
+            validated_min_type = _try_parse_and_validate_value_type(min_type)
+            validated_max_type = _try_parse_and_validate_value_type(max_type)
+            validated_range_type = (validated_min_type, validated_max_type)
+
+            min_value, max_value = criteria_parameters
+            validated_min_value = _try_parse_and_validate_value(
+                min_value, validated_min_type
+            )
+            validated_max_value = _try_parse_and_validate_value(
+                max_value, validated_max_type
+            )
+            validated_range = (validated_min_value, validated_max_value)
+        else:
+            inferred_range_type = self._infer_criteria_type_from_parameters(
+                criteria_parameters
+            )
+            # Type narrowing: inferred_range_type should be a tuple for range parameters
+            if not isinstance(inferred_range_type, tuple):
+                raise SDKUsageError(
+                    "Expected tuple type for range parameters, but got "
+                    f"{type(inferred_range_type).__name__}"
+                )
+            validated_range_type = _try_parse_and_validate_range_type(
+                inferred_range_type
+            )
+            validated_range = _try_parse_and_validate_range(
+                criteria_parameters, validated_range_type, self.operator
+            )
+
+        self.criteria_parameters = validated_range
+        self.criteria_type = validated_range_type
+
+    def _process_single_value_parameters(
+        self,
+        criteria_parameters: Union[str, int, float],
+        explicit_type: Optional[models.AssertionStdParameterTypeClass] = None,
+    ) -> None:
+        """Process single value criteria_parameters.
+
+        Args:
+            criteria_parameters: The single value parameter.
+            explicit_type: Optional explicit type from GMS. If provided, this type
+                is used directly. Otherwise, the type is inferred from the parameter.
+        """
+        if self.operator in NO_PARAMETER_OPERATORS:
+            raise SDKUsageError(
+                f"Value parameters should not be provided for operator {self.operator}"
+            )
+        if self.operator not in SINGLE_VALUE_OPERATORS:
+            raise SDKUsageError(
+                f"Operator {self.operator} does not support value parameters. "
+                "Use criteria_parameters=None or omit criteria_parameters."
+            )
+
+        # Use explicit type if provided, otherwise infer from parameters
+        if explicit_type is not None:
+            validated_value_type = _try_parse_and_validate_value_type(explicit_type)
+        else:
+            inferred_value_type = self._infer_criteria_type_from_parameters(
+                criteria_parameters
+            )
+            if isinstance(inferred_value_type, tuple):
+                raise SDKUsageError("Single value type expected, not a tuple type")
+            validated_value_type = _try_parse_and_validate_value_type(
+                inferred_value_type
+            )
+
+        validated_value = _try_parse_and_validate_value(
+            criteria_parameters, validated_value_type
+        )
+
+        self.criteria_parameters = validated_value
+        self.criteria_type = validated_value_type
+
+    def _create_monitor_info(
+        self,
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+    ) -> models.MonitorInfoClass:
+        """Create a MonitorInfoClass with all the necessary components."""
+        source_type, field = self._convert_assertion_source_type_and_field()
+        return models.MonitorInfoClass(
+            type=models.MonitorTypeClass.ASSERTION,
+            status=status,
+            assertionMonitor=models.AssertionMonitorClass(
+                assertions=[
+                    models.AssertionEvaluationSpecClass(
+                        assertion=str(assertion_urn),
+                        schedule=schedule,
+                        parameters=self._get_assertion_evaluation_parameters(
+                            str(source_type), field
+                        ),
+                    ),
+                ],
+                settings=None,
+            ),
+        )
+
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """Create a FieldAssertionInfoClass for a column value assertion."""
+        field_spec = self._get_schema_field_spec(self.column_name)
+
+        field_values_assertion = models.FieldValuesAssertionClass(
+            field=field_spec,
+            operator=self.operator,
+            parameters=self._create_assertion_parameters(),
+            transform=self._create_field_transform(),
+            failThreshold=models.FieldValuesFailThresholdClass(
+                type=self._convert_fail_threshold_type(),
+                value=self.fail_threshold_value,
+            ),
+            excludeNulls=self.exclude_nulls,
+        )
+
+        return models.FieldAssertionInfoClass(
+            type=models.FieldAssertionTypeClass.FIELD_VALUES,
+            entity=str(self.dataset_urn),
+            filter=filter,
+            fieldValuesAssertion=field_values_assertion,
+            fieldMetricAssertion=None,
+        )
+
+    def _convert_fail_threshold_type(self) -> str:
+        """Convert the fail threshold type to the model class."""
+        if self.fail_threshold_type == FailThresholdType.COUNT:
+            return models.FieldValuesFailThresholdTypeClass.COUNT
+        else:
+            return models.FieldValuesFailThresholdTypeClass.PERCENTAGE
+
+    def _create_field_transform(self) -> Optional[models.FieldTransformClass]:
+        """Create the field transform if specified."""
+        if self.transform is None:
+            return None
+        return models.FieldTransformClass(type=self.transform)
+
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Create a schedule for a column value assertion."""
+        if self.schedule is None:
+            return DEFAULT_EVERY_SIX_HOURS_SCHEDULE
+
+        return models.CronScheduleClass(
+            cron=self.schedule.cron,
+            timezone=self.schedule.timezone,
+        )
+
+    def _convert_schema_field_spec_to_freshness_field_spec(
+        self, field_spec: models.SchemaFieldSpecClass
+    ) -> models.FreshnessFieldSpecClass:
+        """Convert a SchemaFieldSpecClass to a FreshnessFieldSpecClass."""
+        return models.FreshnessFieldSpecClass(
+            path=field_spec.path,
+            type=field_spec.type,
+            nativeType=field_spec.nativeType,
+            kind=models.FreshnessFieldKindClass.HIGH_WATERMARK,
+        )
+
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        """Get evaluation parameters for a column value assertion."""
+        if field is not None:
+            if isinstance(field, models.SchemaFieldSpecClass):
+                field = self._convert_schema_field_spec_to_freshness_field_spec(field)
+            assert isinstance(field, models.FreshnessFieldSpecClass), (
+                "Field must be FreshnessFieldSpecClass for monitor info"
+            )
+        return models.AssertionEvaluationParametersClass(
+            type=models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
+            datasetFieldParameters=models.DatasetFieldAssertionParametersClass(
+                sourceType=source_type,
+                changedRowsField=field,
+            ),
+        )
+
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[FieldSpecType]]:
+        """Convert detection mechanism into source type and field specification."""
+        source_type = models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY
+        field = None
+        SUPPORTED_DETECTION_MECHANISMS = [
+            _AllRowsQuery().type,
+            _AllRowsQueryDataHubDatasetProfile().type,
+            _ChangedRowsQuery(column_name="").type,
+        ]
+
+        if isinstance(self.detection_mechanism, _ChangedRowsQuery):
+            source_type = models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
+            column_name = self._try_parse_and_validate_column_name_is_valid_type(
+                self.detection_mechanism.column_name,
+                allowed_column_types=[
+                    models.NumberTypeClass(),
+                    models.DateTypeClass(),
+                    models.TimeTypeClass(),
+                ],
+            )
+            field = self._get_schema_field_spec(column_name)
+        elif isinstance(self.detection_mechanism, _AllRowsQuery):
+            source_type = models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY
+        elif isinstance(
+            self.detection_mechanism,
+            (_AllRowsQueryDataHubDatasetProfile, _DatasetProfile),
+        ):
+            source_type = (
+                models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE
+            )
+        else:
+            raise SDKNotYetSupportedError(
+                f"Detection mechanism {self.detection_mechanism} is not supported for "
+                f"column value assertions, please use a supported detection mechanism: "
+                f"{', '.join(SUPPORTED_DETECTION_MECHANISMS)}"
+            )
+
+        return source_type, field
+
+    def _create_assertion_parameters(self) -> models.AssertionStdParametersClass:
+        """Create assertion parameters based on the operator type and provided values."""
+        if self.operator in SINGLE_VALUE_OPERATORS:
+            if self.criteria_parameters is None or isinstance(
+                self.criteria_parameters, tuple
+            ):
+                raise SDKUsageError(
+                    f"Single value is required for operator {self.operator}"
+                )
+            if self.criteria_type is None or isinstance(self.criteria_type, tuple):
+                raise SDKUsageError(
+                    f"Single value type is required for operator {self.operator}"
+                )
+            return models.AssertionStdParametersClass(
+                value=models.AssertionStdParameterClass(
+                    value=str(self.criteria_parameters),
+                    type=self.criteria_type,
+                ),
+            )
+        elif self.operator in RANGE_OPERATORS:
+            if not isinstance(self.criteria_parameters, tuple):
+                raise SDKUsageError(
+                    f"Range parameters are required for operator {self.operator}"
+                )
+            if not isinstance(self.criteria_type, tuple):
+                raise SDKUsageError(
+                    f"Range type is required for operator {self.operator}"
+                )
+            return models.AssertionStdParametersClass(
+                minValue=models.AssertionStdParameterClass(
+                    value=str(self.criteria_parameters[0]),
+                    type=self.criteria_type[0],
+                ),
+                maxValue=models.AssertionStdParameterClass(
+                    value=str(self.criteria_parameters[1]),
+                    type=self.criteria_type[1],
+                ),
+            )
+        elif self.operator in NO_PARAMETER_OPERATORS:
+            return models.AssertionStdParametersClass()
+        else:
+            raise SDKUsageError(f"Unsupported operator type: {self.operator}")
+
+    def _try_parse_and_validate_column_name_is_valid_type(
+        self,
+        column_name: str,
+        allowed_column_types: list[
+            models.DictWrapper
+        ] = ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION,
+    ) -> str:
+        """Parse and validate a column name and its type."""
+        field_spec = self._get_schema_field_spec(column_name)
+        self._validate_field_type(
+            field_spec,
+            column_name,
+            allowed_column_types,
+            "column value assertion",
+        )
+        return column_name
+
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        return models.AssertionTypeClass.FIELD
+
+    def _validate_field_type_and_operator_compatibility(
+        self, column_name: str, operator: models.AssertionStdOperatorClass
+    ) -> None:
+        """Validate that the field type is compatible with the operator.
+
+        Args:
+            column_name: The name of the column to validate.
+            operator: The operator to validate against.
+
+        Raises:
+            SDKUsageError: If the field type is not compatible with the operator.
+        """
+        field_spec = self._get_schema_field_spec(column_name)
+        allowed_operators = FIELD_VALUES_OPERATOR_CONFIG.get(field_spec.type, [])
+        if operator not in allowed_operators:
+            raise SDKUsageError(
+                f"Operator {operator} is not allowed for field type {field_spec.type} "
+                f"for column '{column_name}'. Allowed operators: "
+                f"{', '.join(str(op) for op in allowed_operators)}"
+            )