acryl-datahub-cloud 0.3.10rc4__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +37 -8
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +39 -19
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +60 -25
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +9 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +248 -52
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +532 -109
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2390 -1938
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +6 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +14 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/monitor/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +27843 -23200
- acryl_datahub_cloud/metadata/schema_classes.py +29901 -24310
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +375 -212
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +191 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataContractKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTransformLogic.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +424 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +65 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +64 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +84 -29
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +221 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QueryProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +147 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +49 -43
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +243 -145
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.10rc4.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1074 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
from typing import TYPE_CHECKING, Optional, Union
|
|
7
|
+
|
|
8
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
9
|
+
FreshnessAssertion,
|
|
10
|
+
SmartFreshnessAssertion,
|
|
11
|
+
SmartVolumeAssertion,
|
|
12
|
+
SqlAssertion,
|
|
13
|
+
VolumeAssertion,
|
|
14
|
+
)
|
|
15
|
+
from acryl_datahub_cloud.sdk.assertion.column_metric_assertion import (
|
|
16
|
+
ColumnMetricAssertion,
|
|
17
|
+
)
|
|
18
|
+
from acryl_datahub_cloud.sdk.assertion.column_value_assertion import (
|
|
19
|
+
ColumnValueAssertion,
|
|
20
|
+
)
|
|
21
|
+
from acryl_datahub_cloud.sdk.assertion.schema_assertion import SchemaAssertion
|
|
22
|
+
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
23
|
+
SmartColumnMetricAssertion,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.assertion.smart_sql_assertion import SmartSqlAssertion
|
|
26
|
+
from acryl_datahub_cloud.sdk.assertion_client.column_metric import (
|
|
27
|
+
ColumnMetricAssertionClient,
|
|
28
|
+
)
|
|
29
|
+
from acryl_datahub_cloud.sdk.assertion_client.column_value import (
|
|
30
|
+
ColumnValueAssertionClient,
|
|
31
|
+
)
|
|
32
|
+
from acryl_datahub_cloud.sdk.assertion_client.freshness import (
|
|
33
|
+
FreshnessAssertionClient,
|
|
34
|
+
)
|
|
35
|
+
from acryl_datahub_cloud.sdk.assertion_client.helpers import (
|
|
36
|
+
_print_experimental_warning,
|
|
37
|
+
)
|
|
38
|
+
from acryl_datahub_cloud.sdk.assertion_client.schema import (
|
|
39
|
+
SchemaAssertionClient,
|
|
40
|
+
)
|
|
41
|
+
from acryl_datahub_cloud.sdk.assertion_client.smart_column_metric import (
|
|
42
|
+
SmartColumnMetricAssertionClient,
|
|
43
|
+
)
|
|
44
|
+
from acryl_datahub_cloud.sdk.assertion_client.smart_freshness import (
|
|
45
|
+
SmartFreshnessAssertionClient,
|
|
46
|
+
)
|
|
47
|
+
from acryl_datahub_cloud.sdk.assertion_client.smart_sql import (
|
|
48
|
+
SmartSqlAssertionClient,
|
|
49
|
+
)
|
|
50
|
+
from acryl_datahub_cloud.sdk.assertion_client.smart_volume import (
|
|
51
|
+
SmartVolumeAssertionClient,
|
|
52
|
+
)
|
|
53
|
+
from acryl_datahub_cloud.sdk.assertion_client.sql import (
|
|
54
|
+
SqlAssertionClient,
|
|
55
|
+
)
|
|
56
|
+
from acryl_datahub_cloud.sdk.assertion_client.volume import (
|
|
57
|
+
VolumeAssertionClient,
|
|
58
|
+
)
|
|
59
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
60
|
+
AssertionIncidentBehaviorInputTypes,
|
|
61
|
+
DetectionMechanismInputTypes,
|
|
62
|
+
ExclusionWindowInputTypes,
|
|
63
|
+
InferenceSensitivity,
|
|
64
|
+
TimeWindowSizeInputTypes,
|
|
65
|
+
)
|
|
66
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
|
|
67
|
+
ColumnMetricAssertionParameters,
|
|
68
|
+
)
|
|
69
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
70
|
+
MetricInputType,
|
|
71
|
+
OperatorInputType,
|
|
72
|
+
)
|
|
73
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_value_assertion_input import (
|
|
74
|
+
ColumnValueAssertionParameters,
|
|
75
|
+
FailThresholdInputType,
|
|
76
|
+
FieldTransformInputType,
|
|
77
|
+
)
|
|
78
|
+
from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
|
|
79
|
+
FreshnessAssertionScheduleCheckType,
|
|
80
|
+
)
|
|
81
|
+
from acryl_datahub_cloud.sdk.assertion_input.schema_assertion_input import (
|
|
82
|
+
SchemaAssertionCompatibility,
|
|
83
|
+
SchemaAssertionFieldsInputType,
|
|
84
|
+
)
|
|
85
|
+
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
86
|
+
SqlAssertionCondition,
|
|
87
|
+
)
|
|
88
|
+
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
89
|
+
VolumeAssertionCondition,
|
|
90
|
+
VolumeAssertionDefinitionParameters,
|
|
91
|
+
)
|
|
92
|
+
from acryl_datahub_cloud.sdk.entities.assertion import TagsInputType
|
|
93
|
+
from acryl_datahub_cloud.sdk.errors import SDKUsageError
|
|
94
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
95
|
+
from datahub.metadata import schema_classes as models
|
|
96
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
|
|
97
|
+
|
|
98
|
+
if TYPE_CHECKING:
|
|
99
|
+
from datahub.sdk.main_client import DataHubClient
|
|
100
|
+
|
|
101
|
+
logger = logging.getLogger(__name__)
|
|
102
|
+
|
|
103
|
+
# Module-level actor URN parsed from the literal "urn:li:corpuser:__datahub_system".
# NOTE(review): the name suggests this is the fallback "created by" actor for SDK
# calls; its call sites are outside this view -- confirm before relying on that.
# TODO: Replace __datahub_system with the actual datahub system user https://linear.app/acryl-data/issue/OBS-1351/auditstamp-actor-hydration-pattern-for-sdk-calls
|
|
104
|
+
DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class AssertionsClient:
|
|
108
|
+
def __init__(self, client: "DataHubClient"):
    """Wire up the facade around ``client``.

    One delegate client is constructed per supported assertion type, all of
    them sharing the same ``client``. A bounded, time-bucketed existence
    cache is attached as ``self._cached_exists`` and, finally,
    ``_print_experimental_warning()`` is called.

    Args:
        client: The DataHubClient handed to every delegate client and used
            (through ``self._graph``) for dataset existence checks.
    """
    self.client = client

    # Delegate clients, one per assertion flavour.
    self._freshness_client = FreshnessAssertionClient(client)
    self._volume_client = VolumeAssertionClient(client)
    self._sql_client = SqlAssertionClient(client)
    self._smart_freshness_client = SmartFreshnessAssertionClient(client)
    self._smart_volume_client = SmartVolumeAssertionClient(client)
    self._smart_sql_client = SmartSqlAssertionClient(client)
    self._smart_column_metric_client = SmartColumnMetricAssertionClient(client)
    self._column_metric_client = ColumnMetricAssertionClient(client)
    self._schema_client = SchemaAssertionClient(client)
    self._column_value_client = ColumnValueAssertionClient(client)

    # Existence lookups are memoised per (urn, time bucket). The bucket is
    # never read inside the function: it only widens the cache key so that
    # a new bucket value (callers derive it from the clock in 60-second
    # steps) forces a fresh lookup.
    @functools.lru_cache(maxsize=128)
    def _exists_in_bucket(urn, _time_bucket):
        return self._graph.exists(urn)

    self._cached_exists = _exists_in_bucket

    _print_experimental_warning()
|
|
127
|
+
|
|
128
|
+
@property
def _graph(self) -> DataHubGraph:
    """Return the DataHubGraph held by the wrapped client (``self.client._graph``)."""
    wrapped_client = self.client
    return wrapped_client._graph
|
|
132
|
+
|
|
133
|
+
def _check_dataset_exists(
    self, dataset_urn: Union[str, DatasetUrn], skip_check: bool
) -> None:
    """Verify the dataset exists in DataHub.

    Lookups go through ``self._cached_exists``, keyed on the URN string and
    a 60-second wall-clock bucket, so repeated checks for the same dataset
    (e.g. when syncing several assertions) avoid redundant network calls.
    A cached positive answer is reused for at most 60 seconds; it expires
    as soon as the clock crosses into the next bucket.

    Negative answers are not retained: when the dataset is reported
    missing, the cache is cleared before raising. Otherwise a dataset that
    is created right after a failed check would keep being rejected until
    the bucket rolls over, without DataHub ever being asked again.

    Args:
        dataset_urn: The URN of the dataset to check.
        skip_check: If True, skip the existence check entirely.

    Raises:
        SDKUsageError: If the dataset does not exist and skip_check is False.
    """
    if skip_check:
        return

    dataset_urn_str = str(dataset_urn)
    # Time bucketing stands in for a TTL: the bucket value changes every
    # 60 seconds, which makes older cache keys unreachable.
    time_bucket = int(time.time() // 60)
    if self._cached_exists(dataset_urn_str, time_bucket):
        return

    # lru_cache cannot evict a single key, so drop the whole (<=128 entry)
    # cache; the only cost is re-checking other datasets once.
    self._cached_exists.cache_clear()
    raise SDKUsageError(f"Dataset {dataset_urn_str} does not exist")
|
|
156
|
+
|
|
157
|
+
def sync_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    skip_dataset_exists_check: bool = False,
) -> SmartFreshnessAssertion:
    """Create or update (upsert and merge) a smart freshness assertion.

    Note:
        All arguments must be passed by keyword.

    Merge semantics: when no assertion exists yet it is created; when one
    exists it is updated. For an existing assertion, a field is overwritten
    only when the corresponding input is not None; a None input leaves the
    stored value untouched. Fields that can be un-set are cleared by passing
    an empty value (e.g. an empty list or an empty string).

    Schedule behavior:
        - Create case: Uses default daily schedule ("0 0 * * *")
        - Update case: Preserves existing schedule from backend (not modifiable)

    The dataset existence check runs first (unless skipped); the remaining
    arguments are then forwarded unchanged to the smart freshness client.

    Args:
        dataset_urn (Union[str, DatasetUrn]): Urn of the dataset to monitor.
        urn (Optional[Union[str, AssertionUrn]]): Urn of the assertion. When
            omitted, a urn is generated and the assertion is created in the
            DataHub instance.
        display_name (Optional[str]): Display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        detection_mechanism (DetectionMechanismInputTypes): Detection
            mechanism for the assertion. Information schema is recommended.
            Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {"type": "last_modified_column", "column_name": "last_modified",
              "additional_filter": "last_modified > '2021-01-01'"} or
              DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
              additional_filter="last_modified > '2021-01-01'")
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity (Optional[Union[str, InferenceSensitivity]]): Sensitivity
            applied to the assertion. Valid values are: "low", "medium", "high".
        exclusion_windows (Optional[ExclusionWindowInputTypes]): Exclusion
            windows applied to the assertion. Only fixed range exclusion
            windows are supported. Valid values are:
            - {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"}
              (ISO strings)
            - {"start": datetime(2025, 1, 1, 0, 0, 0),
              "end": datetime(2025, 1, 2, 0, 0, 0)} (datetime objects)
            - FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0),
              end=datetime(2025, 1, 2, 0, 0, 0)) (typed object)
            - A list of any of the above formats
        training_data_lookback_days (Optional[int]): Number of days of
            training data to look back over, as an integer.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]):
            Incident behavior applied to the assertion. Valid values are:
            "raise_on_fail", "resolve_on_pass" or the typed equivalents
            (AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): Tags applied to the assertion: a list
            of strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user who
            updated the assertion, formatted "urn:li:corpuser:<username>".
            Defaults to the datahub system user.
        skip_dataset_exists_check (bool): If False (default), dataset_urn is
            verified to exist before the assertion is created/updated. Set to
            True when creating assertions before ingesting datasets (e.g.
            setting up assertions in a new environment before running
            ingestion pipelines), or when the dataset exists but may not be
            visible to the current API endpoint.

    Returns:
        SmartFreshnessAssertion: The created or updated assertion.
    """
    # Fail fast on an unknown dataset before delegating anything.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    # Everything except the skip flag is passed through verbatim.
    forwarded = {
        "dataset_urn": dataset_urn,
        "urn": urn,
        "display_name": display_name,
        "enabled": enabled,
        "detection_mechanism": detection_mechanism,
        "sensitivity": sensitivity,
        "exclusion_windows": exclusion_windows,
        "training_data_lookback_days": training_data_lookback_days,
        "incident_behavior": incident_behavior,
        "tags": tags,
        "updated_by": updated_by,
    }
    delegate = self._smart_freshness_client
    return delegate.sync_smart_freshness_assertion(**forwarded)
|
|
229
|
+
|
|
230
|
+
def sync_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> SmartVolumeAssertion:
    """Create a smart volume assertion, or merge changes into an existing one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics:
        - If the assertion does not exist yet, it is created.
        - If it already exists, it is updated. An argument that is not None
          overwrites the stored field; an argument left as None keeps the
          stored value.
        - A field that can be un-set is cleared by passing an empty value
          (e.g. an empty list or an empty string).

    Schedule behavior:
        - Create case: the provided schedule, or the default daily schedule
          ("0 0 * * *") when none is given.
        - Update case: the provided schedule replaces the stored one; when no
          schedule is given the existing schedule is preserved.

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be
            monitored.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion.
            When omitted, a urn is generated and the assertion is created in
            the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        detection_mechanism (DetectionMechanismInputTypes): The detection
            mechanism used by the assertion. Information schema is
            recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - {"type": "query", "additional_filter": "value > 1000"} or
              DetectionMechanism.QUERY(additional_filter='value > 1000')
            - "dataset_profile" or DetectionMechanism.DATASET_PROFILE
        sensitivity (Optional[Union[str, InferenceSensitivity]]): The
            sensitivity applied to the assertion. Valid values are: "low",
            "medium", "high".
        exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion
            windows applied to the assertion. Only fixed range exclusion
            windows are supported. Valid values are:
            - {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"}
              (using ISO strings)
            - {"start": datetime(2025, 1, 1, 0, 0, 0),
              "end": datetime(2025, 1, 2, 0, 0, 0)} (using datetime objects)
            - FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0),
              end=datetime(2025, 1, 2, 0, 0, 0)) (using typed object)
            - A list of any of the above formats
        training_data_lookback_days (Optional[int]): The training data
            lookback days applied to the assertion, as an integer.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]):
            The incident behavior applied to the assertion. Valid values are:
            "raise_on_fail", "resolve_on_pass", or the typed ones
            (AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags applied to the assertion.
            Valid values are: a list of strings, TagUrn objects, or
            TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the
            user who updated the assertion, in the format
            "urn:li:corpuser:<username>". The default is the datahub system
            user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional
            cron formatted schedule for the assertion, e.g. "0 0 * * *" for
            daily at midnight using UTC timezone. A models.CronScheduleClass
            object is accepted as well. When omitted, a default daily
            schedule is used.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set to
            True when creating assertions before ingesting datasets (e.g.,
            setting up assertions in a new environment before running
            ingestion pipelines), or when the dataset exists but may not be
            visible to the current API endpoint.

    Returns:
        SmartVolumeAssertion: The created or updated assertion.
    """
    # The dataset check runs before anything is delegated; the flag is handed
    # to the helper unchanged.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    volume_client = self._smart_volume_client
    return volume_client.sync_smart_volume_assertion(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
|
|
304
|
+
|
|
305
|
+
def sync_column_metric_assertion(  # TODO: Refactor
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: Optional[str] = None,
    metric_type: Optional[MetricInputType] = None,
    operator: Optional[OperatorInputType] = None,
    criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> ColumnMetricAssertion:
    """Create a column metric assertion, or merge changes into an existing one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics:
        - If the assertion does not exist yet, it is created.
        - If it already exists, it is updated. An argument that is not None
          overwrites the stored field; an argument left as None keeps the
          stored value.
        - A field that can be un-set is cleared by passing an empty value
          (e.g. an empty list or an empty string).

    Schedule behavior:
        - Create case: the provided schedule, or the default daily schedule
          ("0 0 * * *") when none is given.
        - Update case: the provided schedule, or the existing schedule when
          none is given.

    Examples:
        # Using enum values (recommended for type safety)
        from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import MetricType, OperatorType
        client.assertions.sync_column_metric_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="user_id",
            metric_type=MetricType.NULL_COUNT,
            operator=OperatorType.GREATER_THAN,
            criteria_parameters=10
        )

        # Using case-insensitive strings (more flexible)
        client.assertions.sync_column_metric_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="price",
            metric_type="mean",
            operator="between",
            criteria_parameters=(100.0, 500.0)
        )

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be
            monitored.
        column_name (Optional[str]): The name of the column to be monitored.
            Required for creation, optional for updates.
        metric_type (Optional[MetricInputType]): The type of the metric to be
            monitored. Required for creation, optional for updates. Valid
            values are:
            - Using MetricType enum: MetricType.NULL_COUNT,
              MetricType.NULL_PERCENTAGE, MetricType.UNIQUE_COUNT,
              MetricType.UNIQUE_PERCENTAGE, MetricType.MAX_LENGTH,
              MetricType.MIN_LENGTH, MetricType.EMPTY_COUNT,
              MetricType.EMPTY_PERCENTAGE, MetricType.MIN, MetricType.MAX,
              MetricType.MEAN, MetricType.MEDIAN, MetricType.STDDEV,
              MetricType.NEGATIVE_COUNT, MetricType.NEGATIVE_PERCENTAGE,
              MetricType.ZERO_COUNT, MetricType.ZERO_PERCENTAGE
            - Using case-insensitive strings: "null_count", "MEAN",
              "Max_Length", etc.
            - Using models enum: models.FieldMetricTypeClass.NULL_COUNT, etc.
              (import with: from datahub.metadata import schema_classes as models)
        operator (Optional[OperatorInputType]): The operator to be used for
            the assertion. Required for creation, optional for updates. Valid
            values are:
            - Using OperatorType enum: OperatorType.EQUAL_TO,
              OperatorType.NOT_EQUAL_TO, OperatorType.GREATER_THAN,
              OperatorType.GREATER_THAN_OR_EQUAL_TO, OperatorType.LESS_THAN,
              OperatorType.LESS_THAN_OR_EQUAL_TO, OperatorType.BETWEEN,
              OperatorType.IN, OperatorType.NOT_IN, OperatorType.NULL,
              OperatorType.NOT_NULL, OperatorType.IS_TRUE,
              OperatorType.IS_FALSE, OperatorType.CONTAIN,
              OperatorType.END_WITH, OperatorType.START_WITH,
              OperatorType.REGEX_MATCH
            - Using case-insensitive strings: "equal_to", "not_equal_to",
              "greater_than", "greater_than_or_equal_to", "less_than",
              "less_than_or_equal_to", "between", "in", "not_in", "null",
              "not_null", "is_true", "is_false", "contain", "end_with",
              "start_with", "regex_match"
            - Using models enum: models.AssertionStdOperatorClass.EQUAL_TO,
              models.AssertionStdOperatorClass.GREATER_THAN, etc.
        criteria_parameters (Optional[ColumnMetricAssertionParameters]): The
            criteria parameters for the assertion. Required for creation
            (except for operators that don't need parameters), optional for
            updates.
            - Single value operators (EQUAL_TO, NOT_EQUAL_TO, GREATER_THAN,
              GREATER_THAN_OR_EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO,
              CONTAIN, END_WITH, START_WITH, REGEX_MATCH): pass a single
              number or string
            - Range operators (BETWEEN): pass a tuple of two numbers
              (min_value, max_value)
            - List operators (IN, NOT_IN): pass a list of values
            - No parameter operators (NULL, NOT_NULL, IS_TRUE, IS_FALSE):
              pass None or omit this parameter
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion.
            When omitted, a urn is generated and the assertion is created in
            the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        detection_mechanism (DetectionMechanismInputTypes): The detection
            mechanism used by the assertion. Valid values are
            (additional_filter is optional):
            - "all_rows_query_datahub_dataset_profile" or
              DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
            - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with
              additional_filter:
              {"type": "all_rows_query", "additional_filter": "last_modified > '2021-01-01'"}
              or DetectionMechanism.ALL_ROWS_QUERY(additional_filter="last_modified > '2021-01-01'")
            - {"type": "changed_rows_query", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"}
              or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter="last_modified > '2021-01-01'")
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]):
            The incident behavior applied to the assertion. Valid values are:
            "raise_on_fail", "resolve_on_pass", or the typed ones
            (AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags applied to the assertion.
            Valid values are: a list of strings, TagUrn objects, or
            TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the
            user who updated the assertion, in the format
            "urn:li:corpuser:<username>". The default is the datahub system
            user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional
            cron formatted schedule for the assertion, e.g. "0 0 * * *" for
            daily at midnight using UTC timezone. A models.CronScheduleClass
            object is accepted as well. When omitted, a default daily
            schedule is used.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set to
            True when creating assertions before ingesting datasets (e.g.,
            setting up assertions in a new environment before running
            ingestion pipelines), or when the dataset exists but may not be
            visible to the current API endpoint.

    Returns:
        ColumnMetricAssertion: The created or updated assertion.
    """
    # The dataset check runs before anything is delegated; the flag is handed
    # to the helper unchanged.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    metric_client = self._column_metric_client
    return metric_client.sync_column_metric_assertion(
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        criteria_parameters=criteria_parameters,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
|
|
419
|
+
|
|
420
|
+
def sync_column_value_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: Optional[str] = None,
    operator: Optional[OperatorInputType] = None,
    criteria_parameters: Optional[ColumnValueAssertionParameters] = None,
    transform: Optional[FieldTransformInputType] = None,
    fail_threshold_type: Optional[FailThresholdInputType] = None,
    fail_threshold_value: Optional[int] = None,
    exclude_nulls: Optional[bool] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> ColumnValueAssertion:
    # The docstring is a raw string because the regex example below contains a
    # backslash; in a normal string literal "\." is an invalid escape sequence.
    r"""Upsert and merge a column value assertion.

    Note:
        Keyword arguments are required.

    Upsert and merge is a combination of create and update. If the assertion
    does not exist, it will be created. If it does exist, it will be updated.

    Existing assertion fields will be updated if the input value is not None.
    If the input value is None, the existing value will be preserved. If the
    input value can be un-set (e.g. by passing an empty list or empty string),
    it will be unset.

    Column value assertions validate individual row values in a column against
    semantic constraints (e.g., "all values must match pattern X" or "no NULL
    values allowed"). This differs from column metric assertions, which
    validate aggregated metrics.

    Schedule behavior:
        - Create case: Uses default schedule of every 6 hours or provided
          schedule.
        - Update case: Uses existing schedule or provided schedule.

    Examples:
        # Simple email regex validation
        client.assertions.sync_column_value_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="email",
            operator="regex_match",
            criteria_parameters=r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
        )

        # Validate quantity is positive with 5% failure tolerance
        client.assertions.sync_column_value_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="quantity",
            operator="greater_than",
            criteria_parameters=0,
            fail_threshold_type="percentage",
            fail_threshold_value=5,
            exclude_nulls=True
        )

        # Range validation with transform
        client.assertions.sync_column_value_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="description",
            operator="between",
            criteria_parameters=(10, 500),
            transform="length",  # Only for STRING columns
            schedule="0 */6 * * *"
        )

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be
            monitored.
        column_name (Optional[str]): The name of the column to validate.
            Required for creation, optional for updates.
        operator (Optional[OperatorInputType]): The operator to use for
            validation. Required for creation, optional for updates. Valid
            values are:
            - Using OperatorType enum: OperatorType.NOT_NULL,
              OperatorType.EQUAL_TO, OperatorType.GREATER_THAN,
              OperatorType.LESS_THAN, OperatorType.BETWEEN, OperatorType.IN,
              OperatorType.REGEX_MATCH, etc.
            - Using case-insensitive strings: "not_null", "equal_to",
              "greater_than", "less_than", "between", "in", "regex_match",
              etc.
            - Using models enum: models.AssertionStdOperatorClass.NOT_NULL,
              etc.
        criteria_parameters (Optional[ColumnValueAssertionParameters]): The
            criteria parameters for the operator.
            - Single value operators (EQUAL_TO, GREATER_THAN, LESS_THAN,
              REGEX_MATCH, etc.): pass a single number or string
            - Range operators (BETWEEN): pass a tuple of two values
              (min_value, max_value)
            - List operators (IN, NOT_IN): pass a list of values
            - No parameter operators (NOT_NULL, NULL): pass None or omit this
              parameter
        transform (Optional[FieldTransformInputType]): Optional transform to
            apply to field values before evaluation. Currently only "length"
            or "LENGTH" is supported, and only for STRING columns.
        fail_threshold_type (Optional[FailThresholdInputType]): The type of
            failure threshold. Valid values are:
            - "count" or "COUNT": Absolute number of failing rows
            - "percentage" or "PERCENTAGE": Percentage of failing rows
            If not provided, defaults to "count" for new assertions.
        fail_threshold_value (Optional[int]): The failure threshold value.
            For COUNT type, this is the maximum number of rows allowed to
            fail. For PERCENTAGE type, this is the maximum percentage (0-100)
            allowed to fail. Defaults to 0 (no failures allowed) if not
            provided for new assertions.
        exclude_nulls (Optional[bool]): Whether to exclude null values when
            evaluating the assertion. Defaults to True if not provided for
            new assertions.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion.
            If not provided, a urn will be generated and the assertion will
            be created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. If
            not provided, a random display name will be generated.
        enabled (Optional[bool]): Whether the assertion is enabled. If not
            provided, defaults to True for new assertions.
        detection_mechanism (DetectionMechanismInputTypes): The detection
            mechanism to be used for the assertion. Valid values are
            (additional_filter is optional):
            - "all_rows_query_datahub_dataset_profile" or
              DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
            - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with
              additional_filter:
              {"type": "all_rows_query", "additional_filter": "last_modified > '2021-01-01'"}
              or DetectionMechanism.ALL_ROWS_QUERY(additional_filter="last_modified > '2021-01-01'")
            - {"type": "changed_rows_query", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"}
              or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter="last_modified > '2021-01-01'")
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]):
            The incident behavior to be applied to the assertion. Valid
            values are: "raise_on_fail", "resolve_on_pass", or the typed ones
            (AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags to be applied to the
            assertion. Valid values are: a list of strings, TagUrn objects,
            or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the
            user who updated the assertion. The format is
            "urn:li:corpuser:<username>". The default is the datahub system
            user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional
            cron formatted schedule for the assertion. If not provided, a
            default schedule of every 6 hours will be used. The format is a
            cron expression, e.g. "0 * * * *" for every hour using UTC
            timezone. Alternatively, a models.CronScheduleClass object can be
            provided.
        skip_dataset_exists_check (bool): If False (default), verifies the
            dataset_urn exists before creating/updating the assertion. Set to
            True when creating assertions before ingesting datasets (e.g.,
            setting up assertions in a new environment before running
            ingestion pipelines), or when the dataset exists but may not be
            visible to the current API endpoint.

    Returns:
        ColumnValueAssertion: The created or updated assertion.
    """
    # The dataset check runs before anything is delegated; the flag is handed
    # to the helper unchanged.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)
    return self._column_value_client.sync_column_value_assertion(
        dataset_urn=dataset_urn,
        column_name=column_name,
        operator=operator,
        criteria_parameters=criteria_parameters,
        transform=transform,
        fail_threshold_type=fail_threshold_type,
        fail_threshold_value=fail_threshold_value,
        exclude_nulls=exclude_nulls,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
|
|
553
|
+
|
|
554
|
+
def sync_smart_column_metric_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: Optional[str] = None,
    metric_type: Optional[MetricInputType] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> SmartColumnMetricAssertion:
    """Create a smart column metric assertion, or merge changes into an existing one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics:
        - If the assertion does not exist yet, it is created.
        - If it already exists, it is updated. An argument that is not None
          overwrites the stored field; an argument left as None keeps the
          stored value.
        - A field that can be un-set is cleared by passing an empty value
          (e.g. an empty list or an empty string).

    Schedule behavior:
        - Create case: the provided schedule, or the default daily schedule
          ("0 0 * * *") when none is given.
        - Update case: the provided schedule, or the existing schedule when
          none is given.

    Examples:
        # Using enum values (recommended for type safety)
        from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import MetricType
        client.sync_smart_column_metric_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="user_id",
            metric_type=MetricType.NULL_COUNT
        )

        # Using case-insensitive strings (more flexible)
        client.sync_smart_column_metric_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
            column_name="price",
            metric_type="mean"
        )

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be
            monitored.
        column_name (Optional[str]): The name of the column to be monitored.
            Required for creation, optional for updates.
        metric_type (Optional[MetricInputType]): The type of the metric to be
            monitored. Required for creation, optional for updates. Valid
            values are:
            - Using MetricType enum: MetricType.NULL_COUNT,
              MetricType.NULL_PERCENTAGE, MetricType.UNIQUE_COUNT,
              MetricType.UNIQUE_PERCENTAGE, MetricType.MAX_LENGTH,
              MetricType.MIN_LENGTH, MetricType.EMPTY_COUNT,
              MetricType.EMPTY_PERCENTAGE, MetricType.MIN, MetricType.MAX,
              MetricType.MEAN, MetricType.MEDIAN, MetricType.STDDEV,
              MetricType.NEGATIVE_COUNT, MetricType.NEGATIVE_PERCENTAGE,
              MetricType.ZERO_COUNT, MetricType.ZERO_PERCENTAGE
            - Using case-insensitive strings: "null_count", "MEAN",
              "Max_Length", etc.
            - Using models enum: models.FieldMetricTypeClass.NULL_COUNT, etc.
              (import with: from datahub.metadata import schema_classes as models)
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion.
            When omitted, a urn is generated and the assertion is created in
            the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        detection_mechanism (DetectionMechanismInputTypes): The detection
            mechanism used by the assertion. Valid values are
            (additional_filter is optional):
            - "all_rows_query_datahub_dataset_profile" or
              DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
            - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with
              additional_filter:
              {"type": "all_rows_query", "additional_filter": "last_modified > '2021-01-01'"}
              or DetectionMechanism.ALL_ROWS_QUERY(additional_filter="last_modified > '2021-01-01'")
            - {"type": "changed_rows_query", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"}
              or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter="last_modified > '2021-01-01'")
        sensitivity (Optional[Union[str, InferenceSensitivity]]): The
            sensitivity applied to the assertion. Valid values are: "low",
            "medium", "high".
        exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion
            windows applied to the assertion. Only fixed range exclusion
            windows are supported.
        training_data_lookback_days (Optional[int]): The training data
            lookback days applied to the assertion, as an integer.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]):
            The incident behavior applied to the assertion. Valid values are:
            "raise_on_fail", "resolve_on_pass", or the typed ones
            (AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags applied to the assertion.
            Valid values are: a list of strings, TagUrn objects, or
            TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the
            user who updated the assertion, in the format
            "urn:li:corpuser:<username>". The default is the datahub system
            user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional
            cron formatted schedule for the assertion, e.g. "0 0 * * *" for
            daily at midnight using UTC timezone. A models.CronScheduleClass
            object is accepted as well. When omitted, a default daily
            schedule is used.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set to
            True when creating assertions before ingesting datasets (e.g.,
            setting up assertions in a new environment before running
            ingestion pipelines), or when the dataset exists but may not be
            visible to the current API endpoint.

    Returns:
        SmartColumnMetricAssertion: The created or updated assertion.
    """
    # The dataset check runs before anything is delegated; the flag is handed
    # to the helper unchanged.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    smart_metric_client = self._smart_column_metric_client
    return smart_metric_client.sync_smart_column_metric_assertion(
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
|
|
653
|
+
|
|
654
|
+
def sync_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    freshness_schedule_check_type: Optional[
        Union[
            str,
            FreshnessAssertionScheduleCheckType,
            models.FreshnessAssertionScheduleTypeClass,
        ]
    ] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
    skip_dataset_exists_check: bool = False,
) -> FreshnessAssertion:
    """Create a freshness assertion, or merge changes into an existing one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics: if the assertion does not exist yet it is created;
    if it exists it is updated. An argument left as ``None`` keeps the
    stored value, a non-``None`` argument overwrites it, and a field that
    can be cleared is cleared by passing an empty value (e.g. an empty
    list or an empty string).

    Schedule behavior:
        - On create: the provided schedule, otherwise the daily default
          ("0 0 * * *").
        - On update: the provided schedule, otherwise the existing one.

    Args:
        dataset_urn (Union[str, DatasetUrn]): Urn of the dataset to monitor.
        urn (Optional[Union[str, AssertionUrn]]): Urn of the assertion. When
            omitted, a urn is generated and the assertion is created in the
            DataHub instance.
        display_name (Optional[str]): Display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        detection_mechanism (DetectionMechanismInputTypes): How freshness is
            detected. Information schema is recommended. Accepted values:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {"type": "last_modified_column", "column_name": "last_modified",
              "additional_filter": "last_modified > '2021-01-01'"} or
              DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
              additional_filter="last_modified > '2021-01-01'")
            - {"type": "high_watermark_column", "column_name": "id",
              "additional_filter": "id > 1000"} or
              DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
              additional_filter='id > 1000')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior,
            list[AssertionIncidentBehavior]]]): Incident behavior for the
            assertion: "raise_on_fail", "resolve_on_pass", or the typed
            equivalents AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS.
        tags (Optional[TagsInputType]): Tags for the assertion: a list of
            strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user making
            the change, formatted as "urn:li:corpuser:<username>". Defaults
            to the datahub system user.
        freshness_schedule_check_type (Optional[Union[str,
            FreshnessAssertionScheduleCheckType,
            models.FreshnessAssertionScheduleTypeClass]]): Freshness schedule
            check type. Valid values are: "since_the_last_check",
            "fixed_interval".
        schedule (Optional[Union[str, models.CronScheduleClass]]): Cron
            schedule for the assertion, e.g. "0 0 * * *" for daily at
            midnight (UTC), or a models.CronScheduleClass object. When
            omitted, a default daily schedule is used.
        lookback_window (Optional[TimeWindowSizeInputTypes]): Lookback window
            for the assertion. Accepted values:
            - TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10) for 10 minutes
            - TimeWindowSize(unit=CalendarInterval.HOUR, multiple=2) for 2 hours
            - TimeWindowSize(unit=CalendarInterval.DAY, multiple=1) for 1 day
            - {"unit": "MINUTE", "multiple": 30} for 30 minutes (using dict)
            - {"unit": "HOUR", "multiple": 6} for 6 hours (using dict)
            - {"unit": "DAY", "multiple": 7} for 7 days (using dict)
            CalendarInterval accepts "MINUTE", "HOUR" and "DAY"; ``multiple``
            is the integer number of units.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set
            to True when creating assertions before ingesting datasets
            (e.g. preparing a new environment before running ingestion
            pipelines), or when the dataset exists but may not be visible to
            the current API endpoint.

    Returns:
        FreshnessAssertion: The created or updated assertion.
    """
    # The existence check (and honoring of the skip flag) lives in the helper.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    freshness_client = self._freshness_client
    synced_assertion = freshness_client.sync_freshness_assertion(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        freshness_schedule_check_type=freshness_schedule_check_type,
        schedule=schedule,
        lookback_window=lookback_window,
    )
    return synced_assertion
|
|
736
|
+
|
|
737
|
+
def sync_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    criteria_condition: Optional[Union[str, VolumeAssertionCondition]] = None,
    criteria_parameters: Optional[VolumeAssertionDefinitionParameters] = None,
    skip_dataset_exists_check: bool = False,
) -> VolumeAssertion:
    """Create a volume assertion, or merge changes into an existing one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics: if the assertion does not exist yet it is created;
    if it exists it is updated. An argument left as ``None`` keeps the
    stored value, a non-``None`` argument overwrites it, and a field that
    can be cleared is cleared by passing an empty value (e.g. an empty
    list or an empty string).

    Schedule behavior:
        - On create: the provided schedule, otherwise the daily default
          ("0 0 * * *").
        - On update: the provided schedule, otherwise the existing one.

    Args:
        dataset_urn (Union[str, DatasetUrn]): Urn of the dataset to monitor.
        urn (Optional[Union[str, AssertionUrn]]): Urn of the assertion. When
            omitted, a urn is generated and the assertion is created in the
            DataHub instance.
        display_name (Optional[str]): Display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        detection_mechanism (DetectionMechanismInputTypes): How the row count
            is obtained. Information schema is recommended. Accepted values
            (additional_filter is optional):
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - {"type": "query", "additional_filter": "value > 1000"} or
              DetectionMechanism.QUERY(additional_filter='value > 1000')
            - "dataset_profile" or DetectionMechanism.DATASET_PROFILE
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior,
            list[AssertionIncidentBehavior]]]): Incident behavior for the
            assertion: "raise_on_fail", "resolve_on_pass", or the typed
            equivalents AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS.
        tags (Optional[TagsInputType]): Tags for the assertion: a list of
            strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user making
            the change, formatted as "urn:li:corpuser:<username>". Defaults
            to the datahub system user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Cron
            schedule for the assertion, e.g. "0 0 * * *" for daily at
            midnight (UTC), or a models.CronScheduleClass object. When
            omitted, a default daily schedule is used.
        criteria_condition (Optional[Union[str, VolumeAssertionCondition]]):
            Condition evaluated by the volume assertion. Valid values are:
            - "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO" -> row count is at most
              the threshold.
            - "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO" -> row count is at
              least the threshold.
            - "ROW_COUNT_IS_WITHIN_A_RANGE" -> row count is within the
              specified range.
            - "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE" -> row count growth is at
              most the threshold (absolute change).
            - "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE" -> row count growth is
              at least the threshold (absolute change).
            - "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE" -> row count growth
              is within the specified range (absolute change).
            - "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE" -> row count growth is
              at most the threshold (percentage change).
            - "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE" -> row count growth is
              at least the threshold (percentage change).
            - "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE" -> row count growth
              is within the specified range (percentage change).
            When omitted on update, the existing definition from the backend
            is preserved. Required when creating a new assertion (urn is
            None).
        criteria_parameters (Optional[VolumeAssertionDefinitionParameters]):
            Threshold parameters for the condition: a single threshold value
            or a tuple range.
            - Range-based conditions (ROW_COUNT_IS_WITHIN_A_RANGE,
              ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE,
              ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE) take a tuple of two
              threshold values in the form (min, max).
            - All other conditions take a single numeric threshold value.
            When omitted on update, the existing value is preserved. Required
            when creating a new assertion.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set
            to True when creating assertions before ingesting datasets
            (e.g. preparing a new environment before running ingestion
            pipelines), or when the dataset exists but may not be visible to
            the current API endpoint.

    Returns:
        VolumeAssertion: The created or updated assertion.
    """
    # The existence check (and honoring of the skip flag) lives in the helper.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    volume_client = self._volume_client
    synced_assertion = volume_client.sync_volume_assertion(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
        criteria_condition=criteria_condition,
        criteria_parameters=criteria_parameters,
    )
    return synced_assertion
|
|
817
|
+
|
|
818
|
+
def sync_sql_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    statement: Optional[str] = None,
    criteria_condition: Optional[Union[SqlAssertionCondition, str]] = None,
    criteria_parameters: Optional[
        Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
    ] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> SqlAssertion:
    """Create a SQL assertion, or merge changes into an existing one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics: if the assertion does not exist yet it is created;
    if it exists it is updated. An argument left as ``None`` keeps the
    stored value, a non-``None`` argument overwrites it, and a field that
    can be cleared is cleared by passing an empty value (e.g. an empty
    list or an empty string).

    Schedule behavior:
        - On create: the provided schedule, otherwise the daily default
          ("0 0 * * *").
        - On update: the provided schedule, otherwise the existing one.

    Args:
        dataset_urn (Union[str, DatasetUrn]): Urn of the dataset to monitor.
        urn (Optional[Union[str, AssertionUrn]]): Urn of the assertion. When
            omitted, a urn is generated and the assertion is created in the
            DataHub instance.
        display_name (Optional[str]): Display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        statement (Optional[str]): SQL statement evaluated by the assertion.
            Required when creating a new assertion (urn=None), optional when
            updating an existing one.
        criteria_condition (Optional[Union[SqlAssertionCondition, str]]):
            Condition applied to the statement's result. Required when
            creating a new assertion (urn=None), optional when updating an
            existing one. Valid values are:
            - "IS_EQUAL_TO" -> metric value equals the threshold.
            - "IS_NOT_EQUAL_TO" -> metric value does not equal the threshold.
            - "IS_GREATER_THAN" -> metric value is greater than the threshold.
            - "IS_LESS_THAN" -> metric value is less than the threshold.
            - "IS_WITHIN_A_RANGE" -> metric value is within the specified range.
            - "GROWS_AT_MOST_ABSOLUTE" -> metric growth is at most the
              threshold (absolute change).
            - "GROWS_AT_MOST_PERCENTAGE" -> metric growth is at most the
              threshold (percentage change).
            - "GROWS_AT_LEAST_ABSOLUTE" -> metric growth is at least the
              threshold (absolute change).
            - "GROWS_AT_LEAST_PERCENTAGE" -> metric growth is at least the
              threshold (percentage change).
            - "GROWS_WITHIN_A_RANGE_ABSOLUTE" -> metric growth is within the
              specified range (absolute change).
            - "GROWS_WITHIN_A_RANGE_PERCENTAGE" -> metric growth is within
              the specified range (percentage change).
        criteria_parameters (Optional[Union[float, int, tuple[Union[float, int],
            Union[float, int]]]]): Threshold parameters for the condition: a
            single threshold value or a tuple range. Required when creating
            a new assertion (urn=None), optional when updating an existing
            one.
            - Range-based conditions (IS_WITHIN_A_RANGE,
              GROWS_WITHIN_A_RANGE_ABSOLUTE, GROWS_WITHIN_A_RANGE_PERCENTAGE)
              take a tuple of two threshold values in the form (min, max).
            - All other conditions take a single numeric threshold value.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior,
            list[AssertionIncidentBehavior]]]): Incident behavior for the
            assertion: "raise_on_fail", "resolve_on_pass", or the typed
            equivalents AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS.
        tags (Optional[TagsInputType]): Tags for the assertion: a list of
            strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user making
            the change, formatted as "urn:li:corpuser:<username>". Defaults
            to the datahub system user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Cron
            schedule for the assertion, e.g. "0 0 * * *" for daily at
            midnight (UTC), or a models.CronScheduleClass object. When
            omitted, a default daily schedule is used.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set
            to True when creating assertions before ingesting datasets
            (e.g. preparing a new environment before running ingestion
            pipelines), or when the dataset exists but may not be visible to
            the current API endpoint.

    Returns:
        SqlAssertion: The created or updated assertion.
    """
    # The existence check (and honoring of the skip flag) lives in the helper.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    sql_client = self._sql_client
    synced_assertion = sql_client.sync_sql_assertion(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        statement=statement,
        criteria_condition=criteria_condition,
        criteria_parameters=criteria_parameters,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
    return synced_assertion
|
|
897
|
+
|
|
898
|
+
def sync_smart_sql_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    statement: Optional[str] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> SmartSqlAssertion:
    """Create a smart (AI-inferred) SQL assertion, or merge changes into one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics: if the assertion does not exist yet it is created;
    if it exists it is updated. An argument left as ``None`` keeps the
    stored value, a non-``None`` argument overwrites it, and a field that
    can be cleared is cleared by passing an empty value (e.g. an empty
    list or an empty string).

    Smart SQL assertions rely on machine learning to infer suitable
    thresholds for the SQL query result, so no fixed threshold values need
    to be supplied.

    Schedule behavior:
        - On create: the provided schedule, otherwise the default of every
          6 hours.
        - On update: the provided schedule, otherwise the existing one.

    Args:
        dataset_urn (Union[str, DatasetUrn]): Urn of the dataset to monitor.
        statement (Optional[str]): SQL statement evaluated by the assertion.
            Required when creating a new assertion (urn=None), optional when
            updating an existing one. The query should return a single
            numeric value. Example:
            "SELECT COUNT(*) FROM table WHERE status = 'active'"
        urn (Optional[Union[str, AssertionUrn]]): Urn of the assertion. When
            omitted, a urn is generated and the assertion is created in the
            DataHub instance.
        display_name (Optional[str]): Display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        sensitivity (Optional[Union[str, InferenceSensitivity]]): Sensitivity
            level for AI inference. Valid values are: "low", "medium", "high".
            - "low": Less sensitive, fewer alerts for anomalies
            - "medium": Balanced sensitivity (default)
            - "high": More sensitive, more alerts for smaller deviations
        exclusion_windows (Optional[ExclusionWindowInputTypes]): Exclusion
            windows applied to the assertion. Only fixed range exclusion
            windows are supported. Accepted values:
            - {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"}
              (using ISO strings)
            - {"start": datetime(2025, 1, 1, 0, 0, 0),
              "end": datetime(2025, 1, 2, 0, 0, 0)} (using datetime objects)
            - FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0),
              end=datetime(2025, 1, 2, 0, 0, 0)) (using typed object)
            - A list of any of the above formats
        training_data_lookback_days (Optional[int]): Number of days of
            historical data used to train the AI model.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior,
            list[AssertionIncidentBehavior]]]): Incident behavior for the
            assertion: "raise_on_fail", "resolve_on_pass", or the typed
            equivalents AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS.
        tags (Optional[TagsInputType]): Tags for the assertion: a list of
            strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user making
            the change, formatted as "urn:li:corpuser:<username>". Defaults
            to the datahub system user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Cron
            schedule for the assertion, e.g. "0 */6 * * *" for every 6 hours
            (UTC), or a models.CronScheduleClass object. When omitted, a
            default schedule of every 6 hours is used.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set
            to True when creating assertions before ingesting datasets
            (e.g. preparing a new environment before running ingestion
            pipelines), or when the dataset exists but may not be visible to
            the current API endpoint.

    Returns:
        SmartSqlAssertion: The created or updated assertion.
    """
    # The existence check (and honoring of the skip flag) lives in the helper.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    smart_sql_client = self._smart_sql_client
    synced_assertion = smart_sql_client.sync_smart_sql_assertion(
        dataset_urn=dataset_urn,
        statement=statement,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
    return synced_assertion
|
|
976
|
+
|
|
977
|
+
def sync_schema_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    compatibility: Optional[Union[str, SchemaAssertionCompatibility]] = None,
    fields: Optional[SchemaAssertionFieldsInputType] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    skip_dataset_exists_check: bool = False,
) -> SchemaAssertion:
    """Create a schema-validation assertion, or merge changes into one.

    Note:
        Every argument must be passed by keyword.

    Merge semantics: if the assertion does not exist yet it is created;
    if it exists it is updated. An argument left as ``None`` keeps the
    stored value, a non-``None`` argument overwrites it, and a field that
    can be cleared is cleared by passing an empty value (e.g. an empty
    list or an empty string).

    A schema assertion checks that the dataset's schema matches a set of
    expected field definitions, under a configurable compatibility mode
    (EXACT_MATCH, SUPERSET, SUBSET).

    Schedule behavior:
        - On create: the provided schedule, otherwise the default of every
          6 hours.
        - On update: the provided schedule, otherwise the existing one.

    Args:
        dataset_urn (Union[str, DatasetUrn]): Urn of the dataset to monitor.
        urn (Optional[Union[str, AssertionUrn]]): Urn of the assertion. When
            omitted, a urn is generated and the assertion is created in the
            DataHub instance.
        display_name (Optional[str]): Display name of the assertion. When
            omitted, a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When
            omitted, the existing value is preserved.
        compatibility (Optional[Union[str, SchemaAssertionCompatibility]]):
            Compatibility mode for schema validation. Valid values are:
            - "EXACT_MATCH" -> the schema must exactly match the expected
              fields (default).
            - "SUPERSET" -> the schema must contain at least all the
              expected fields.
            - "SUBSET" -> the schema must be a subset of the expected fields.
        fields (Optional[SchemaAssertionFieldsInputType]): Expected schema
            fields. Required when creating a new assertion (urn=None). Each
            field can be given as:
            - A dict with 'path' and 'type' keys:
              {"path": "column_name", "type": "STRING"}
            - A dict with optional 'native_type':
              {"path": "id", "type": "NUMBER", "native_type": "BIGINT"}
            - A SchemaAssertionField object
            Valid type values are: BYTES, FIXED, BOOLEAN, STRING, NUMBER,
            DATE, TIME, ENUM, NULL, ARRAY, MAP, STRUCT, UNION
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior,
            list[AssertionIncidentBehavior]]]): Incident behavior for the
            assertion: "raise_on_fail", "resolve_on_pass", or the typed
            equivalents AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS.
        tags (Optional[TagsInputType]): Tags for the assertion: a list of
            strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user making
            the change, formatted as "urn:li:corpuser:<username>". Defaults
            to the datahub system user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Cron
            schedule for the assertion, e.g. "0 */6 * * *" for every 6 hours
            (UTC). When omitted, a default schedule of every 6 hours is used.
        skip_dataset_exists_check (bool): If False (default), verifies that
            dataset_urn exists before creating/updating the assertion. Set
            to True when creating assertions before ingesting datasets
            (e.g. preparing a new environment before running ingestion
            pipelines), or when the dataset exists but may not be visible to
            the current API endpoint.

    Returns:
        SchemaAssertion: The created or updated assertion.

    Example:
        ```python
        # Create a schema assertion with exact match
        assertion = client.assertions.sync_schema_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)",
            display_name="Expected Schema Check",
            compatibility="EXACT_MATCH",
            fields=[
                {"path": "id", "type": "STRING"},
                {"path": "count", "type": "NUMBER"},
                {"path": "created_at", "type": "TIME"},
            ],
            enabled=True
        )

        # Create a superset schema assertion (actual schema must contain at least these fields)
        assertion = client.assertions.sync_schema_assertion(
            dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)",
            compatibility="SUPERSET",
            fields=[
                {"path": "id", "type": "STRING"},
                {"path": "name", "type": "STRING"},
            ],
        )
        ```
    """
    # The existence check (and honoring of the skip flag) lives in the helper.
    self._check_dataset_exists(dataset_urn, skip_dataset_exists_check)

    schema_client = self._schema_client
    synced_assertion = schema_client.sync_schema_assertion(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        compatibility=compatibility,
        fields=fields,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        schedule=schedule,
    )
    return synced_assertion
|