acryl-datahub-cloud 0.3.11rc0__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
- acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +37 -13
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +55 -24
- acryl_datahub_cloud/datahub_reporting/extract_graph.py +4 -3
- acryl_datahub_cloud/datahub_reporting/extract_sql.py +242 -51
- acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
- acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
- acryl_datahub_cloud/datahub_restore/source.py +3 -2
- acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +518 -77
- acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
- acryl_datahub_cloud/graphql_utils.py +64 -0
- acryl_datahub_cloud/lineage_features/source.py +555 -49
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2296 -1900
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +4 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +12 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
- acryl_datahub_cloud/metadata/schema.avsc +25091 -20557
- acryl_datahub_cloud/metadata/schema_classes.py +29269 -23863
- acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
- acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +353 -215
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +166 -21
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
- acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
- acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
- acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
- acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
- acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
- acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
- acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
- acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
- acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
- acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
- acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
- acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
- acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
- acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
- acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
- acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
- acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
- acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
- acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
- acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
- acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +418 -97
- acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +62 -44
- acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +54 -9
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +163 -23
- acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
- acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
- acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
- acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
- acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
- acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
- acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +61 -0
- acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +69 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
- acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
- acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
- acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
- acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
- acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
- acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
- acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
- acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
- acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
- acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
- acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
- acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
- acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
- acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
- acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
- acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +42 -0
- acryl_datahub_cloud/sdk/subscription_client.py +737 -0
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +55 -49
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +235 -142
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
- acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
- {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema assertion input module.
|
|
3
|
+
|
|
4
|
+
This module provides the input types and classes for creating schema assertions
|
|
5
|
+
that validate dataset schemas match expected field definitions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Optional, Union
|
|
13
|
+
|
|
14
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
15
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
16
|
+
AssertionIncidentBehaviorInputTypes,
|
|
17
|
+
FieldSpecType,
|
|
18
|
+
_AssertionInput,
|
|
19
|
+
_SchemaMetadata,
|
|
20
|
+
)
|
|
21
|
+
from acryl_datahub_cloud.sdk.entities.assertion import (
|
|
22
|
+
AssertionInfoInputType,
|
|
23
|
+
TagsInputType,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.errors import SDKUsageError, SDKUsageErrorWithExamples
|
|
26
|
+
from datahub.metadata import schema_classes as models
|
|
27
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
|
|
28
|
+
from datahub.sdk.entity_client import EntityClient
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class SchemaFieldDataType(str, Enum):
    """The type of a schema field for schema assertions.

    Every member's value equals its own name, so a member can be looked up
    from the upper-case string form, e.g. ``SchemaFieldDataType("STRING")``.
    Because the enum also subclasses ``str``, members compare equal to their
    plain-string value.
    """

    BYTES = "BYTES"
    FIXED = "FIXED"
    BOOLEAN = "BOOLEAN"
    STRING = "STRING"
    NUMBER = "NUMBER"
    DATE = "DATE"
    TIME = "TIME"
    ENUM = "ENUM"
    NULL = "NULL"
    ARRAY = "ARRAY"
    MAP = "MAP"
    STRUCT = "STRUCT"  # Maps to RecordType in the backend
    UNION = "UNION"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SchemaAssertionCompatibility(str, Enum):
    """The compatibility level required for a schema assertion to pass.

    Every member's value equals its own name, so a member can be looked up
    from the upper-case string form, e.g.
    ``SchemaAssertionCompatibility("SUPERSET")``.
    """

    EXACT_MATCH = "EXACT_MATCH"
    SUPERSET = "SUPERSET"
    SUBSET = "SUBSET"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Compatibility mode used when the caller does not specify one.
DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY = SchemaAssertionCompatibility.EXACT_MATCH

# Default detection mechanism for schema assertions
# Schema assertions validate against DataHub's schema metadata (DATAHUB_SCHEMA source)
DEFAULT_SCHEMA_DETECTION_MECHANISM = _SchemaMetadata()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Usage examples attached to errors raised for an invalid field data type.
SCHEMA_FIELD_DATA_TYPE_EXAMPLES = {
    "String from enum": "SchemaFieldDataType.STRING",
    "String from string": "STRING",
    "Number from enum": "SchemaFieldDataType.NUMBER",
    "Number from string": "NUMBER",
    "Boolean from enum": "SchemaFieldDataType.BOOLEAN",
    "Struct from enum": "SchemaFieldDataType.STRUCT",
}
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _parse_schema_field_data_type(
    field_type: Union[str, SchemaFieldDataType],
) -> SchemaFieldDataType:
    """Parse a schema field data type from string or enum.

    Args:
        field_type: A ``SchemaFieldDataType`` member, or the name of one as a
            string. Strings are matched case-insensitively and surrounding
            whitespace is ignored.

    Returns:
        The matching ``SchemaFieldDataType`` member.

    Raises:
        SDKUsageErrorWithExamples: If ``field_type`` is neither an enum member
            nor a string naming one.
    """
    # Checked first: enum members are also ``str`` instances.
    if isinstance(field_type, SchemaFieldDataType):
        return field_type

    if not isinstance(field_type, str):
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid schema field data type: {field_type}",
            examples=SCHEMA_FIELD_DATA_TYPE_EXAMPLES,
        )

    try:
        return SchemaFieldDataType(field_type.strip().upper())
    except ValueError as e:
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid schema field data type: {field_type}. "
            f"Valid options are: {[t.value for t in SchemaFieldDataType]}",
            examples=SCHEMA_FIELD_DATA_TYPE_EXAMPLES,
        ) from e
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Usage examples attached to errors raised for an invalid compatibility value.
SCHEMA_ASSERTION_COMPATIBILITY_EXAMPLES = {
    "Exact match from enum": "SchemaAssertionCompatibility.EXACT_MATCH",
    "Exact match from string": "EXACT_MATCH",
    "Superset from enum": "SchemaAssertionCompatibility.SUPERSET",
    "Subset from enum": "SchemaAssertionCompatibility.SUBSET",
}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _parse_schema_assertion_compatibility(
    compatibility: Optional[Union[str, SchemaAssertionCompatibility]],
) -> SchemaAssertionCompatibility:
    """Parse a schema assertion compatibility from string or enum.

    Args:
        compatibility: A ``SchemaAssertionCompatibility`` member, the name of
            one as a string (matched case-insensitively, surrounding
            whitespace ignored), or ``None`` to use the module default.

    Returns:
        The matching ``SchemaAssertionCompatibility`` member, or
        ``DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY`` when ``compatibility`` is
        ``None``.

    Raises:
        SDKUsageErrorWithExamples: If ``compatibility`` is neither ``None``,
            an enum member, nor a string naming one.
    """
    if compatibility is None:
        return DEFAULT_SCHEMA_ASSERTION_COMPATIBILITY

    # Checked before the ``str`` test: enum members are also ``str`` instances.
    if isinstance(compatibility, SchemaAssertionCompatibility):
        return compatibility

    if not isinstance(compatibility, str):
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid schema assertion compatibility: {compatibility}",
            examples=SCHEMA_ASSERTION_COMPATIBILITY_EXAMPLES,
        )

    try:
        return SchemaAssertionCompatibility(compatibility.strip().upper())
    except ValueError as e:
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid schema assertion compatibility: {compatibility}. "
            f"Valid options are: {[c.value for c in SchemaAssertionCompatibility]}",
            examples=SCHEMA_ASSERTION_COMPATIBILITY_EXAMPLES,
        ) from e
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass(frozen=True)
class SchemaAssertionField:
    """
    A field definition for schema assertions.

    Args:
        path: The field path within the schema (e.g., "id", "struct.nestedField").
        type: The expected data type of the field.
        native_type: Optional platform-specific native type (e.g., "VARCHAR(255)").
    """

    path: str
    type: SchemaFieldDataType
    native_type: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict) -> "SchemaAssertionField":
        """Create a SchemaAssertionField from a dictionary.

        Args:
            data: Mapping with required keys ``"path"`` and ``"type"``. The
                native type is read from ``"native_type"``, falling back to
                ``"nativeType"`` when the former is missing or falsy.

        Returns:
            A new ``SchemaAssertionField`` whose ``type`` has been parsed by
            ``_parse_schema_field_data_type``.

        Raises:
            SDKUsageError: If ``"path"`` or ``"type"`` is missing from ``data``.
        """
        if "path" not in data:
            raise SDKUsageError("SchemaAssertionField requires 'path' field")
        if "type" not in data:
            raise SDKUsageError("SchemaAssertionField requires 'type' field")

        # Accept both the snake_case and camelCase spelling of the key.
        native_type = data.get("native_type") or data.get("nativeType")

        return cls(
            path=data["path"],
            type=_parse_schema_field_data_type(data["type"]),
            native_type=native_type,
        )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# A single field may be given as a SchemaAssertionField or as a plain dict.
SchemaAssertionFieldInputType = Union[SchemaAssertionField, dict]
# The full field list is a sequence of either form.
SchemaAssertionFieldsInputType = Sequence[SchemaAssertionFieldInputType]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Usage examples attached to errors raised for invalid field definitions.
SCHEMA_ASSERTION_FIELD_EXAMPLES: dict[str, Union[dict[str, str], str]] = {
    "Field from dict": {"path": "id", "type": "STRING"},
    "Field from dict with native type": {
        "path": "count",
        "type": "NUMBER",
        "native_type": "BIGINT",
    },
    "Field from SchemaAssertionField": "SchemaAssertionField(path='id', type=SchemaFieldDataType.STRING)",
    "List of fields": '[{"path": "id", "type": "STRING"}, {"path": "count", "type": "NUMBER"}]',
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _parse_schema_assertion_fields(
    fields: Optional[SchemaAssertionFieldsInputType],
) -> Optional[list[SchemaAssertionField]]:
    """Parse a list of schema assertion fields from various input types.

    Args:
        fields: ``None``, or a non-empty sequence (list, tuple, ...) whose
            items are ``SchemaAssertionField`` instances or dicts accepted by
            ``SchemaAssertionField.from_dict``.

    Returns:
        ``None`` when ``fields`` is ``None``; otherwise a new list of
        ``SchemaAssertionField`` objects in input order.

    Raises:
        SDKUsageErrorWithExamples: If ``fields`` is not a sequence, or an item
            is neither a ``SchemaAssertionField`` nor a dict.
        SDKUsageError: If ``fields`` is empty.
    """
    if fields is None:
        return None

    # The parameter is typed as a Sequence, so tuples and other sequences are
    # accepted alongside lists. str/bytes are Sequences too, but are never a
    # valid collection of field definitions.
    if isinstance(fields, (str, bytes)) or not isinstance(fields, Sequence):
        raise SDKUsageErrorWithExamples(
            msg=f"Schema assertion fields must be a list, got {type(fields).__name__}",
            examples=SCHEMA_ASSERTION_FIELD_EXAMPLES,
        )

    if len(fields) == 0:
        raise SDKUsageError("Schema assertion fields cannot be empty")

    parsed_fields: list[SchemaAssertionField] = []
    for entry in fields:
        if isinstance(entry, SchemaAssertionField):
            parsed_fields.append(entry)
        elif isinstance(entry, dict):
            parsed_fields.append(SchemaAssertionField.from_dict(entry))
        else:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid schema assertion field: {entry}",
                examples=SCHEMA_ASSERTION_FIELD_EXAMPLES,
            )

    return parsed_fields
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# Mapping from SchemaFieldDataType to schema_classes type
SCHEMA_FIELD_TYPE_TO_CLASS_MAP: dict[SchemaFieldDataType, type] = {
    SchemaFieldDataType.BYTES: models.BytesTypeClass,
    SchemaFieldDataType.FIXED: models.FixedTypeClass,
    SchemaFieldDataType.BOOLEAN: models.BooleanTypeClass,
    SchemaFieldDataType.STRING: models.StringTypeClass,
    SchemaFieldDataType.NUMBER: models.NumberTypeClass,
    SchemaFieldDataType.DATE: models.DateTypeClass,
    SchemaFieldDataType.TIME: models.TimeTypeClass,
    SchemaFieldDataType.ENUM: models.EnumTypeClass,
    SchemaFieldDataType.NULL: models.NullTypeClass,
    SchemaFieldDataType.ARRAY: models.ArrayTypeClass,
    SchemaFieldDataType.MAP: models.MapTypeClass,
    SchemaFieldDataType.STRUCT: models.RecordTypeClass,  # STRUCT maps to RecordType
    SchemaFieldDataType.UNION: models.UnionTypeClass,
}


# Reverse mapping for parsing from backend.
# Derived from the forward map (which is one-to-one) so the two tables cannot
# drift apart when a type is added or changed.
SCHEMA_FIELD_CLASS_TO_TYPE_MAP: dict[type, SchemaFieldDataType] = {
    type_class: data_type
    for data_type, type_class in SCHEMA_FIELD_TYPE_TO_CLASS_MAP.items()
}
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class _SchemaAssertionInput(_AssertionInput):
    """Input class for creating schema assertions.

    Stores the parsed compatibility mode and expected field list, and builds
    the assertion-info and monitor-info objects for a schema assertion.
    """

    def _assertion_type(self) -> str:
        """Get the assertion type."""
        return models.AssertionTypeClass.DATA_SCHEMA

    def __init__(
        self,
        *,
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,
        compatibility: Optional[Union[str, SchemaAssertionCompatibility]] = None,
        fields: Optional[SchemaAssertionFieldsInputType] = None,
        urn: Optional[Union[str, AssertionUrn]] = None,
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
    ):
        """
        Create a SchemaAssertionInput object.

        Args:
            dataset_urn: The URN of the dataset to validate.
            entity_client: The entity client for API operations.
            compatibility: The compatibility mode for schema validation
                (EXACT_MATCH, SUPERSET, SUBSET). Defaults to EXACT_MATCH.
            fields: The expected schema fields to validate.
            urn: Optional assertion URN for updates.
            display_name: Display name for the assertion.
            enabled: Whether the assertion is enabled.
            schedule: Cron schedule for evaluation.
            incident_behavior: Incident behavior on pass/fail.
            tags: Tags to apply to the assertion.
            created_by: User who created the assertion.
            created_at: Creation timestamp.
            updated_by: User who last updated the assertion.
            updated_at: Last update timestamp.

        Raises:
            SDKUsageErrorWithExamples: If ``compatibility`` or ``fields`` has
                an invalid type or value.
            SDKUsageError: If ``fields`` is empty or an item lacks a required key.
        """
        # Schema assertions validate against DataHub's schema metadata, so no
        # caller-supplied detection mechanism is accepted; only the module
        # default is passed. The DATAHUB_SCHEMA dataset-schema source type is
        # applied in the monitor parameters, and is separate from the
        # assertion source type (NATIVE) passed here.
        super().__init__(
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule,
            detection_mechanism=None,
            default_detection_mechanism=DEFAULT_SCHEMA_DETECTION_MECHANISM,
            incident_behavior=incident_behavior,
            tags=tags,
            source_type=models.AssertionSourceTypeClass.NATIVE,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )

        self.compatibility = _parse_schema_assertion_compatibility(compatibility)
        self.fields = _parse_schema_assertion_fields(fields)

    def _convert_schedule(self) -> models.CronScheduleClass:
        """Convert schedule to CronScheduleClass, using default if not provided."""
        if self.schedule is None:
            return DEFAULT_EVERY_SIX_HOURS_SCHEDULE
        # NOTE(review): reads .cron/.timezone, so self.schedule is presumably
        # already normalized to a schedule object by the base class — confirm.
        return models.CronScheduleClass(
            cron=self.schedule.cron,
            timezone=self.schedule.timezone,
        )

    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """Create a SchemaAssertionInfoClass for the schema assertion.

        Args:
            filter: Not used by schema assertions.

        Raises:
            SDKUsageError: If no fields were provided.
        """
        if self.fields is None:
            raise SDKUsageError(
                "Schema assertion requires 'fields' to be specified. "
                "Please provide a list of expected schema fields."
            )

        # Look the models constant up by enum value; if the models class has
        # no attribute of that name, this falls back to EXACT_MATCH.
        compatibility = getattr(
            models.SchemaAssertionCompatibilityClass,
            self.compatibility.value,
            models.SchemaAssertionCompatibilityClass.EXACT_MATCH,
        )

        return models.SchemaAssertionInfoClass(
            entity=str(self.dataset_urn),
            compatibility=compatibility,
            schema=self._create_schema_metadata(),
        )

    def _create_schema_metadata(self) -> models.SchemaMetadataClass:
        """Convert fields to SchemaMetadata for the assertion.

        Raises:
            SDKUsageError: If no fields were provided.
        """
        if self.fields is None:
            raise SDKUsageError("Schema assertion requires fields to be specified.")

        schema_fields = [self._create_schema_field(field) for field in self.fields]

        # Create SchemaMetadata with required fields
        # These are placeholder values since the actual schema metadata
        # is only used for the assertion definition, not for real schema tracking
        return models.SchemaMetadataClass(
            schemaName="assertion-schema-name",
            platform=str(self.dataset_urn.platform),
            version=0,
            hash="assertion-schema-hash",
            platformSchema=models.OtherSchemaClass(rawSchema=""),
            fields=schema_fields,
        )

    def _create_schema_field(
        self, field: SchemaAssertionField
    ) -> models.SchemaFieldClass:
        """Convert a SchemaAssertionField to a SchemaFieldClass.

        Raises:
            SDKUsageError: If ``field.type`` has no entry in
                ``SCHEMA_FIELD_TYPE_TO_CLASS_MAP``.
        """
        type_class = SCHEMA_FIELD_TYPE_TO_CLASS_MAP.get(field.type)
        if type_class is None:
            raise SDKUsageError(f"Unknown schema field type: {field.type}")

        return models.SchemaFieldClass(
            fieldPath=field.path,
            type=models.SchemaFieldDataTypeClass(type=type_class()),
            # A missing native type is written as an empty string.
            nativeDataType=field.native_type or "",
            nullable=False,
        )

    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
    ) -> models.MonitorInfoClass:
        """Create a MonitorInfoClass for the schema assertion.

        The monitor holds a single evaluation spec for ``assertion_urn`` on
        ``schedule``, with DATAHUB_SCHEMA dataset-schema parameters.
        """
        evaluation_spec = models.AssertionEvaluationSpecClass(
            assertion=str(assertion_urn),
            schedule=schedule,
            parameters=self._get_assertion_evaluation_parameters(
                models.DatasetSchemaSourceTypeClass.DATAHUB_SCHEMA, None
            ),
        )
        return models.MonitorInfoClass(
            type=models.MonitorTypeClass.ASSERTION,
            status=status,
            assertionMonitor=models.AssertionMonitorClass(
                assertions=[evaluation_spec]
            ),
        )

    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Get evaluation parameters for schema assertion.

        Args:
            source_type: Dataset-schema source type to record in the parameters.
            field: Not used by schema assertions.
        """
        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_SCHEMA,
            datasetSchemaParameters=models.DatasetSchemaAssertionParametersClass(
                sourceType=source_type,
            ),
        )

    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """Schema assertions always use DATAHUB_SCHEMA source type."""
        return models.DatasetSchemaSourceTypeClass.DATAHUB_SCHEMA, None
|