acryl-datahub-cloud 0.3.11rc0__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic.

Files changed (238)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
  3. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  4. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  10. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  11. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +37 -13
  12. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +55 -24
  13. acryl_datahub_cloud/datahub_reporting/extract_graph.py +4 -3
  14. acryl_datahub_cloud/datahub_reporting/extract_sql.py +242 -51
  15. acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
  16. acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
  17. acryl_datahub_cloud/datahub_restore/source.py +3 -2
  18. acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
  19. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  20. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +518 -77
  21. acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
  22. acryl_datahub_cloud/graphql_utils.py +64 -0
  23. acryl_datahub_cloud/lineage_features/source.py +555 -49
  24. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2296 -1900
  25. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
  26. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
  27. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  28. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +4 -2
  29. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  30. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
  31. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
  32. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
  33. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  34. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  35. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
  36. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
  37. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  38. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +12 -0
  39. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
  40. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  41. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  42. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  43. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  44. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  45. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
  46. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  47. acryl_datahub_cloud/metadata/schema.avsc +25091 -20557
  48. acryl_datahub_cloud/metadata/schema_classes.py +29269 -23863
  49. acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
  50. acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
  51. acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
  52. acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
  53. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  54. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
  55. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  56. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +353 -215
  57. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
  58. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +166 -21
  60. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
  61. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
  62. acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
  63. acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
  64. acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
  65. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  66. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  67. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  68. acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
  69. acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
  70. acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
  71. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  72. acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
  73. acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  74. acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
  75. acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
  76. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
  77. acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
  78. acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
  79. acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
  80. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  81. acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
  82. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  83. acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
  84. acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
  85. acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
  86. acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
  87. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  88. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  89. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
  90. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  91. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  92. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  93. acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
  94. acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
  95. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  96. acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
  97. acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
  98. acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
  99. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
  100. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
  101. acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
  102. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
  103. acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
  104. acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  105. acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
  106. acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
  107. acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
  108. acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
  109. acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
  110. acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
  111. acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
  112. acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
  113. acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
  114. acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
  115. acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
  116. acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
  117. acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
  118. acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
  119. acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
  120. acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
  121. acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
  122. acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
  123. acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
  124. acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
  125. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  126. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  127. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  128. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  129. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  130. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
  131. acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
  132. acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
  133. acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
  134. acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
  135. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
  136. acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
  137. acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
  138. acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
  139. acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
  140. acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
  141. acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
  142. acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
  143. acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
  144. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
  145. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
  146. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
  147. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
  148. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
  149. acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
  150. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
  151. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +418 -97
  152. acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +62 -44
  153. acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  154. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +54 -9
  155. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +163 -23
  156. acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
  157. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
  158. acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
  159. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  160. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
  161. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  162. acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
  163. acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
  164. acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  165. acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
  166. acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
  167. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
  168. acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
  169. acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
  170. acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
  171. acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
  172. acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  173. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
  174. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  175. acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +61 -0
  176. acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
  177. acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
  178. acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
  179. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  180. acryl_datahub_cloud/notifications/__init__.py +0 -0
  181. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  182. acryl_datahub_cloud/sdk/__init__.py +69 -0
  183. acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
  184. acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
  185. acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
  186. acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
  187. acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
  188. acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
  189. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
  190. acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
  191. acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
  192. acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
  193. acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
  194. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  195. acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
  196. acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
  197. acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
  198. acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
  199. acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
  200. acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
  201. acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
  202. acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
  203. acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
  204. acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
  205. acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
  206. acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
  207. acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
  208. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  209. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
  210. acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
  211. acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
  212. acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
  213. acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
  214. acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
  215. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
  216. acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
  217. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
  218. acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
  219. acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
  220. acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
  221. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
  222. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
  223. acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
  224. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  225. acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
  226. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  227. acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
  228. acryl_datahub_cloud/sdk/errors.py +34 -0
  229. acryl_datahub_cloud/sdk/resolver_client.py +42 -0
  230. acryl_datahub_cloud/sdk/subscription_client.py +737 -0
  231. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +55 -49
  232. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +235 -142
  233. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
  234. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
  235. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  236. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  237. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  238. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py (new file)
@@ -0,0 +1,810 @@
"""
Column value assertion input module.

This module provides the input class for creating column value assertions that validate
individual row values against semantic constraints (e.g., "all values in column X must match pattern Y"
or "no NULL values allowed").

Key differences from column_metric_assertion (FIELD_METRIC):
- FIELD_METRIC: Validates aggregated metrics (NULL_COUNT, MEAN, MIN, etc.)
- FIELD_VALUES: Validates each individual row value against an operator/predicate
"""

from datetime import datetime
from enum import Enum
from typing import Optional, Union

from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
    DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
    NO_PARAMETER_OPERATORS,
    RANGE_OPERATORS,
    SINGLE_VALUE_OPERATORS,
    AssertionIncidentBehaviorInputTypes,
    AssertionInfoInputType,
    DetectionMechanismInputTypes,
    FieldSpecType,
    _AllRowsQuery,
    _AllRowsQueryDataHubDatasetProfile,
    _AssertionInput,
    _ChangedRowsQuery,
    _DatasetProfile,
    _try_parse_and_validate_schema_classes_enum,
)
from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
    _try_parse_and_validate_range,
    _try_parse_and_validate_range_type,
    _try_parse_and_validate_value,
    _try_parse_and_validate_value_type,
)
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
    ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION,
    FIELD_VALUES_OPERATOR_CONFIG,
    OperatorInputType,
    RangeInputType,
    RangeTypeInputType,
    RangeTypeParsedType,
    ValueInputType,
    ValueType,
    ValueTypeInputType,
)
from acryl_datahub_cloud.sdk.entities.assertion import TagsInputType
from acryl_datahub_cloud.sdk.errors import (
    SDKNotYetSupportedError,
    SDKUsageError,
)
from datahub.metadata import schema_classes as models
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
from datahub.sdk.entity_client import EntityClient


class FailThresholdType(str, Enum):
    """Enum for fail threshold types in column value assertions."""

    COUNT = "COUNT"
    PERCENTAGE = "PERCENTAGE"


class FieldTransformType(str, Enum):
    """Enum for field transform types in column value assertions."""

    LENGTH = "LENGTH"


FailThresholdInputType = Union[FailThresholdType, str]
FieldTransformInputType = Union[FieldTransformType, str, None]
ColumnValueAssertionParameters = Union[
    None,  # For operators that don't require parameters (NULL, NOT_NULL)
    ValueInputType,  # Single value
    RangeInputType,  # Range as tuple
]

# This represents the type information from existing GMS assertions:
# - Single value: (value, type)
# - Range: ((min_value, max_value), (min_type, max_type))
GmsCriteriaTypeInfo = Union[
    tuple[ValueInputType, models.AssertionStdParameterTypeClass],  # Single value
    tuple[
        tuple[ValueInputType, ValueInputType],  # (min, max) values
        tuple[
            models.AssertionStdParameterTypeClass,
            models.AssertionStdParameterTypeClass,
        ],  # (min_type, max_type)
    ],  # Range
]


def _get_default_detection_mechanism_column_value_assertion() -> _AllRowsQuery:
    """Factory function for creating default detection mechanism instances.

    Returns a new instance each time to avoid shared mutable state.
    """
    return _AllRowsQuery()


# This is used to validate that operators are compatible with transform outputs
FIELD_TRANSFORM_OUTPUT_TYPE: dict[str, str] = {
    models.FieldTransformTypeClass.LENGTH: "NUMBER",  # LENGTH(string) -> number
}


def _try_parse_fail_threshold_type(
    fail_threshold_type: Optional[FailThresholdInputType],
) -> FailThresholdType:
    """Parse and validate fail threshold type.

    Args:
        fail_threshold_type: The fail threshold type to parse.

    Returns:
        The parsed FailThresholdType.

    Raises:
        SDKUsageError: If the fail threshold type is invalid.
    """
    if fail_threshold_type is None:
        return FailThresholdType.COUNT

    if isinstance(fail_threshold_type, FailThresholdType):
        return fail_threshold_type

    if isinstance(fail_threshold_type, str):
        try:
            return FailThresholdType(fail_threshold_type.upper())
        except ValueError as e:
            raise SDKUsageError(
                f"Invalid fail threshold type: {fail_threshold_type}. "
                f"Valid options are: {[t.value for t in FailThresholdType]}"
            ) from e

    raise SDKUsageError(
        f"Invalid fail threshold type: {fail_threshold_type}. "
        f"Valid options are: {[t.value for t in FailThresholdType]}"
    )


def _try_parse_field_transform_type(
    field_transform: Optional[FieldTransformInputType],
) -> Optional[str]:
    """Parse and validate field transform type.

    Args:
        field_transform: The field transform type to parse.

    Returns:
        The parsed FieldTransformTypeClass string constant or None.

    Raises:
        SDKUsageError: If the field transform type is invalid.
    """
    if field_transform is None:
        return None

    if isinstance(field_transform, FieldTransformType):
        return models.FieldTransformTypeClass.LENGTH

    if isinstance(field_transform, str):
        if field_transform.upper() == "LENGTH":
            return models.FieldTransformTypeClass.LENGTH
        else:
            raise SDKUsageError(
                f"Invalid field transform type: {field_transform}. "
                f"Valid options are: {[t.value for t in FieldTransformType]}"
            )

    raise SDKUsageError(
        f"Invalid field transform type: {field_transform}. "
        f"Valid options are: {[t.value for t in FieldTransformType]}"
    )


def _validate_fail_threshold_value(
    fail_threshold_type: FailThresholdType,
    fail_threshold_value: int,
) -> None:
    """Validate fail threshold value based on the type.

    Args:
        fail_threshold_type: The type of fail threshold.
        fail_threshold_value: The value to validate.

    Raises:
        SDKUsageError: If the fail threshold value is invalid.
    """
    if fail_threshold_value < 0:
        raise SDKUsageError(
            f"Fail threshold value must be non-negative, got {fail_threshold_value}"
        )

    if (
        fail_threshold_type == FailThresholdType.PERCENTAGE
        and fail_threshold_value > 100
    ):
        raise SDKUsageError(
            f"Fail threshold value for PERCENTAGE must be between 0 and 100, "
            f"got {fail_threshold_value}"
        )


class _ColumnValueAssertionInput(_AssertionInput):
    """
    Input used to create a column value assertion.

    This assertion is used to validate individual row values in a column against
    semantic constraints (e.g., "all values in column X must match pattern Y" or
    "no NULL values allowed").

    Key differences from column_metric_assertion (FIELD_METRIC):
    - FIELD_METRIC: Validates aggregated metrics (NULL_COUNT, MEAN, MIN, etc.)
    - FIELD_VALUES: Validates each individual row value against an operator/predicate
    """

    def __init__(
        self,
        *,
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,
        column_name: str,
        operator: OperatorInputType,
        criteria_parameters: Optional[ColumnValueAssertionParameters] = None,
        transform: Optional[FieldTransformInputType] = None,
        fail_threshold_type: Optional[FailThresholdInputType] = None,
        fail_threshold_value: int = 0,
        exclude_nulls: bool = True,
        urn: Optional[Union[str, AssertionUrn]] = None,
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
        gms_criteria_type_info: Optional[GmsCriteriaTypeInfo] = None,
    ):
        """
        Initialize a column value assertion input.

        Args:
            dataset_urn: The dataset urn.
            entity_client: The entity client.
            column_name: The name of the column to validate.
            operator: The operator to use for the assertion.
            criteria_parameters: The criteria parameters (single value, range tuple, or None).
            transform: Optional transform to apply to field values before evaluation.
            fail_threshold_type: The type of failure threshold (COUNT or PERCENTAGE).
            fail_threshold_value: The failure threshold value (default 0 = all rows must pass).
            exclude_nulls: Whether to exclude nulls when evaluating the assertion.
            urn: The urn of the assertion.
            display_name: The display name of the assertion.
            enabled: Whether the assertion is enabled.
            schedule: The schedule of the assertion.
            detection_mechanism: The detection mechanism of the assertion.
            incident_behavior: The incident behavior of the assertion.
            tags: The tags of the assertion.
            created_by: The creator of the assertion.
            created_at: The creation time of the assertion.
            updated_by: The updater of the assertion.
            updated_at: The update time of the assertion.
            gms_criteria_type_info: Type info from existing GMS assertion for updates.
                Format: (value, type) for single values, or ((min, max), (min_type, max_type)) for ranges.
        """
        _AssertionInput.__init__(
            self,
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            source_type=models.AssertionSourceTypeClass.NATIVE,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            default_detection_mechanism=_get_default_detection_mechanism_column_value_assertion(),
        )

        self.column_name = self._try_parse_and_validate_column_name_is_valid_type(
            column_name
        )
        self.operator = _try_parse_and_validate_schema_classes_enum(
            operator, models.AssertionStdOperatorClass
        )
        self.transform = _try_parse_field_transform_type(transform)
        self.fail_threshold_type = _try_parse_fail_threshold_type(fail_threshold_type)
        self.fail_threshold_value = fail_threshold_value
        self.exclude_nulls = exclude_nulls

        _validate_fail_threshold_value(
            self.fail_threshold_type, self.fail_threshold_value
        )

        if self.transform is not None:
            self._validate_transform_for_column_type()
            self._validate_operator_for_transform_output_type()

        self.criteria_parameters: Optional[ColumnValueAssertionParameters] = None
        self.criteria_type: Optional[Union[ValueTypeInputType, RangeTypeInputType]] = (
            None
        )

        if gms_criteria_type_info is not None:
            self._process_criteria_parameters_with_gms_type(
                criteria_parameters, gms_criteria_type_info
            )
        else:
            self._process_criteria_parameters(criteria_parameters)

        if self.transform is None:
            self._validate_field_type_and_operator_compatibility(
                self.column_name, self.operator
            )

    def _validate_transform_for_column_type(self) -> None:
        """Validate that the transform is compatible with the column type.

        LENGTH transform is only valid for STRING columns.

        Raises:
            SDKUsageError: If the transform is not compatible with the column type.
        """
        field_spec = self._get_schema_field_spec(self.column_name)
        if (
            self.transform == models.FieldTransformTypeClass.LENGTH
            and field_spec.type != "STRING"
        ):
            raise SDKUsageError(
                f"LENGTH transform is only valid for STRING columns, "
                f"but column '{self.column_name}' is of type {field_spec.type}"
            )

    def _validate_operator_for_transform_output_type(self) -> None:
        """Validate that the operator is compatible with the transform output type.

        When a transform is applied, the operator must be compatible with the
        transform's output type, not the original column type.
        For example: LENGTH(string_column) produces a NUMBER, so operators like
        REGEX_MATCH (which expect STRING) should be rejected.

        Raises:
            SDKUsageError: If the operator is not compatible with transform output.
        """
        if self.transform is None:
            return

        # Get the output type for this transform
        transform_output_type = FIELD_TRANSFORM_OUTPUT_TYPE.get(self.transform)
        if transform_output_type is None:
            raise SDKNotYetSupportedError(
                f"Transform {self.transform} is not yet supported for operator validation. "
                f"Please update FIELD_TRANSFORM_OUTPUT_TYPE mapping."
            )

        # Check if operator is allowed for the transform output type
        allowed_operators = FIELD_VALUES_OPERATOR_CONFIG.get(transform_output_type, [])
        if self.operator not in allowed_operators:
            raise SDKUsageError(
                f"Operator {self.operator} is not compatible with transform {self.transform}. "
                f"Transform {self.transform} produces {transform_output_type} values, "
                f"but operator {self.operator} is not valid for {transform_output_type} types. "
                f"Allowed operators for {transform_output_type}: "
                f"{', '.join(str(op) for op in allowed_operators)}"
            )

    def _infer_criteria_type_from_parameters(
        self,
        criteria_parameters: Optional[ColumnValueAssertionParameters],
    ) -> Optional[Union[ValueTypeInputType, RangeTypeInputType]]:
        """
        Infer the criteria type from the parameters based on Python types.
        """
        if criteria_parameters is None:
            return None

        if isinstance(criteria_parameters, tuple):
            if len(criteria_parameters) != 2:
                raise SDKUsageError(
                    "Range parameters must be a tuple of exactly 2 values"
                )
            inferred_min_type = self._infer_single_value_type(criteria_parameters[0])
            inferred_max_type = self._infer_single_value_type(criteria_parameters[1])
            return (inferred_min_type, inferred_max_type)
        else:
            return self._infer_single_value_type(criteria_parameters)

    def _infer_single_value_type(self, value: ValueInputType) -> ValueTypeInputType:
        """Infer the type of a single value based on its Python type."""
        if isinstance(value, (int, float)):
            return ValueType.NUMBER
        elif isinstance(value, str):
            return ValueType.STRING
        else:
            return ValueType.UNKNOWN

    def _process_criteria_parameters_with_gms_type(
        self,
        criteria_parameters: Optional[ColumnValueAssertionParameters],
        gms_type_info: Optional[Union[models.AssertionStdParameterTypeClass, tuple]],
    ) -> None:
        """Process criteria_parameters using explicit type information from GMS."""
        if criteria_parameters is None:
            self._process_none_parameters()
        elif isinstance(criteria_parameters, tuple):
            # For range parameters, pass explicit types if available
            # gms_type_info format: ((min_val, max_val), (min_type, max_type))
            explicit_types = None
            if (
                isinstance(gms_type_info, tuple)
                and len(gms_type_info) == 2
                and isinstance(gms_type_info[0], tuple)
            ):
                # Extract types from second element (should be tuple of types)
                explicit_types = (
                    gms_type_info[1] if isinstance(gms_type_info[1], tuple) else None
                )
            self._process_range_parameters(criteria_parameters, explicit_types)
        else:
            # For single value parameters, pass explicit type if available
            # gms_type_info format: (value, type)
            explicit_type = None
            if (
                isinstance(gms_type_info, tuple)
                and len(gms_type_info) >= 2
                and not isinstance(gms_type_info[0], tuple)
                and not isinstance(gms_type_info[1], tuple)
            ):
                # Single value format: extract type from second element
                explicit_type = gms_type_info[1]
            self._process_single_value_parameters(criteria_parameters, explicit_type)

    def _process_criteria_parameters(
        self,
        criteria_parameters: Optional[ColumnValueAssertionParameters],
    ) -> None:
        """Process the criteria_parameters with automatic type inference."""
        if criteria_parameters is None:
            self._process_none_parameters()
        elif isinstance(criteria_parameters, tuple):
            self._process_range_parameters(criteria_parameters)
        else:
            self._process_single_value_parameters(criteria_parameters)

    def _process_none_parameters(self) -> None:
        """Process None criteria_parameters.

        Raises:
            SDKUsageError: If the operator requires parameters but none are provided.
        """
        if self.operator in SINGLE_VALUE_OPERATORS:
            raise SDKUsageError(
                f"Single value is required for operator {self.operator}. "
                "Provide a criteria_parameters value."
            )
        if self.operator in RANGE_OPERATORS:
            raise SDKUsageError(
                f"Range parameters are required for operator {self.operator}. "
                "Provide a tuple of (min_value, max_value) as criteria_parameters."
            )
        self.criteria_parameters = None
        self.criteria_type = None

    def _process_range_parameters(
        self,
        criteria_parameters: tuple,
        explicit_types: Optional[
            tuple[
                models.AssertionStdParameterTypeClass,
                models.AssertionStdParameterTypeClass,
            ]
        ] = None,
    ) -> None:
        """Process tuple criteria_parameters for range operators.

        Args:
            criteria_parameters: The range parameters (min, max).
            explicit_types: Optional explicit types from GMS (min_type, max_type).
                If provided, these types are used directly. Otherwise, types are
                inferred from the parameters.
        """
        if self.operator not in RANGE_OPERATORS:
            raise SDKUsageError(
                f"Operator {self.operator} does not support range parameters. "
                "Provide a single value instead of a tuple."
            )

        if len(criteria_parameters) != 2:
            raise SDKUsageError("Range parameters must be a tuple of exactly 2 values")

        # Declare validated_range_type with explicit type annotation
        validated_range_type: RangeTypeParsedType

        # Use explicit types if provided, otherwise infer from parameters
        if explicit_types is not None:
            min_type, max_type = explicit_types
            validated_min_type = _try_parse_and_validate_value_type(min_type)
            validated_max_type = _try_parse_and_validate_value_type(max_type)
            validated_range_type = (validated_min_type, validated_max_type)

            min_value, max_value = criteria_parameters
            validated_min_value = _try_parse_and_validate_value(
                min_value, validated_min_type
            )
            validated_max_value = _try_parse_and_validate_value(
                max_value, validated_max_type
            )
            validated_range = (validated_min_value, validated_max_value)
        else:
            inferred_range_type = self._infer_criteria_type_from_parameters(
                criteria_parameters
            )
            # Type narrowing: inferred_range_type should be a tuple for range parameters
            if not isinstance(inferred_range_type, tuple):
                raise SDKUsageError(
                    "Expected tuple type for range parameters, but got "
                    f"{type(inferred_range_type).__name__}"
                )
            validated_range_type = _try_parse_and_validate_range_type(
                inferred_range_type
            )
            validated_range = _try_parse_and_validate_range(
                criteria_parameters, validated_range_type, self.operator
            )

        self.criteria_parameters = validated_range
        self.criteria_type = validated_range_type

    def _process_single_value_parameters(
        self,
        criteria_parameters: Union[str, int, float],
        explicit_type: Optional[models.AssertionStdParameterTypeClass] = None,
    ) -> None:
        """Process single value criteria_parameters.

        Args:
            criteria_parameters: The single value parameter.
            explicit_type: Optional explicit type from GMS. If provided, this type
                is used directly. Otherwise, the type is inferred from the parameter.
        """
        if self.operator in NO_PARAMETER_OPERATORS:
            raise SDKUsageError(
                f"Value parameters should not be provided for operator {self.operator}"
            )
        if self.operator not in SINGLE_VALUE_OPERATORS:
            raise SDKUsageError(
                f"Operator {self.operator} does not support value parameters. "
                "Use criteria_parameters=None or omit criteria_parameters."
            )

        # Use explicit type if provided, otherwise infer from parameters
        if explicit_type is not None:
            validated_value_type = _try_parse_and_validate_value_type(explicit_type)
        else:
            inferred_value_type = self._infer_criteria_type_from_parameters(
                criteria_parameters
            )
            if isinstance(inferred_value_type, tuple):
                raise SDKUsageError("Single value type expected, not a tuple type")
            validated_value_type = _try_parse_and_validate_value_type(
                inferred_value_type
            )

        validated_value = _try_parse_and_validate_value(
            criteria_parameters, validated_value_type
        )

        self.criteria_parameters = validated_value
        self.criteria_type = validated_value_type

    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
    ) -> models.MonitorInfoClass:
        """Create a MonitorInfoClass with all the necessary components."""
        source_type, field = self._convert_assertion_source_type_and_field()
        return models.MonitorInfoClass(
            type=models.MonitorTypeClass.ASSERTION,
            status=status,
            assertionMonitor=models.AssertionMonitorClass(
                assertions=[
                    models.AssertionEvaluationSpecClass(
                        assertion=str(assertion_urn),
                        schedule=schedule,
                        parameters=self._get_assertion_evaluation_parameters(
                            str(source_type), field
                        ),
                    ),
                ],
                settings=None,
            ),
        )

    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """Create a FieldAssertionInfoClass for a column value assertion."""
        field_spec = self._get_schema_field_spec(self.column_name)

        field_values_assertion = models.FieldValuesAssertionClass(
            field=field_spec,
            operator=self.operator,
            parameters=self._create_assertion_parameters(),
            transform=self._create_field_transform(),
            failThreshold=models.FieldValuesFailThresholdClass(
                type=self._convert_fail_threshold_type(),
                value=self.fail_threshold_value,
            ),
            excludeNulls=self.exclude_nulls,
        )

        return models.FieldAssertionInfoClass(
            type=models.FieldAssertionTypeClass.FIELD_VALUES,
            entity=str(self.dataset_urn),
            filter=filter,
            fieldValuesAssertion=field_values_assertion,
            fieldMetricAssertion=None,
        )

    def _convert_fail_threshold_type(self) -> str:
        """Convert the fail threshold type to the model class."""
        if self.fail_threshold_type == FailThresholdType.COUNT:
            return models.FieldValuesFailThresholdTypeClass.COUNT
        else:
            return models.FieldValuesFailThresholdTypeClass.PERCENTAGE

    def _create_field_transform(self) -> Optional[models.FieldTransformClass]:
        """Create the field transform if specified."""
        if self.transform is None:
            return None
        return models.FieldTransformClass(type=self.transform)

    def _convert_schedule(self) -> models.CronScheduleClass:
        """Create a schedule for a column value assertion."""
        if self.schedule is None:
            return DEFAULT_EVERY_SIX_HOURS_SCHEDULE

        return models.CronScheduleClass(
            cron=self.schedule.cron,
            timezone=self.schedule.timezone,
        )

    def _convert_schema_field_spec_to_freshness_field_spec(
        self, field_spec: models.SchemaFieldSpecClass
    ) -> models.FreshnessFieldSpecClass:
        """Convert a SchemaFieldSpecClass to a FreshnessFieldSpecClass."""
        return models.FreshnessFieldSpecClass(
            path=field_spec.path,
            type=field_spec.type,
            nativeType=field_spec.nativeType,
            kind=models.FreshnessFieldKindClass.HIGH_WATERMARK,
        )

    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Get evaluation parameters for a column value assertion."""
        if field is not None:
            if isinstance(field, models.SchemaFieldSpecClass):
                field = self._convert_schema_field_spec_to_freshness_field_spec(field)
            assert isinstance(field, models.FreshnessFieldSpecClass), (
                "Field must be FreshnessFieldSpecClass for monitor info"
            )
        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
            datasetFieldParameters=models.DatasetFieldAssertionParametersClass(
                sourceType=source_type,
                changedRowsField=field,
            ),
        )

    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """Convert detection mechanism into source type and field specification."""
        source_type = models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY
        field = None
        SUPPORTED_DETECTION_MECHANISMS = [
            _AllRowsQuery().type,
            _AllRowsQueryDataHubDatasetProfile().type,
            _ChangedRowsQuery(column_name="").type,
        ]

        if isinstance(self.detection_mechanism, _ChangedRowsQuery):
            source_type = models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
            column_name = self._try_parse_and_validate_column_name_is_valid_type(
                self.detection_mechanism.column_name,
                allowed_column_types=[
                    models.NumberTypeClass(),
                    models.DateTypeClass(),
                    models.TimeTypeClass(),
                ],
            )
            field = self._get_schema_field_spec(column_name)
        elif isinstance(self.detection_mechanism, _AllRowsQuery):
            source_type = models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY
        elif isinstance(
            self.detection_mechanism,
            (_AllRowsQueryDataHubDatasetProfile, _DatasetProfile),
        ):
            source_type = (
                models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE
            )
        else:
            raise SDKNotYetSupportedError(
                f"Detection mechanism {self.detection_mechanism} is not supported for "
                f"column value assertions, please use a supported detection mechanism: "
                f"{', '.join(SUPPORTED_DETECTION_MECHANISMS)}"
            )

        return source_type, field

    def _create_assertion_parameters(self) -> models.AssertionStdParametersClass:
        """Create assertion parameters based on the operator type and provided values."""
        if self.operator in SINGLE_VALUE_OPERATORS:
            if self.criteria_parameters is None or isinstance(
                self.criteria_parameters, tuple
            ):
                raise SDKUsageError(
                    f"Single value is required for operator {self.operator}"
                )
            if self.criteria_type is None or isinstance(self.criteria_type, tuple):
                raise SDKUsageError(
                    f"Single value type is required for operator {self.operator}"
                )
            return models.AssertionStdParametersClass(
                value=models.AssertionStdParameterClass(
                    value=str(self.criteria_parameters),
                    type=self.criteria_type,
                ),
            )
        elif self.operator in RANGE_OPERATORS:
            if not isinstance(self.criteria_parameters, tuple):
                raise SDKUsageError(
                    f"Range parameters are required for operator {self.operator}"
                )
            if not isinstance(self.criteria_type, tuple):
                raise SDKUsageError(
                    f"Range type is required for operator {self.operator}"
                )
            return models.AssertionStdParametersClass(
                minValue=models.AssertionStdParameterClass(
                    value=str(self.criteria_parameters[0]),
                    type=self.criteria_type[0],
                ),
                maxValue=models.AssertionStdParameterClass(
                    value=str(self.criteria_parameters[1]),
                    type=self.criteria_type[1],
                ),
            )
        elif self.operator in NO_PARAMETER_OPERATORS:
            return models.AssertionStdParametersClass()
        else:
            raise SDKUsageError(f"Unsupported operator type: {self.operator}")

    def _try_parse_and_validate_column_name_is_valid_type(
        self,
        column_name: str,
        allowed_column_types: list[
            models.DictWrapper
        ] = ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION,
    ) -> str:
        """Parse and validate a column name and its type."""
        field_spec = self._get_schema_field_spec(column_name)
        self._validate_field_type(
            field_spec,
            column_name,
            allowed_column_types,
            "column value assertion",
        )
        return column_name

    def _assertion_type(self) -> str:
        """Get the assertion type."""
        return models.AssertionTypeClass.FIELD

    def _validate_field_type_and_operator_compatibility(
        self, column_name: str, operator: models.AssertionStdOperatorClass
    ) -> None:
        """Validate that the field type is compatible with the operator.

        Args:
            column_name: The name of the column to validate.
            operator: The operator to validate against.

        Raises:
            SDKUsageError: If the field type is not compatible with the operator.
        """
        field_spec = self._get_schema_field_spec(column_name)
        allowed_operators = FIELD_VALUES_OPERATOR_CONFIG.get(field_spec.type, [])
        if operator not in allowed_operators:
            raise SDKUsageError(
                f"Operator {operator} is not allowed for field type {field_spec.type} "
                f"for column '{column_name}'. Allowed operators: "
                f"{', '.join(str(op) for op in allowed_operators)}"
            )