acryl-datahub-cloud 0.3.11rc0__py3-none-any.whl → 0.3.16.1rc0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (238)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/acryl_cs_issues/models.py +5 -3
  3. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  4. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  10. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  11. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +37 -13
  12. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +55 -24
  13. acryl_datahub_cloud/datahub_reporting/extract_graph.py +4 -3
  14. acryl_datahub_cloud/datahub_reporting/extract_sql.py +242 -51
  15. acryl_datahub_cloud/datahub_reporting/forms.py +1 -1
  16. acryl_datahub_cloud/datahub_reporting/forms_config.py +3 -2
  17. acryl_datahub_cloud/datahub_restore/source.py +3 -2
  18. acryl_datahub_cloud/datahub_usage_reporting/excluded.py +94 -0
  19. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  20. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +518 -77
  21. acryl_datahub_cloud/elasticsearch/graph_service.py +76 -14
  22. acryl_datahub_cloud/graphql_utils.py +64 -0
  23. acryl_datahub_cloud/lineage_features/source.py +555 -49
  24. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2296 -1900
  25. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/actionworkflow/__init__.py +53 -0
  26. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/anomaly/__init__.py +2 -0
  27. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  28. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +4 -2
  29. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/common/__init__.py +6 -0
  30. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/conversation/__init__.py +29 -0
  31. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +2 -0
  32. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
  33. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  34. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  35. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +8 -0
  36. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/knowledge/__init__.py +33 -0
  37. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  38. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +12 -0
  39. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/search/features/__init__.py +2 -0
  40. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  41. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  42. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  43. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  44. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  45. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +28 -0
  46. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  47. acryl_datahub_cloud/metadata/schema.avsc +25091 -20557
  48. acryl_datahub_cloud/metadata/schema_classes.py +29269 -23863
  49. acryl_datahub_cloud/metadata/schemas/ActionRequestInfo.avsc +235 -2
  50. acryl_datahub_cloud/metadata/schemas/ActionWorkflowInfo.avsc +683 -0
  51. acryl_datahub_cloud/metadata/schemas/ActionWorkflowKey.avsc +21 -0
  52. acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
  53. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  54. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +75 -0
  55. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  56. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +353 -215
  57. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +147 -20
  58. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +166 -21
  60. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +15 -2
  61. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +54 -0
  62. acryl_datahub_cloud/metadata/schemas/AssetSettings.avsc +63 -0
  63. acryl_datahub_cloud/metadata/schemas/BusinessAttributeInfo.avsc +7 -3
  64. acryl_datahub_cloud/metadata/schemas/ChartInfo.avsc +20 -6
  65. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  66. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  67. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  68. acryl_datahub_cloud/metadata/schemas/ContainerProperties.avsc +16 -5
  69. acryl_datahub_cloud/metadata/schemas/CorpGroupEditableInfo.avsc +2 -1
  70. acryl_datahub_cloud/metadata/schemas/CorpGroupInfo.avsc +7 -3
  71. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  72. acryl_datahub_cloud/metadata/schemas/CorpGroupSettings.avsc +127 -2
  73. acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  74. acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +18 -2
  75. acryl_datahub_cloud/metadata/schemas/CorpUserInvitationStatus.avsc +106 -0
  76. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +4 -1
  77. acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +304 -2
  78. acryl_datahub_cloud/metadata/schemas/CorpUserUsageFeatures.avsc +86 -0
  79. acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +11 -5
  80. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  81. acryl_datahub_cloud/metadata/schemas/DataFlowInfo.avsc +15 -5
  82. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  83. acryl_datahub_cloud/metadata/schemas/DataHubAiConversationInfo.avsc +256 -0
  84. acryl_datahub_cloud/metadata/schemas/DataHubAiConversationKey.avsc +22 -0
  85. acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +234 -0
  86. acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +22 -0
  87. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  88. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  89. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +308 -0
  90. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  91. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  92. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  93. acryl_datahub_cloud/metadata/schemas/DataJobInfo.avsc +13 -4
  94. acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
  95. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  96. acryl_datahub_cloud/metadata/schemas/DataPlatformInfo.avsc +3 -1
  97. acryl_datahub_cloud/metadata/schemas/DataPlatformInstanceProperties.avsc +5 -2
  98. acryl_datahub_cloud/metadata/schemas/DataProcessKey.avsc +4 -0
  99. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +2 -0
  100. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +6 -3
  101. acryl_datahub_cloud/metadata/schemas/DataTypeInfo.avsc +5 -0
  102. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +10 -2
  103. acryl_datahub_cloud/metadata/schemas/DatasetProperties.avsc +12 -5
  104. acryl_datahub_cloud/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  105. acryl_datahub_cloud/metadata/schemas/DocumentInfo.avsc +407 -0
  106. acryl_datahub_cloud/metadata/schemas/DocumentKey.avsc +35 -0
  107. acryl_datahub_cloud/metadata/schemas/DocumentSettings.avsc +79 -0
  108. acryl_datahub_cloud/metadata/schemas/DomainKey.avsc +2 -0
  109. acryl_datahub_cloud/metadata/schemas/DomainProperties.avsc +7 -3
  110. acryl_datahub_cloud/metadata/schemas/EditableContainerProperties.avsc +2 -1
  111. acryl_datahub_cloud/metadata/schemas/EditableDashboardProperties.avsc +2 -1
  112. acryl_datahub_cloud/metadata/schemas/EditableDataFlowProperties.avsc +2 -1
  113. acryl_datahub_cloud/metadata/schemas/EditableDataJobProperties.avsc +2 -1
  114. acryl_datahub_cloud/metadata/schemas/EditableDatasetProperties.avsc +2 -1
  115. acryl_datahub_cloud/metadata/schemas/EditableERModelRelationshipProperties.avsc +2 -1
  116. acryl_datahub_cloud/metadata/schemas/EditableMLFeatureProperties.avsc +2 -1
  117. acryl_datahub_cloud/metadata/schemas/EditableMLFeatureTableProperties.avsc +2 -1
  118. acryl_datahub_cloud/metadata/schemas/EditableMLModelGroupProperties.avsc +2 -1
  119. acryl_datahub_cloud/metadata/schemas/EditableMLModelProperties.avsc +2 -1
  120. acryl_datahub_cloud/metadata/schemas/EditableNotebookProperties.avsc +2 -1
  121. acryl_datahub_cloud/metadata/schemas/EditableSchemaMetadata.avsc +4 -2
  122. acryl_datahub_cloud/metadata/schemas/EntityTypeInfo.avsc +5 -0
  123. acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
  124. acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
  125. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  126. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  127. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  128. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  129. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  130. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +416 -0
  131. acryl_datahub_cloud/metadata/schemas/GlobalTags.avsc +2 -1
  132. acryl_datahub_cloud/metadata/schemas/GlossaryNodeInfo.avsc +3 -1
  133. acryl_datahub_cloud/metadata/schemas/GlossaryNodeKey.avsc +1 -0
  134. acryl_datahub_cloud/metadata/schemas/GlossaryTermInfo.avsc +3 -1
  135. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +2 -0
  136. acryl_datahub_cloud/metadata/schemas/IcebergWarehouseInfo.avsc +4 -0
  137. acryl_datahub_cloud/metadata/schemas/IncidentActivityEvent.avsc +3 -3
  138. acryl_datahub_cloud/metadata/schemas/IncidentInfo.avsc +3 -3
  139. acryl_datahub_cloud/metadata/schemas/InferredMetadata.avsc +71 -1
  140. acryl_datahub_cloud/metadata/schemas/InputFields.avsc +2 -1
  141. acryl_datahub_cloud/metadata/schemas/InviteToken.avsc +26 -0
  142. acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
  143. acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +145 -0
  144. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +4 -1
  145. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +4 -1
  146. acryl_datahub_cloud/metadata/schemas/MLModelDeploymentKey.avsc +7 -1
  147. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +9 -1
  148. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +9 -1
  149. acryl_datahub_cloud/metadata/schemas/MLModelProperties.avsc +4 -2
  150. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +4 -1
  151. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +418 -97
  152. acryl_datahub_cloud/metadata/schemas/MetadataChangeLog.avsc +62 -44
  153. acryl_datahub_cloud/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  154. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +54 -9
  155. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +163 -23
  156. acryl_datahub_cloud/metadata/schemas/MonitorKey.avsc +9 -1
  157. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +128 -3
  158. acryl_datahub_cloud/metadata/schemas/NotebookInfo.avsc +5 -2
  159. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  160. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +91 -4
  161. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  162. acryl_datahub_cloud/metadata/schemas/Ownership.avsc +71 -1
  163. acryl_datahub_cloud/metadata/schemas/QuerySubjects.avsc +2 -13
  164. acryl_datahub_cloud/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  165. acryl_datahub_cloud/metadata/schemas/RoleProperties.avsc +3 -1
  166. acryl_datahub_cloud/metadata/schemas/SchemaFieldInfo.avsc +3 -1
  167. acryl_datahub_cloud/metadata/schemas/SchemaFieldKey.avsc +3 -0
  168. acryl_datahub_cloud/metadata/schemas/SchemaMetadata.avsc +2 -1
  169. acryl_datahub_cloud/metadata/schemas/SemanticContent.avsc +123 -0
  170. acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc +69 -0
  171. acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +15 -4
  172. acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  173. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +136 -5
  174. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  175. acryl_datahub_cloud/metadata/schemas/SystemMetadata.avsc +61 -0
  176. acryl_datahub_cloud/metadata/schemas/TagProperties.avsc +3 -1
  177. acryl_datahub_cloud/metadata/schemas/TestInfo.avsc +2 -1
  178. acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
  179. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  180. acryl_datahub_cloud/notifications/__init__.py +0 -0
  181. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  182. acryl_datahub_cloud/sdk/__init__.py +69 -0
  183. acryl_datahub_cloud/sdk/assertion/__init__.py +58 -0
  184. acryl_datahub_cloud/sdk/assertion/assertion_base.py +779 -0
  185. acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
  186. acryl_datahub_cloud/sdk/assertion/column_value_assertion.py +431 -0
  187. acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
  188. acryl_datahub_cloud/sdk/assertion/schema_assertion.py +268 -0
  189. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +212 -0
  190. acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
  191. acryl_datahub_cloud/sdk/assertion/smart_sql_assertion.py +156 -0
  192. acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
  193. acryl_datahub_cloud/sdk/assertion/sql_assertion.py +273 -0
  194. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  195. acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
  196. acryl_datahub_cloud/sdk/assertion_client/__init__.py +0 -0
  197. acryl_datahub_cloud/sdk/assertion_client/column_metric.py +545 -0
  198. acryl_datahub_cloud/sdk/assertion_client/column_value.py +617 -0
  199. acryl_datahub_cloud/sdk/assertion_client/freshness.py +371 -0
  200. acryl_datahub_cloud/sdk/assertion_client/helpers.py +166 -0
  201. acryl_datahub_cloud/sdk/assertion_client/schema.py +358 -0
  202. acryl_datahub_cloud/sdk/assertion_client/smart_column_metric.py +540 -0
  203. acryl_datahub_cloud/sdk/assertion_client/smart_freshness.py +373 -0
  204. acryl_datahub_cloud/sdk/assertion_client/smart_sql.py +411 -0
  205. acryl_datahub_cloud/sdk/assertion_client/smart_volume.py +380 -0
  206. acryl_datahub_cloud/sdk/assertion_client/sql.py +410 -0
  207. acryl_datahub_cloud/sdk/assertion_client/volume.py +446 -0
  208. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  209. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1470 -0
  210. acryl_datahub_cloud/sdk/assertion_input/column_assertion_constants.py +114 -0
  211. acryl_datahub_cloud/sdk/assertion_input/column_assertion_utils.py +284 -0
  212. acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +759 -0
  213. acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +109 -0
  214. acryl_datahub_cloud/sdk/assertion_input/column_value_assertion_input.py +810 -0
  215. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +305 -0
  216. acryl_datahub_cloud/sdk/assertion_input/schema_assertion_input.py +413 -0
  217. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +793 -0
  218. acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +218 -0
  219. acryl_datahub_cloud/sdk/assertion_input/smart_sql_assertion_input.py +181 -0
  220. acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +189 -0
  221. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +320 -0
  222. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +635 -0
  223. acryl_datahub_cloud/sdk/assertions_client.py +1074 -0
  224. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  225. acryl_datahub_cloud/sdk/entities/assertion.py +439 -0
  226. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  227. acryl_datahub_cloud/sdk/entities/subscription.py +100 -0
  228. acryl_datahub_cloud/sdk/errors.py +34 -0
  229. acryl_datahub_cloud/sdk/resolver_client.py +42 -0
  230. acryl_datahub_cloud/sdk/subscription_client.py +737 -0
  231. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/METADATA +55 -49
  232. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/RECORD +235 -142
  233. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/WHEEL +1 -1
  234. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/entry_points.txt +1 -0
  235. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  236. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  237. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  238. {acryl_datahub_cloud-0.3.11rc0.dist-info → acryl_datahub_cloud-0.3.16.1rc0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,793 @@
1
+ from datetime import datetime
2
+ from typing import TYPE_CHECKING, Optional, Union
3
+
4
+ if TYPE_CHECKING:
5
+ pass
6
+
7
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
8
+ DEFAULT_DAILY_SCHEDULE,
9
+ HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
10
+ NO_PARAMETER_OPERATORS,
11
+ RANGE_OPERATORS,
12
+ SINGLE_VALUE_OPERATORS,
13
+ AssertionIncidentBehaviorInputTypes,
14
+ AssertionInfoInputType,
15
+ DetectionMechanismInputTypes,
16
+ ExclusionWindowInputTypes,
17
+ FieldSpecType,
18
+ InferenceSensitivity,
19
+ _AllRowsQuery,
20
+ _AllRowsQueryDataHubDatasetProfile,
21
+ _AssertionInput,
22
+ _ChangedRowsQuery,
23
+ _DatasetProfile,
24
+ _HasSmartAssertionInputs,
25
+ _try_parse_and_validate_schema_classes_enum,
26
+ )
27
+ from acryl_datahub_cloud.sdk.assertion_input.column_assertion_constants import (
28
+ ALLOWED_COLUMN_TYPES_FOR_COLUMN_ASSERTION,
29
+ FIELD_VALUES_OPERATOR_CONFIG,
30
+ OperatorType,
31
+ RangeInputType,
32
+ RangeTypeInputType,
33
+ ValueInputType,
34
+ ValueType,
35
+ ValueTypeInputType,
36
+ )
37
+ from acryl_datahub_cloud.sdk.assertion_input.column_assertion_utils import (
38
+ _is_no_parameter_operator,
39
+ _is_range_required_for_operator,
40
+ _is_value_required_for_operator,
41
+ _try_parse_and_validate_range,
42
+ _try_parse_and_validate_range_type,
43
+ _try_parse_and_validate_value,
44
+ _try_parse_and_validate_value_type,
45
+ )
46
+ from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
47
+ FIELD_METRIC_TYPE_CONFIG,
48
+ MetricInputType,
49
+ )
50
+ from acryl_datahub_cloud.sdk.entities.assertion import TagsInputType
51
+ from acryl_datahub_cloud.sdk.errors import (
52
+ SDKNotYetSupportedError,
53
+ SDKUsageError,
54
+ )
55
+ from datahub.metadata import schema_classes as models
56
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
57
+ from datahub.sdk.entity_client import EntityClient
58
+
59
# Smart-specific alias kept for backward compatibility; it refers to the very
# same object as the generic column-assertion constant.
ALLOWED_COLUMN_TYPES_FOR_SMART_COLUMN_METRIC_ASSERTION = ALLOWED_COLUMN_TYPES_FOR_COLUMN_ASSERTION

# Unified criteria parameters type. Accepted shapes:
#   * None           - operators that take no parameters (NULL, NOT_NULL)
#   * ValueInputType - a single value
#   * RangeInputType - a range given as a tuple
SmartColumnMetricAssertionParameters = Union[None, ValueInputType, RangeInputType]

# Passed to _AssertionInput.__init__ as `default_detection_mechanism`.
DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION: _AllRowsQuery = _AllRowsQuery()
74
+
75
+
76
+ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
77
+ """
78
+ Input used to create a smart column metric assertion.
79
+
80
+ This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
81
+ min, max, median, and more. It uses AI to infer the assertion parameters. The operator is fixed to BETWEEN and criteria_parameters
82
+ are set to (0, 0) since the actual values will be inferred by AI.
83
+
84
+ Example using the entity models, not comprehensive for all options:
85
+
86
+ ```python
87
+ models.AssertionInfoClass(
88
+ type=models.AssertionTypeClass.FIELD,
89
+ fieldAssertion=FieldAssertionInfoClass(
90
+ type=models.FieldAssertionTypeClass.FIELD_METRIC,
91
+ entity=str(self.dataset_urn),
92
+ filter=DatasetFilterClass(
93
+ type=models.DatasetFilterTypeClass.SQL,
94
+ sql="SELECT * FROM dataset WHERE column_name = 'value'", # Example filter
95
+ ),
96
+ fieldMetricAssertion=FieldMetricAssertionClass(
97
+ field=SchemaFieldSpecClass(
98
+ path="column_name", # The column name to validate
99
+ type="string", # The type of the column
100
+ nativeType="string", # The native type of the column
101
+ ),
102
+ metric=models.FieldMetricTypeClass.NULL_COUNT_PERCENTAGE, # The metric to validate
103
+ operator=models.AssertionStdOperatorClass.BETWEEN, # Fixed operator for smart assertions
104
+ parameters=models.AssertionStdParametersClass(
105
+ minValue=models.AssertionStdParameterClass(
106
+ value="0", # Fixed min value for smart assertions
107
+ type=models.AssertionStdParameterTypeClass.NUMBER,
108
+ ),
109
+ maxValue=models.AssertionStdParameterClass(
110
+ value="0", # Fixed max value for smart assertions
111
+ type=models.AssertionStdParameterTypeClass.NUMBER,
112
+ ),
113
+ ),
114
+ ),
115
+ ),
116
+ source=models.AssertionSourceClass(
117
+ type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
118
+ created=AuditStampClass(
119
+ time=1717929600,
120
+ actor="urn:li:corpuser:jdoe", # The actor who created the assertion
121
+ ),
122
+ ),
123
+ lastUpdated=AuditStampClass(
124
+ time=1717929600,
125
+ actor="urn:li:corpuser:jdoe", # The actor who last updated the assertion
126
+ ),
127
+ description="This assertion validates the null count percentage of the column 'column_name' is greater than 10.", # Optional description of the assertion
128
+ )
129
+ ```
130
+
131
+ ```python
132
+ models.MonitorInfoClass(
133
+ type=models.MonitorTypeClass.ASSERTION,
134
+ status=models.MonitorStatusClass(
135
+ mode=models.MonitorModeClass.ACTIVE, # Active or Inactive
136
+ ),
137
+ assertionMonitor=AssertionMonitorClass(
138
+ assertions=AssertionEvaluationSpecClass(
139
+ assertion="urn:li:assertion:123", # The assertion to monitor
140
+ schedule=models.CronScheduleClass(
141
+ cron="0 0 * * *", # The cron schedule
142
+ timezone="America/New_York", # The timezone
143
+ ),
144
+ parameters=models.AssertionEvaluationParametersClass(
145
+ type=models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
146
+ datasetFieldParameters=models.DatasetFieldAssertionParametersClass(
147
+ sourceType=models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY, # This can be ALL_ROWS_QUERY, CHANGED_ROWS_QUERY or DATAHUB_DATASET_PROFILE
148
+ changedRowsField=models.FreshnessFieldSpecClass(
149
+ path="column_name",
150
+ type="string",
151
+ nativeType="string",
152
+ kind=models.FreshnessFieldKindClass.HIGH_WATERMARK, # This can be LAST_MODIFIED or HIGH_WATERMARK
153
+ ),
154
+ ),
155
+ ),
156
+ ),
157
+ settings=models.AssertionMonitorSettingsClass(
158
+ adjustmentSettings=models.AssertionAdjustmentSettingsClass(
159
+ algorithm=models.AdjustmentAlgorithmClass.CUSTOM, # TODO: Do we need to set this in the SDK?
160
+ algorithmName="stddev", # TODO: Do we need to set this in the SDK? What are acceptable values?
161
+ context={
162
+ "stdDev": "1.0", # TODO: Do we need to set this in the SDK? What are acceptable values?
163
+ },
164
+ exclusionWindows=[models.AssertionExclusionWindowClass(
165
+ type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,
166
+ start=1717929600,
167
+ end=1717929600,
168
+ )],
169
+ trainingDataLookbackWindowDays=10, # The number of days to look back for training data
170
+ sensitivity=models.AssertionMonitorSensitivityClass(
171
+ level=1, # The sensitivity level
172
+ ),
173
+ ),
174
+ ),
175
+ ),
176
+ )
177
+ ```
178
+ """
179
+
180
def __init__(
    self,
    *,
    # Required parameters
    dataset_urn: Union[str, DatasetUrn],
    entity_client: EntityClient,
    column_name: str,
    metric_type: MetricInputType,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: bool = True,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Union[str, CorpUserUrn],
    created_at: datetime,
    updated_by: Union[str, CorpUserUrn],
    updated_at: datetime,
):
    """
    Build the input object for a smart column metric assertion.

    All arguments are keyword-only. The shared arguments are forwarded to
    ``_AssertionInput.__init__`` and the smart-assertion arguments to
    ``_HasSmartAssertionInputs.__init__``; only ``column_name`` and
    ``metric_type`` are parsed here.

    Args:
        dataset_urn: Urn of the dataset the assertion targets.
        entity_client: Entity client handed to the parent initializer.
        column_name: Name of the column to validate.
        metric_type: Metric to validate; parsed against
            ``models.FieldMetricTypeClass``.
        urn: Urn of the assertion, if any.
        display_name: Display name of the assertion.
        enabled: Whether the assertion is enabled.
        schedule: Schedule of the assertion.
        detection_mechanism: Detection mechanism of the assertion. The parent
            also receives
            ``DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION`` as
            its default.
        sensitivity: Sensitivity of the assertion.
        exclusion_windows: Exclusion windows of the assertion.
        training_data_lookback_days: Training data lookback, in days.
        incident_behavior: Incident behavior of the assertion. Accepts
            strings, enum values, lists, or None.
        tags: Tags of the assertion.
        created_by: Creator of the assertion.
        created_at: Creation time of the assertion.
        updated_by: Last updater of the assertion.
        updated_at: Last update time of the assertion.
    """
    # Common parameters are validated by the parent initializer.
    _AssertionInput.__init__(
        self,
        dataset_urn=dataset_urn,
        entity_client=entity_client,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        schedule=schedule,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        # Smart assertions are of type inferred, not native.
        source_type=models.AssertionSourceTypeClass.INFERRED,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
        default_detection_mechanism=DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION,
    )
    _HasSmartAssertionInputs.__init__(
        self,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
    )

    # Parameters specific to the smart column metric assertion.
    self.metric_type = _try_parse_and_validate_schema_classes_enum(
        metric_type, models.FieldMetricTypeClass
    )
    self.column_name = self._try_parse_and_validate_column_name_is_valid_type(
        column_name
    )

    # Operator and criteria are fixed placeholders because the real values
    # are inferred by AI: BETWEEN over the numeric range (0, 0).
    self.operator = _try_parse_and_validate_schema_classes_enum(
        OperatorType.BETWEEN, models.AssertionStdOperatorClass
    )
    placeholder_range = (0, 0)
    placeholder_range_type = (ValueType.NUMBER, ValueType.NUMBER)
    self.criteria_parameters: Optional[SmartColumnMetricAssertionParameters] = (
        placeholder_range
    )
    self.criteria_type: Optional[Union[ValueTypeInputType, RangeTypeInputType]] = (
        placeholder_range_type
    )

    # The operator is a placeholder, so operator validation is skipped; only
    # the field type / metric type pairing is checked.
    self._validate_field_type_and_metric_type_compatibility(
        self.column_name, self.metric_type
    )
281
+
282
def _infer_criteria_type_from_parameters(
    self,
    criteria_parameters: Optional[SmartColumnMetricAssertionParameters],
) -> Optional[Union[ValueTypeInputType, RangeTypeInputType]]:
    """
    Derive the criteria type from the Python type(s) of the parameters.

    Args:
        criteria_parameters: None, a single value, or a 2-tuple range.

    Returns:
        None when no parameters are given, a single inferred type for a
        single value, or a pair of inferred types for a range.

    Raises:
        SDKUsageError: If a tuple is given that does not hold exactly 2 values.
    """
    if criteria_parameters is None:
        return None

    # Anything that is not a tuple is treated as a single value.
    if not isinstance(criteria_parameters, tuple):
        return self._infer_single_value_type(criteria_parameters)

    if len(criteria_parameters) != 2:
        raise SDKUsageError("Range parameters must be a tuple of exactly 2 values")

    # Range: infer the type of each bound independently.
    lower_bound, upper_bound = criteria_parameters
    lower_type = self._infer_single_value_type(lower_bound)
    upper_type = self._infer_single_value_type(upper_bound)
    return (lower_type, upper_type)
311
+
312
def _infer_single_value_type(self, value: ValueInputType) -> ValueTypeInputType:
    """
    Map a single value to a ValueType based on its Python type.

    Args:
        value: The value whose type should be inferred.

    Returns:
        ValueType.NUMBER for int/float (bool included, as it subclasses int),
        ValueType.STRING for str, and ValueType.UNKNOWN for anything else.
    """
    if isinstance(value, (int, float)):
        return ValueType.NUMBER
    if isinstance(value, str):
        return ValueType.STRING
    # Fallback for every other Python type.
    return ValueType.UNKNOWN
329
+
330
def _process_criteria_parameters_with_gms_type(
    self,
    criteria_parameters: Optional[SmartColumnMetricAssertionParameters],
    gms_type_info: Optional[Union[models.AssertionStdParameterTypeClass, tuple]],
) -> None:
    """
    Process criteria_parameters, preferring explicit type information from GMS.

    The explicit types are used only when their shape matches the parameters
    (tuple for a range, non-tuple for a single value) and they are truthy;
    otherwise the type-inferring processors are used instead.

    Args:
        criteria_parameters: None, a single value, or a tuple range.
        gms_type_info: Type information from GMS: a single type, a tuple of
            types, or None.
    """
    if criteria_parameters is None:
        self._process_none_parameters()
        return

    type_info_is_tuple = isinstance(gms_type_info, tuple)

    if isinstance(criteria_parameters, tuple):
        # Range parameters: explicit types apply only if given as a tuple.
        if gms_type_info and type_info_is_tuple:
            self._process_range_parameters_with_types(
                criteria_parameters, gms_type_info
            )
        else:
            self._process_range_parameters(criteria_parameters)
        return

    # Single value: explicit type applies only if it is not a tuple.
    if gms_type_info and not type_info_is_tuple:
        self._process_single_value_parameters_with_type(
            criteria_parameters, gms_type_info
        )
    else:
        self._process_single_value_parameters(criteria_parameters)
354
+
355
+ def _process_criteria_parameters(
356
+ self,
357
+ criteria_parameters: Optional[SmartColumnMetricAssertionParameters],
358
+ ) -> None:
359
+ """Process the new consolidated criteria_parameters with automatic type inference."""
360
+ if criteria_parameters is None:
361
+ self._process_none_parameters()
362
+ elif isinstance(criteria_parameters, tuple):
363
+ self._process_range_parameters(criteria_parameters)
364
+ else:
365
+ self._process_single_value_parameters(criteria_parameters)
366
+
367
+ def _process_none_parameters(self) -> None:
368
+ """Process None criteria_parameters."""
369
+ # No parameters - validation is now handled at the client level
370
+ # This allows both creation and update scenarios to be handled appropriately
371
+ self.criteria_parameters = None
372
+ self.criteria_type = None
373
+
374
def _process_range_parameters(self, criteria_parameters: tuple) -> None:
    """
    Validate tuple criteria_parameters for a range operator and store them.

    The range type is inferred from the values, then both the type and the
    values are validated before being written to ``self.criteria_type`` and
    ``self.criteria_parameters``.

    Args:
        criteria_parameters: The range values as a tuple.

    Raises:
        SDKUsageError: If ``self.operator`` does not take range parameters.
            The inference and validation helpers may raise as well.
    """
    if not _is_range_required_for_operator(self.operator):
        raise SDKUsageError(
            f"Operator {self.operator} does not support range parameters. "
            "Provide a single value instead of a tuple."
        )

    # Infer the range type from the values, then validate type and values.
    range_type = _try_parse_and_validate_range_type(
        self._infer_criteria_type_from_parameters(criteria_parameters)
    )
    range_values = _try_parse_and_validate_range(
        criteria_parameters, range_type, self.operator
    )

    # Only store once everything above has succeeded.
    self.criteria_parameters = range_values
    self.criteria_type = range_type
399
+
400
def _process_single_value_parameters(
    self, criteria_parameters: Union[str, int, float]
) -> None:
    """
    Validate a single criteria value for the operator and store it.

    The value type is inferred from the value, then both the type and the
    value are validated before being written to ``self.criteria_type`` and
    ``self.criteria_parameters``.

    Args:
        criteria_parameters: The single value supplied for the operator.

    Raises:
        SDKUsageError: If ``self.operator`` takes no parameters, does not take
            a single value, or the inferred type turns out to be a tuple.
            The validation helpers may raise as well.
    """
    if _is_no_parameter_operator(self.operator):
        raise SDKUsageError(
            f"Value parameters should not be provided for operator {self.operator}"
        )
    if not _is_value_required_for_operator(self.operator):
        raise SDKUsageError(
            f"Operator {self.operator} does not support value parameters. "
            "Use criteria_parameters=None or omit criteria_parameters."
        )

    # Past the guards the operator always requires a value, so the value is
    # always validated; the former "store raw parameters" branch could never
    # run and has been removed.
    inferred_value_type = self._infer_criteria_type_from_parameters(
        criteria_parameters
    )

    # Defensive: a tuple type means a range slipped through as a single value.
    if isinstance(inferred_value_type, tuple):
        raise SDKUsageError("Single value type expected, not a tuple type")

    validated_value_type = _try_parse_and_validate_value_type(inferred_value_type)
    validated_value = _try_parse_and_validate_value(
        criteria_parameters, validated_value_type
    )

    # Store validated parameters
    self.criteria_parameters = validated_value
    self.criteria_type = validated_value_type
440
+
441
def _process_single_value_parameters_with_type(
    self,
    criteria_parameters: Union[str, int, float],
    gms_type: models.AssertionStdParameterTypeClass,
) -> None:
    """
    Validate a single criteria value against an explicit GMS type and store it.

    Unlike the inferring variant, the value type is taken from ``gms_type``
    instead of being derived from the value.

    Args:
        criteria_parameters: The single value supplied for the operator.
        gms_type: The parameter type reported by GMS.

    Raises:
        SDKUsageError: If ``self.operator`` takes no parameters or does not
            take a single value. The validation helpers may raise as well.
    """
    operator = self.operator
    if _is_no_parameter_operator(operator):
        raise SDKUsageError(
            f"Value parameters should not be provided for operator {self.operator}"
        )
    if not _is_value_required_for_operator(operator):
        raise SDKUsageError(
            f"Operator {self.operator} does not support value parameters. "
            "Use criteria_parameters=None or omit criteria_parameters."
        )

    # Trust the GMS-provided type; only validate it and the value against it.
    value_type = _try_parse_and_validate_value_type(gms_type)
    value = _try_parse_and_validate_value(criteria_parameters, value_type)

    self.criteria_parameters = value
    self.criteria_type = value_type
467
+
468
def _process_range_parameters_with_types(
    self,
    criteria_parameters: tuple,
    gms_types: tuple,
) -> None:
    """
    Validate range values against explicit GMS types and store them.

    Args:
        criteria_parameters: The (min, max) values; must have exactly 2 items.
        gms_types: The (min_type, max_type) pair reported by GMS. It is
            unpacked into two names, so it must hold exactly two items.

    Raises:
        SDKUsageError: If ``self.operator`` takes no parameters, does not take
            range parameters, or ``criteria_parameters`` is not of length 2.
            The validation helpers may raise as well.
    """
    operator = self.operator
    if _is_no_parameter_operator(operator):
        raise SDKUsageError(
            f"Range parameters should not be provided for operator {self.operator}"
        )
    if not _is_range_required_for_operator(operator):
        raise SDKUsageError(
            f"Operator {self.operator} does not support range parameters. "
            "Use a single value or criteria_parameters=None."
        )
    if len(criteria_parameters) != 2:
        raise SDKUsageError("Range parameters must be a tuple of exactly 2 values")

    low_value, high_value = criteria_parameters
    low_type, high_type = gms_types

    # Types come from GMS rather than inference; validate types first, then
    # each value against its own type.
    checked_low_type = _try_parse_and_validate_value_type(low_type)
    checked_high_type = _try_parse_and_validate_value_type(high_type)
    checked_low_value = _try_parse_and_validate_value(low_value, checked_low_type)
    checked_high_value = _try_parse_and_validate_value(high_value, checked_high_type)

    self.criteria_parameters = (checked_low_value, checked_high_value)
    self.criteria_type = (checked_low_type, checked_high_type)
505
+
506
def _create_monitor_info(
    self,
    assertion_urn: AssertionUrn,
    status: models.MonitorStatusClass,
    schedule: models.CronScheduleClass,
) -> models.MonitorInfoClass:
    """
    Assemble the MonitorInfoClass that evaluates the given assertion.

    Args:
        assertion_urn: URN of the assertion to monitor (stored as a string).
        status: Monitor status to set on the result.
        schedule: Schedule attached to the assertion evaluation spec.

    Returns:
        A MonitorInfoClass of type ASSERTION with one evaluation spec and the
        adjustment settings (sensitivity, exclusion windows, training data
        lookback days) taken from this instance.
    """
    source_type, field = self._convert_assertion_source_type_and_field()

    evaluation_spec = models.AssertionEvaluationSpecClass(
        assertion=str(assertion_urn),
        schedule=schedule,
        parameters=self._get_assertion_evaluation_parameters(
            str(source_type), field
        ),
    )
    adjustment_settings = models.AssertionAdjustmentSettingsClass(
        sensitivity=self._convert_sensitivity(),
        exclusionWindows=self._convert_exclusion_windows(),
        trainingDataLookbackWindowDays=self.training_data_lookback_days,
    )
    assertion_monitor = models.AssertionMonitorClass(
        assertions=[evaluation_spec],
        settings=models.AssertionMonitorSettingsClass(
            adjustmentSettings=adjustment_settings,
        ),
    )
    return models.MonitorInfoClass(
        type=models.MonitorTypeClass.ASSERTION,
        status=status,
        assertionMonitor=assertion_monitor,
    )
538
+
539
def _create_assertion_info(
    self, filter: Optional[models.DatasetFilterClass]
) -> AssertionInfoInputType:
    """
    Build the FieldAssertionInfoClass for this column metric assertion.

    Args:
        filter: Optional dataset filter passed through to the assertion info.

    Returns:
        A FieldAssertionInfoClass of type FIELD_METRIC for ``self.dataset_urn``
        whose metric assertion uses this instance's column, metric type,
        operator and assertion parameters.
    """
    metric_assertion = models.FieldMetricAssertionClass(
        field=self._get_schema_field_spec(self.column_name),
        metric=self.metric_type,
        operator=self.operator,
        parameters=self._create_assertion_parameters(),
    )
    return models.FieldAssertionInfoClass(
        type=models.FieldAssertionTypeClass.FIELD_METRIC,
        entity=str(self.dataset_urn),
        filter=filter,
        fieldMetricAssertion=metric_assertion,
        # This is a field metric assertion, so the values variant stays unset.
        fieldValuesAssertion=None,
    )
570
+
571
def _convert_schedule(self) -> models.CronScheduleClass:
    """
    Produce the cron schedule for this assertion.

    Returns:
        DEFAULT_DAILY_SCHEDULE when ``self.schedule`` is None; otherwise a new
        CronScheduleClass carrying the configured cron and timezone.
    """
    configured = self.schedule
    if configured is not None:
        return models.CronScheduleClass(
            cron=configured.cron,
            timezone=configured.timezone,
        )
    return DEFAULT_DAILY_SCHEDULE
585
+
586
def _convert_schema_field_spec_to_freshness_field_spec(
    self, field_spec: models.SchemaFieldSpecClass
) -> models.FreshnessFieldSpecClass:
    """
    Build a FreshnessFieldSpecClass from a SchemaFieldSpecClass.

    The path, type and nativeType are copied from ``field_spec``; the kind is
    always set to HIGH_WATERMARK.
    """
    freshness_spec = models.FreshnessFieldSpecClass(
        path=field_spec.path,
        type=field_spec.type,
        nativeType=field_spec.nativeType,
        kind=models.FreshnessFieldKindClass.HIGH_WATERMARK,
    )
    return freshness_spec
598
+
599
def _get_assertion_evaluation_parameters(
    self, source_type: str, field: Optional[FieldSpecType]
) -> models.AssertionEvaluationParametersClass:
    """
    Build the DATASET_FIELD evaluation parameters for the monitor.

    Args:
        source_type: Source type stored on the dataset field parameters.
        field: Optional field spec. A SchemaFieldSpecClass is converted to a
            FreshnessFieldSpecClass first; None is passed through unchanged.

    Returns:
        An AssertionEvaluationParametersClass whose dataset field parameters
        carry ``source_type`` and the (possibly converted) field as
        ``changedRowsField``.
    """
    changed_rows_field = field
    if changed_rows_field is not None:
        if isinstance(changed_rows_field, models.SchemaFieldSpecClass):
            changed_rows_field = (
                self._convert_schema_field_spec_to_freshness_field_spec(
                    changed_rows_field
                )
            )
        # Any non-None field must be a freshness field spec at this point.
        assert isinstance(changed_rows_field, models.FreshnessFieldSpecClass), (
            "Field must be FreshnessFieldSpecClass for monitor info"
        )

    dataset_field_parameters = models.DatasetFieldAssertionParametersClass(
        sourceType=source_type,
        changedRowsField=changed_rows_field,
    )
    return models.AssertionEvaluationParametersClass(
        type=models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
        datasetFieldParameters=dataset_field_parameters,
    )
619
+
620
def _convert_assertion_source_type_and_field(
    self,
) -> tuple[str, Optional[FieldSpecType]]:
    """
    Convert detection mechanism into source type and field specification for column metric assertions.

    Returns:
        A tuple of (source_type, field) where field may be None. A field is
        only produced for the changed-rows-query mechanism, where it is the
        spec of the high watermark column.
        Note that the source_type is a string, not a models.DatasetFieldAssertionSourceTypeClass (or other assertion source type) since
        the source type is not an enum in the code generated from the DatasetFieldSourceType enum in the PDL.

    Raises:
        SDKNotYetSupportedError: If the detection mechanism is not supported.
        SDKUsageError: Per the column validation helpers called here, if the
            field (column) is not found in the dataset and the detection
            mechanism requires a field, or if the field is not an allowed
            type for the detection mechanism.
    """
    field = None

    # NOTE: the order of these isinstance checks is kept as-is on purpose;
    # the mechanism classes may be related by inheritance.
    if isinstance(self.detection_mechanism, _ChangedRowsQuery):
        source_type = models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
        column_name = self._try_parse_and_validate_column_name_is_valid_type(
            self.detection_mechanism.column_name,  # The high watermark column name
            allowed_column_types=HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
        )
        field = self._get_schema_field_spec(column_name)
    elif isinstance(self.detection_mechanism, _AllRowsQuery):
        # For query-based detection, we don't need a field specification
        # as the query itself defines what data to analyze
        source_type = models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY
    elif isinstance(
        self.detection_mechanism,
        (_AllRowsQueryDataHubDatasetProfile, _DatasetProfile),
    ):
        # Note: This is only valid on the all rows query
        source_type = (
            models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE
        )
    else:
        # Built only on the failure path: the names are needed solely for the
        # error message, so there is no reason to instantiate the mechanism
        # objects on every successful call.
        supported_detection_mechanisms = [
            _AllRowsQuery().type,
            _AllRowsQueryDataHubDatasetProfile().type,
            _ChangedRowsQuery(column_name="").type,
        ]
        raise SDKNotYetSupportedError(
            f"Detection mechanism {self.detection_mechanism} is not supported for smart column metric assertions, please use a supported detection mechanism: {', '.join(supported_detection_mechanisms)}"
        )

    return source_type, field
670
+
671
def _create_assertion_parameters(self) -> models.AssertionStdParametersClass:
    """
    Build the AssertionStdParametersClass matching the operator category.

    Single-value operators produce a ``value`` parameter, range operators
    produce ``minValue``/``maxValue``, and no-parameter operators produce an
    empty parameters object. Values are stringified with ``str``.

    Returns:
        An AssertionStdParametersClass populated for ``self.operator``.

    Raises:
        SDKUsageError: If the stored criteria parameters or criteria type do
            not have the shape the operator needs, or the operator belongs to
            none of the known categories.
    """
    if self.operator in SINGLE_VALUE_OPERATORS:
        single_value = self.criteria_parameters
        if single_value is None or isinstance(single_value, tuple):
            raise SDKUsageError(
                f"Single value is required for operator {self.operator}"
            )
        single_type = self.criteria_type
        if single_type is None or isinstance(single_type, tuple):
            raise SDKUsageError(
                f"Single value type is required for operator {self.operator}"
            )
        return models.AssertionStdParametersClass(
            value=models.AssertionStdParameterClass(
                value=str(single_value),
                type=single_type,
            ),
        )

    if self.operator in RANGE_OPERATORS:
        range_values = self.criteria_parameters
        if not isinstance(range_values, tuple):
            raise SDKUsageError(
                f"Range parameters are required for operator {self.operator}"
            )
        range_types = self.criteria_type
        if not isinstance(range_types, tuple):
            raise SDKUsageError(
                f"Range type is required for operator {self.operator}"
            )
        lower_bound = models.AssertionStdParameterClass(
            value=str(range_values[0]),
            type=range_types[0],
        )
        upper_bound = models.AssertionStdParameterClass(
            value=str(range_values[1]),
            type=range_types[1],
        )
        return models.AssertionStdParametersClass(
            minValue=lower_bound,
            maxValue=upper_bound,
        )

    if self.operator in NO_PARAMETER_OPERATORS:
        return models.AssertionStdParametersClass()

    raise SDKUsageError(f"Unsupported operator type: {self.operator}")
721
+
722
def _try_parse_and_validate_column_name_is_valid_type(
    self,
    column_name: str,
    allowed_column_types: list[
        models.DictWrapper
    ] = ALLOWED_COLUMN_TYPES_FOR_SMART_COLUMN_METRIC_ASSERTION,
) -> str:
    """
    Check that a column exists and has an allowed type, then return its name.

    The column's field spec is looked up and handed to ``_validate_field_type``
    together with the allowed types. See also getEligibleFieldColumns and
    related functions in the frontend.

    Args:
        column_name: Name of the column to validate.
        allowed_column_types: Column types accepted for this check.

    Returns:
        ``column_name`` unchanged, once validation has passed.
    """
    self._validate_field_type(
        self._get_schema_field_spec(column_name),
        column_name,
        allowed_column_types,
        "smart column metric assertion",
    )
    return column_name
741
+
742
def _assertion_type(self) -> str:
    """Return the assertion type constant used here: AssertionTypeClass.FIELD."""
    assertion_type: str = models.AssertionTypeClass.FIELD
    return assertion_type
745
+
746
def _validate_field_type_and_operator_compatibility(
    self, column_name: str, operator: models.AssertionStdOperatorClass
) -> None:
    """Ensure the operator is permitted for the column's field type.

    The allowed operators are looked up in FIELD_VALUES_OPERATOR_CONFIG by the
    column's field type; a field type without an entry allows no operators.
    See FIELD_VALUES_OPERATOR_CONFIG in the frontend for the allowed operators
    for each field type.

    Args:
        column_name: The name of the column to validate.
        operator: The operator to validate against.

    Raises:
        SDKUsageError: If the operator is not allowed for the field type.
    """
    field_type = self._get_schema_field_spec(column_name).type
    allowed_operators = FIELD_VALUES_OPERATOR_CONFIG.get(field_type, [])
    if operator in allowed_operators:
        return

    allowed_display = ", ".join(str(op) for op in allowed_operators)
    raise SDKUsageError(
        f"Operator {operator} is not allowed for field type {field_type} for column '{column_name}'. Allowed operators: {allowed_display}"
    )
766
+
767
def _validate_field_type_and_metric_type_compatibility(
    self, column_name: str, metric_type: models.FieldMetricTypeClass
) -> None:
    """Ensure the metric type is permitted for the column's field type.

    The allowed metric types are looked up in FIELD_METRIC_TYPE_CONFIG by the
    column's field type. See FIELD_METRIC_TYPE_CONFIG in the frontend for the
    allowed metric types for each field type.

    Args:
        column_name: The name of the column to validate.
        metric_type: The metric type to validate.

    Raises:
        SDKUsageError: If the field type has no entry in
            FIELD_METRIC_TYPE_CONFIG, or the metric type is not among the
            types allowed for it.
    """
    field_type = self._get_schema_field_spec(column_name).type

    # First gate: the field type itself must be known to the config.
    if field_type not in FIELD_METRIC_TYPE_CONFIG:
        raise SDKUsageError(
            f"Column {column_name} is of type {field_type}, which is not supported for smart column metric assertions"
        )

    # Second gate: the metric must be listed for that field type.
    allowed_metric_types = FIELD_METRIC_TYPE_CONFIG[field_type]
    if metric_type not in allowed_metric_types:
        allowed_display = ", ".join(str(mt) for mt in allowed_metric_types)
        raise SDKUsageError(
            f"Metric type {metric_type} is not allowed for field type {field_type}. Allowed metric types: {allowed_display}"
        )