acryl-datahub-cloud 0.3.12rc5__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "acryl-datahub-cloud",
3
- "version": "0.3.12rc5",
3
+ "version": "0.3.12rc6",
4
4
  "install_requires": [
5
5
  "avro-gen3==0.7.16",
6
6
  "acryl-datahub"
@@ -0,0 +1,7 @@
1
+ query getAppConfig {
2
+ appConfig {
3
+ featureFlags {
4
+ formsNotificationsEnabled
5
+ }
6
+ }
7
+ }
@@ -2,6 +2,7 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
2
2
  FreshnessAssertion,
3
3
  SmartFreshnessAssertion,
4
4
  SmartVolumeAssertion,
5
+ SqlAssertion,
5
6
  )
6
7
  from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
7
8
  SmartColumnMetricAssertion,
@@ -13,6 +14,9 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
13
14
  InferenceSensitivity,
14
15
  TimeWindowSize,
15
16
  )
17
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
18
+ SqlAssertionCriteria,
19
+ )
16
20
  from acryl_datahub_cloud.sdk.assertions_client import AssertionsClient
17
21
  from acryl_datahub_cloud.sdk.resolver_client import ResolverClient
18
22
  from acryl_datahub_cloud.sdk.subscription_client import SubscriptionClient
@@ -30,4 +34,6 @@ __all__ = [
30
34
  "AssertionsClient",
31
35
  "ResolverClient",
32
36
  "SubscriptionClient",
37
+ "SqlAssertion",
38
+ "SqlAssertionCriteria",
33
39
  ]
@@ -17,6 +17,7 @@ from typing_extensions import Self
17
17
  from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
18
18
  ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
19
19
  DEFAULT_DETECTION_MECHANISM,
20
+ DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
20
21
  DEFAULT_SCHEDULE,
21
22
  DEFAULT_SENSITIVITY,
22
23
  AssertionIncidentBehavior,
@@ -35,6 +36,9 @@ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input
35
36
  ValueInputType,
36
37
  ValueTypeInputType,
37
38
  )
39
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
40
+ SqlAssertionCriteria,
41
+ )
38
42
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion
39
43
  from acryl_datahub_cloud.sdk.entities.monitor import (
40
44
  Monitor,
@@ -73,7 +77,9 @@ class _HasSchedule:
73
77
  return self._schedule
74
78
 
75
79
  @staticmethod
76
- def _get_schedule(monitor: Monitor) -> models.CronScheduleClass:
80
+ def _get_schedule(
81
+ monitor: Monitor, default: models.CronScheduleClass = DEFAULT_SCHEDULE
82
+ ) -> models.CronScheduleClass:
77
83
  """Get the schedule from the monitor."""
78
84
  assertion_evaluation_specs = _get_nested_field_for_entity_with_default(
79
85
  monitor,
@@ -81,11 +87,11 @@ class _HasSchedule:
81
87
  [],
82
88
  )
83
89
  if len(assertion_evaluation_specs) == 0:
84
- return DEFAULT_SCHEDULE
90
+ return default
85
91
  assertion_evaluation_spec = assertion_evaluation_specs[0]
86
92
  schedule = assertion_evaluation_spec.schedule
87
93
  if schedule is None:
88
- return DEFAULT_SCHEDULE
94
+ return default
89
95
  return schedule
90
96
 
91
97
 
@@ -335,6 +341,7 @@ class _AssertionPublic(ABC):
335
341
  Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
336
342
  """
337
343
 
344
+ # TODO: have the individual classes self-declare this
338
345
  _SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
339
346
  models.FreshnessAssertionInfoClass,
340
347
  models.VolumeAssertionInfoClass,
@@ -1148,3 +1155,163 @@ class FreshnessAssertion(_HasSchedule, _AssertionPublic):
1148
1155
  raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
1149
1156
  else:
1150
1157
  raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
1158
+
1159
+
1160
class SqlAssertion(_AssertionPublic, _HasSchedule):
    """
    Read-only, public-facing representation of a SQL assertion.

    On top of the common assertion properties and the schedule supplied by the
    base classes, it exposes the SQL statement and the criteria the statement's
    result is checked against.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        statement: str,
        criteria: SqlAssertionCriteria,
        schedule: models.CronScheduleClass,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Build a SQL assertion object.

        Note: the values are exposed for reading only; they cannot be set on
        the assertion object. To update an assertion, use the `upsert_*` method.

        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            statement: The SQL statement to be used for the assertion.
            criteria: The criteria to be used for the assertion.
            schedule: The schedule of the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the
                assertion fails / passes.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Both bases are initialised explicitly (not via super()) because each
        # one takes a different set of keyword arguments.
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSchedule.__init__(self, schedule=schedule)

        # State that only SQL assertions carry.
        self._statement = statement
        self._criteria = criteria

    @property
    def statement(self) -> str:
        """The SQL statement this assertion was created with."""
        return self._statement

    @property
    def criteria(self) -> SqlAssertionCriteria:
        """The criteria this assertion was created with."""
        return self._criteria

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Always return None: SQL assertions have no detection mechanism."""
        return None

    @staticmethod
    def _require_sql_info(assertion: Assertion) -> models.SqlAssertionInfoClass:
        """
        Return the assertion's info, verified to be a SqlAssertionInfoClass.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info at all, or
                its info is not a SQL assertion info.
        """
        sql_info = assertion.info
        if sql_info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
            )
        if not isinstance(sql_info, models.SqlAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a SQL assertion"
            )
        return sql_info

    @staticmethod
    def _get_statement(assertion: Assertion) -> str:
        """
        Read the SQL statement from the assertion entity.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info or is not a
                SQL assertion.
        """
        return SqlAssertion._require_sql_info(assertion).statement

    @staticmethod
    def _get_criteria(assertion: Assertion) -> SqlAssertionCriteria:
        """
        Build the SqlAssertionCriteria from the assertion entity.

        The criteria parameters are a single float when the stored parameters
        carry `value`, or a `(min, max)` tuple of floats when they carry both
        `minValue` and `maxValue`.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, is not a SQL
                assertion, or has neither a single value nor a full min/max pair.
        """
        sql_info = SqlAssertion._require_sql_info(assertion)
        stored = sql_info.parameters

        parameters: Union[float, tuple[float, float]]
        if stored.value is not None:
            parameters = float(stored.value.value)
        elif stored.maxValue is not None and stored.minValue is not None:
            # The tuple is ordered (min, max).
            parameters = (
                float(stored.minValue.value),
                float(stored.maxValue.value),
            )
        else:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a valid parameters for the SQL assertion"
            )

        def _as_str(raw: object) -> str:
            # Values that already are strings are passed through untouched;
            # anything else goes through str().
            return raw if isinstance(raw, str) else str(raw)

        change_type = sql_info.changeType
        return SqlAssertionCriteria(
            type=_as_str(sql_info.type),
            change_type=None if change_type is None else _as_str(change_type),
            operator=_as_str(sql_info.operator),
            parameters=parameters,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Assemble a SQL assertion from its assertion and monitor entities.

        The display name falls back to an empty string when the assertion has
        no description, and the schedule falls back to
        DEFAULT_EVERY_SIX_HOURS_SCHEDULE when the monitor does not provide one.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            statement=cls._get_statement(assertion),
            criteria=cls._get_criteria(assertion),
            schedule=cls._get_schedule(
                monitor, default=DEFAULT_EVERY_SIX_HOURS_SCHEDULE
            ),
            tags=cls._get_tags(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
        )
@@ -4,6 +4,7 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
4
4
  FreshnessAssertion,
5
5
  SmartFreshnessAssertion,
6
6
  SmartVolumeAssertion,
7
+ SqlAssertion,
7
8
  )
8
9
  from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
9
10
  SmartColumnMetricAssertion,
@@ -14,5 +15,6 @@ AssertionTypes = Union[
14
15
  SmartVolumeAssertion,
15
16
  FreshnessAssertion,
16
17
  SmartColumnMetricAssertion,
18
+ SqlAssertion,
17
19
  # TODO: Add other assertion types here as we add them.
18
20
  ]
@@ -142,6 +142,40 @@ class _DatasetProfile(AbstractDetectionMechanism):
142
142
  type: Literal["dataset_profile"] = "dataset_profile"
143
143
 
144
144
 
145
# Comparison operators whose parameter is one numeric value.
SINGLE_VALUE_NUMERIC_OPERATORS = [
    models.AssertionStdOperatorClass.EQUAL_TO,
    models.AssertionStdOperatorClass.NOT_EQUAL_TO,
    models.AssertionStdOperatorClass.GREATER_THAN,
    models.AssertionStdOperatorClass.LESS_THAN,
    models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
    models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
]

# Every operator that takes exactly one value parameter: the operators listed
# here followed by all of the numeric comparison operators above.
SINGLE_VALUE_OPERATORS = [
    models.AssertionStdOperatorClass.CONTAIN,
    models.AssertionStdOperatorClass.END_WITH,
    models.AssertionStdOperatorClass.START_WITH,
    models.AssertionStdOperatorClass.REGEX_MATCH,
    models.AssertionStdOperatorClass.IN,
    models.AssertionStdOperatorClass.NOT_IN,
    *SINGLE_VALUE_NUMERIC_OPERATORS,
]

# Operators whose parameter is a numeric range.
RANGE_OPERATORS = [models.AssertionStdOperatorClass.BETWEEN]

# Operators that take no parameter at all.
NO_PARAMETER_OPERATORS = [
    models.AssertionStdOperatorClass.NULL,
    models.AssertionStdOperatorClass.NOT_NULL,
    models.AssertionStdOperatorClass.IS_TRUE,
    models.AssertionStdOperatorClass.IS_FALSE,
]
177
+
178
+
145
179
  # Keep these two lists in sync:
146
180
  _DETECTION_MECHANISM_CONCRETE_TYPES = (
147
181
  _InformationSchema,
@@ -1096,15 +1130,12 @@ class _AssertionInput(ABC):
1096
1130
  Returns:
1097
1131
  A Monitor entity configured with the assertion input parameters.
1098
1132
  """
1099
- source_type, field = self._convert_assertion_source_type_and_field()
1100
1133
  return Monitor(
1101
1134
  id=(self.dataset_urn, assertion_urn),
1102
1135
  info=self._create_monitor_info(
1103
1136
  assertion_urn=assertion_urn,
1104
1137
  status=self._convert_monitor_status(),
1105
1138
  schedule=self._convert_schedule(),
1106
- source_type=source_type,
1107
- field=field,
1108
1139
  ),
1109
1140
  )
1110
1141
 
@@ -1182,8 +1213,6 @@ class _AssertionInput(ABC):
1182
1213
  assertion_urn: AssertionUrn,
1183
1214
  status: models.MonitorStatusClass,
1184
1215
  schedule: models.CronScheduleClass,
1185
- source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1186
- field: Optional[FieldSpecType],
1187
1216
  ) -> models.MonitorInfoClass:
1188
1217
  """
1189
1218
  Create a MonitorInfoClass with all the necessary components.
@@ -1191,8 +1220,6 @@ class _AssertionInput(ABC):
1191
1220
  Args:
1192
1221
  status: The monitor status.
1193
1222
  schedule: The monitor schedule.
1194
- source_type: The source type.
1195
- field: Optional field specification.
1196
1223
  Returns:
1197
1224
  A MonitorInfoClass configured with all the provided components.
1198
1225
  """
@@ -1432,12 +1459,11 @@ class _SmartFreshnessAssertionInput(
1432
1459
  assertion_urn: AssertionUrn,
1433
1460
  status: models.MonitorStatusClass,
1434
1461
  schedule: models.CronScheduleClass,
1435
- source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1436
- field: Optional[FieldSpecType],
1437
1462
  ) -> models.MonitorInfoClass:
1438
1463
  """
1439
1464
  Create a MonitorInfoClass with all the necessary components.
1440
1465
  """
1466
+ source_type, field = self._convert_assertion_source_type_and_field()
1441
1467
  return models.MonitorInfoClass(
1442
1468
  type=models.MonitorTypeClass.ASSERTION,
1443
1469
  status=status,
@@ -1591,12 +1617,11 @@ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
1591
1617
  assertion_urn: AssertionUrn,
1592
1618
  status: models.MonitorStatusClass,
1593
1619
  schedule: models.CronScheduleClass,
1594
- source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1595
- field: Optional[FieldSpecType],
1596
1620
  ) -> models.MonitorInfoClass:
1597
1621
  """
1598
1622
  Create a MonitorInfoClass with all the necessary components.
1599
1623
  """
1624
+ source_type, field = self._convert_assertion_source_type_and_field()
1600
1625
  return models.MonitorInfoClass(
1601
1626
  type=models.MonitorTypeClass.ASSERTION,
1602
1627
  status=status,
@@ -112,12 +112,11 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
112
112
  assertion_urn: AssertionUrn,
113
113
  status: models.MonitorStatusClass,
114
114
  schedule: models.CronScheduleClass,
115
- source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
116
- field: Optional[FieldSpecType],
117
115
  ) -> models.MonitorInfoClass:
118
116
  """
119
117
  Create a MonitorInfoClass with all the necessary components.
120
118
  """
119
+ source_type, field = self._convert_assertion_source_type_and_field()
121
120
  return models.MonitorInfoClass(
122
121
  type=models.MonitorTypeClass.ASSERTION,
123
122
  status=status,
@@ -5,6 +5,9 @@ from typing import Optional, Union
5
5
  from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
6
6
  DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
7
7
  HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
8
+ NO_PARAMETER_OPERATORS,
9
+ RANGE_OPERATORS,
10
+ SINGLE_VALUE_OPERATORS,
8
11
  AssertionIncidentBehavior,
9
12
  AssertionInfoInputType,
10
13
  DetectionMechanismInputTypes,
@@ -85,35 +88,6 @@ FIELD_VALUES_OPERATOR_CONFIG = {
85
88
  ],
86
89
  }
87
90
 
88
- # Operators that require a single value parameter
89
- SINGLE_VALUE_OPERATORS = [
90
- models.AssertionStdOperatorClass.EQUAL_TO,
91
- models.AssertionStdOperatorClass.NOT_EQUAL_TO,
92
- models.AssertionStdOperatorClass.GREATER_THAN,
93
- models.AssertionStdOperatorClass.LESS_THAN,
94
- models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
95
- models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
96
- models.AssertionStdOperatorClass.CONTAIN,
97
- models.AssertionStdOperatorClass.END_WITH,
98
- models.AssertionStdOperatorClass.START_WITH,
99
- models.AssertionStdOperatorClass.REGEX_MATCH,
100
- models.AssertionStdOperatorClass.IN,
101
- models.AssertionStdOperatorClass.NOT_IN,
102
- ]
103
-
104
- # Operators that require a range parameter
105
- RANGE_OPERATORS = [
106
- models.AssertionStdOperatorClass.BETWEEN,
107
- ]
108
-
109
- # Operators that require no parameters
110
- NO_PARAMETER_OPERATORS = [
111
- models.AssertionStdOperatorClass.NULL,
112
- models.AssertionStdOperatorClass.NOT_NULL,
113
- models.AssertionStdOperatorClass.IS_TRUE,
114
- models.AssertionStdOperatorClass.IS_FALSE,
115
- ]
116
-
117
91
  # Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
118
92
  # datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
119
93
  FIELD_METRIC_TYPE_CONFIG = {
@@ -442,12 +416,11 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
442
416
  assertion_urn: AssertionUrn,
443
417
  status: models.MonitorStatusClass,
444
418
  schedule: models.CronScheduleClass,
445
- source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
446
- field: Optional[FieldSpecType],
447
419
  ) -> models.MonitorInfoClass:
448
420
  """
449
421
  Create a MonitorInfoClass with all the necessary components.
450
422
  """
423
+ source_type, field = self._convert_assertion_source_type_and_field()
451
424
  return models.MonitorInfoClass(
452
425
  type=models.MonitorTypeClass.ASSERTION,
453
426
  status=status,