acryl-datahub-cloud 0.3.11.1rc7__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (94) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  3. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  10. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +29 -13
  11. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  12. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  13. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2011 -1955
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  19. acryl_datahub_cloud/metadata/schema.avsc +25413 -25425
  20. acryl_datahub_cloud/metadata/schema_classes.py +1316 -791
  21. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  22. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  23. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  24. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +223 -202
  25. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +36 -7
  26. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  27. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +40 -8
  28. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  29. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  30. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  32. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  34. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  35. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  36. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  37. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  39. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  40. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  41. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  42. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  44. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  45. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  46. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  47. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  48. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  49. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  51. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  52. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  53. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  54. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  55. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  56. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  57. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +39 -10
  58. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  60. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  61. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  62. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  63. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  64. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  65. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  66. acryl_datahub_cloud/notifications/__init__.py +0 -0
  67. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  68. acryl_datahub_cloud/sdk/__init__.py +39 -0
  69. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  70. acryl_datahub_cloud/sdk/assertion/assertion_base.py +1467 -0
  71. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  72. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  73. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  74. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1648 -0
  75. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +258 -0
  76. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +914 -0
  77. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +272 -0
  78. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +648 -0
  79. acryl_datahub_cloud/sdk/assertions_client.py +3206 -0
  80. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  81. acryl_datahub_cloud/sdk/entities/assertion.py +432 -0
  82. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  83. acryl_datahub_cloud/sdk/entities/subscription.py +84 -0
  84. acryl_datahub_cloud/sdk/errors.py +34 -0
  85. acryl_datahub_cloud/sdk/resolver_client.py +39 -0
  86. acryl_datahub_cloud/sdk/subscription_client.py +714 -0
  87. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/METADATA +47 -42
  88. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/RECORD +91 -58
  89. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/WHEEL +1 -1
  90. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/entry_points.txt +1 -0
  91. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  92. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  93. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  94. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,648 @@
1
+ from abc import ABC
2
+ from datetime import datetime
3
+ from enum import Enum
4
+ from typing import Any, Optional, Tuple, Union
5
+
6
+ from pydantic import BaseModel, Extra
7
+
8
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
9
+ DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
10
+ AssertionIncidentBehaviorInputTypes,
11
+ DetectionMechanismInputTypes,
12
+ FieldSpecType,
13
+ _AssertionInput,
14
+ )
15
+ from acryl_datahub_cloud.sdk.entities.assertion import (
16
+ Assertion,
17
+ AssertionInfoInputType,
18
+ TagsInputType,
19
+ )
20
+ from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
21
+ from datahub.metadata import schema_classes as models
22
+ from datahub.metadata.urns import (
23
+ AssertionUrn,
24
+ CorpUserUrn,
25
+ DatasetUrn,
26
+ )
27
+ from datahub.sdk.entity_client import EntityClient
28
+
29
+ # TODO: better naming for "volume assertion definition"
30
+
31
+
32
+ # Type aliases and enums for volume assertions
33
+
34
+
35
class VolumeAssertionDefinitionType(str, Enum):
    """Kinds of volume assertion definition.

    Each member's value is the corresponding ``models.VolumeAssertionTypeClass``
    constant, so a member and its raw model value can be used interchangeably
    wherever the value is compared.
    """

    ROW_COUNT_TOTAL = models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL
    ROW_COUNT_CHANGE = models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE
38
+
39
+
40
# Definition types that the dict parser accepts; anything else is rejected
# with an SDKUsageError listing these values.
CURRENTLY_SUPPORTED_VOLUME_ASSERTION_DEFINITIONS = [
    VolumeAssertionDefinitionType.ROW_COUNT_TOTAL,
    VolumeAssertionDefinitionType.ROW_COUNT_CHANGE,
]
45
+
46
+
47
class VolumeAssertionDefinitionChangeKind(str, Enum):
    """How a row-count change is measured.

    Member values are the ``models.AssertionValueChangeTypeClass`` constants of
    the same name.
    """

    ABSOLUTE = models.AssertionValueChangeTypeClass.ABSOLUTE
    PERCENTAGE = models.AssertionValueChangeTypeClass.PERCENTAGE
50
+
51
+
52
# Parameters of a definition: one number for single-value operators, or a
# two-element tuple of numbers for the BETWEEN operator.
VolumeAssertionDefinitionParameters = Union[float, Tuple[float, float]]
53
+
54
+
55
class VolumeAssertionOperator(str, Enum):
    """Operators accepted by volume assertions.

    Member values are the ``models.AssertionStdOperatorClass`` constants of the
    same name. BETWEEN takes a (min, max) pair; the other two take one number.
    """

    LESS_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
    GREATER_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
    BETWEEN = models.AssertionStdOperatorClass.BETWEEN
61
+
62
+
63
class _AbstractVolumeAssertionDefinition(BaseModel, ABC):
    """Common base of the volume assertion definition models.

    Declares the ``type`` discriminator shared by every concrete definition and
    configures the model to reject unknown fields.
    """

    # Discriminator; concrete subclasses narrow it and supply a default.
    type: str

    class Config:
        # Fields not declared on the model are an error rather than ignored.
        extra = Extra.forbid
68
+
69
+
70
class RowCountTotal(_AbstractVolumeAssertionDefinition):
    """Definition of a volume assertion on the total row count."""

    # Fixed discriminator for this definition kind.
    type: VolumeAssertionDefinitionType = VolumeAssertionDefinitionType.ROW_COUNT_TOTAL
    # Comparison to apply.
    operator: VolumeAssertionOperator
    # One number, or a (min, max) pair when the operator is BETWEEN.
    parameters: VolumeAssertionDefinitionParameters
74
+
75
+
76
class RowCountChange(_AbstractVolumeAssertionDefinition):
    """Definition of a volume assertion on the change in row count."""

    # Fixed discriminator for this definition kind.
    type: VolumeAssertionDefinitionType = VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
    # Whether the change is measured as an absolute amount or a percentage.
    kind: VolumeAssertionDefinitionChangeKind
    # Comparison to apply.
    operator: VolumeAssertionOperator
    # One number, or a (min, max) pair when the operator is BETWEEN.
    parameters: VolumeAssertionDefinitionParameters
81
+
82
+
83
# Concrete definition classes as a tuple, so it can be handed to isinstance().
_VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES = (RowCountTotal, RowCountChange)

# The same classes expressed as a static type for annotations.
_VolumeAssertionDefinitionTypes = Union[RowCountTotal, RowCountChange]

# What callers may supply: a plain dict to be parsed, or an already-built
# definition object.
VolumeAssertionDefinitionInputTypes = Union[
    dict[str, Any],
    _VolumeAssertionDefinitionTypes,
]
95
+
96
+
97
class VolumeAssertionDefinition:
    """Helpers for validating, parsing and converting volume assertion definitions.

    The class is used purely as a namespace: every method is a ``@staticmethod``
    and the two class attributes expose the concrete definition models.
    """

    ROW_COUNT_TOTAL = RowCountTotal
    ROW_COUNT_CHANGE = RowCountChange

    @staticmethod
    def _validate_between_parameters(
        parameters: VolumeAssertionDefinitionParameters, assertion_type: str
    ) -> None:
        """Require ``parameters`` to be a two-element tuple (BETWEEN operator).

        Only the container shape is checked here; element types are not.

        Raises:
            SDKUsageError: If ``parameters`` is not a tuple of length two.
        """
        if not isinstance(parameters, tuple) or len(parameters) != 2:
            raise SDKUsageError(
                f"For BETWEEN operator in {assertion_type}, parameters must be a tuple of two numbers (min_value, max_value)."
            )

    @staticmethod
    def _validate_single_value_parameters(
        parameters: VolumeAssertionDefinitionParameters,
        operator_enum: VolumeAssertionOperator,
        assertion_type: str,
    ) -> None:
        """Require ``parameters`` to be a single number (non-BETWEEN operators).

        Raises:
            SDKUsageError: If ``parameters`` is not an int or float. A tuple gets
                a dedicated message because it is the likeliest mistake.
        """
        if isinstance(parameters, (int, float)):
            return
        if isinstance(parameters, tuple):
            raise SDKUsageError(
                f"For {operator_enum.value} operator in {assertion_type}, parameters must be a single number, not a tuple."
            )
        raise SDKUsageError(
            f"For {operator_enum.value} operator in {assertion_type}, parameters must be a single number."
        )

    @staticmethod
    def _parse_operator(
        operator: Union[str, VolumeAssertionOperator],
    ) -> VolumeAssertionOperator:
        """Convert ``operator`` to a ``VolumeAssertionOperator`` member.

        Any input that is not already a member goes through the enum lookup, so
        a value of the wrong type (e.g. an int) is rejected here with a clear
        error instead of being returned unchanged and failing later.

        Raises:
            SDKUsageError: If ``operator`` is not a valid operator value.
        """
        if isinstance(operator, VolumeAssertionOperator):
            return operator
        try:
            return VolumeAssertionOperator(operator)
        except (ValueError, TypeError) as e:
            valid_operators = ", ".join(op.value for op in VolumeAssertionOperator)
            raise SDKUsageError(
                f"Invalid operator '{operator}'. Valid operators: {valid_operators}"
            ) from e

    @staticmethod
    def _validate_operator_and_parameters(
        operator: Union[str, VolumeAssertionOperator],
        parameters: VolumeAssertionDefinitionParameters,
        assertion_type: str,
    ) -> None:
        """Check that ``parameters`` has the shape ``operator`` requires.

        Args:
            operator: Operator as an enum member or its string value.
            parameters: The parameters to check.
            assertion_type: Label used only in error messages.

        Raises:
            SDKUsageError: If the operator is invalid or the parameters do not
                match it.
        """
        operator_enum = VolumeAssertionDefinition._parse_operator(operator)

        if operator_enum == VolumeAssertionOperator.BETWEEN:
            VolumeAssertionDefinition._validate_between_parameters(
                parameters, assertion_type
            )
        else:
            VolumeAssertionDefinition._validate_single_value_parameters(
                parameters, operator_enum, assertion_type
            )

    @staticmethod
    def _parse_instantiated_object(
        definition: _VolumeAssertionDefinitionTypes,
    ) -> _VolumeAssertionDefinitionTypes:
        """Validate an already-built definition object and return it unchanged.

        Raises:
            SDKUsageError: If the object's operator and parameters do not match.
        """
        # The validators take a plain-string label for their messages. Pass the
        # enum's value so the text matches what the dict path produces and does
        # not depend on how the interpreter formats enum members.
        type_label = (
            definition.type.value
            if isinstance(definition.type, Enum)
            else str(definition.type)
        )
        VolumeAssertionDefinition._validate_operator_and_parameters(
            definition.operator, definition.parameters, type_label
        )
        return definition

    @staticmethod
    def _parse_dict_definition(
        definition_dict: dict[str, Any],
    ) -> _VolumeAssertionDefinitionTypes:
        """Build a definition object from a dict.

        NOTE: ``definition_dict`` is mutated ("type" is popped and "operator" is
        replaced by its enum member); ``parse`` passes in a copy.

        Raises:
            SDKUsageError: On a missing or unknown type, a missing or invalid
                operator, missing or malformed parameters, or any failure while
                constructing the model (e.g. extra fields, missing ``kind``).
        """
        try:
            assertion_type = definition_dict.pop("type")
        except KeyError as e:
            raise SDKUsageError(
                "Volume assertion definition must include a 'type' field"
            ) from e

        if assertion_type not in CURRENTLY_SUPPORTED_VOLUME_ASSERTION_DEFINITIONS:
            supported_types = ", ".join(
                t.value for t in CURRENTLY_SUPPORTED_VOLUME_ASSERTION_DEFINITIONS
            )
            raise SDKUsageError(
                f"Unknown volume assertion type: {assertion_type}. Supported types: {supported_types}"
            )

        operator = definition_dict.get("operator")
        parameters = definition_dict.get("parameters")

        if operator is None:
            raise SDKUsageError(
                f"Missing required 'operator' field for {assertion_type}"
            )
        if parameters is None:
            raise SDKUsageError(
                f"Missing required 'parameters' field for {assertion_type}"
            )

        # Coarse type check first, so the error names the offending type.
        if not isinstance(parameters, (int, float, tuple)):
            raise SDKUsageError(
                f"For {assertion_type}, parameters must be a number or a tuple of two numbers, got: {type(parameters)}"
            )

        # Parse the operator once; the member is reused for validation and for
        # model construction.
        operator_enum = VolumeAssertionDefinition._parse_operator(operator)
        VolumeAssertionDefinition._validate_operator_and_parameters(
            operator_enum, parameters, assertion_type
        )
        definition_dict["operator"] = operator_enum

        if assertion_type == VolumeAssertionDefinitionType.ROW_COUNT_TOTAL:
            definition_cls: Any = RowCountTotal
            type_name = VolumeAssertionDefinitionType.ROW_COUNT_TOTAL.value
        else:  # assertion_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
            definition_cls = RowCountChange
            type_name = VolumeAssertionDefinitionType.ROW_COUNT_CHANGE.value

        try:
            return definition_cls(**definition_dict)
        except Exception as e:
            # Deliberately broad: any model construction failure is reported to
            # the caller as a usage error, with the cause chained.
            raise SDKUsageError(
                f"Failed to create {type_name} volume assertion: {str(e)}"
            ) from e

    @staticmethod
    def parse(
        definition: VolumeAssertionDefinitionInputTypes,
    ) -> _VolumeAssertionDefinitionTypes:
        """Parse and validate a volume assertion definition.

        Accepts either a dict, which is converted into a ``RowCountTotal`` or
        ``RowCountChange`` object, or an already instantiated object, which is
        validated and returned as is. A dict argument is not modified.

        Args:
            definition: Either
                - a dict with the keys
                    - type: a ``VolumeAssertionDefinitionType`` member or its value
                    - operator: a ``VolumeAssertionOperator`` member or its value
                    - parameters: a number for single-value operators, or a
                      tuple of two numbers for BETWEEN
                    - kind: for row-count-change definitions, a
                      ``VolumeAssertionDefinitionChangeKind`` member or its value
                - or a ``RowCountTotal`` / ``RowCountChange`` instance.

                The accepted string values are the ``.value`` of the enum
                members, which are taken from the DataHub model constants.

        Returns:
            A validated ``RowCountTotal`` or ``RowCountChange``.

        Raises:
            SDKUsageError: If the definition is invalid, including:
                - input that is neither a dict nor a definition object
                - a missing ``type``, ``operator`` or ``parameters``
                - an unknown type or an invalid operator
                - parameters whose shape does not match the operator
                - model construction failures (extra fields, missing ``kind``,
                  validation errors)

        Examples:
            Parse a row count total assertion:
            >>> definition = {
            ...     "type": VolumeAssertionDefinitionType.ROW_COUNT_TOTAL.value,
            ...     "operator": VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO.value,
            ...     "parameters": 100,
            ... }
            >>> result = VolumeAssertionDefinition.parse(definition)
            >>> isinstance(result, RowCountTotal)
            True

            Parse a row count change assertion with the BETWEEN operator:
            >>> definition = {
            ...     "type": VolumeAssertionDefinitionType.ROW_COUNT_CHANGE.value,
            ...     "kind": VolumeAssertionDefinitionChangeKind.ABSOLUTE.value,
            ...     "operator": VolumeAssertionOperator.BETWEEN.value,
            ...     "parameters": (10, 50),
            ... }
            >>> result = VolumeAssertionDefinition.parse(definition)
            >>> isinstance(result, RowCountChange)
            True

            Parse an already instantiated object:
            >>> obj = RowCountTotal(
            ...     operator=VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO,
            ...     parameters=200,
            ... )
            >>> result = VolumeAssertionDefinition.parse(obj)
            >>> result == obj
            True
        """
        if isinstance(definition, _VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES):
            return VolumeAssertionDefinition._parse_instantiated_object(definition)

        if not isinstance(definition, dict):
            raise SDKUsageError(
                f"Volume assertion definition must be a dict or a volume assertion definition object, got: {type(definition)}"
            )

        # The dict parser mutates its argument, so hand it a shallow copy.
        return VolumeAssertionDefinition._parse_dict_definition(definition.copy())

    @staticmethod
    def build_model_volume_info(
        definition: _VolumeAssertionDefinitionTypes,
        dataset_urn: str,
        filter: Optional[models.DatasetFilterClass] = None,
    ) -> models.VolumeAssertionInfoClass:
        """Build a ``VolumeAssertionInfoClass`` from a definition object.

        Args:
            definition: A ``RowCountTotal`` or ``RowCountChange``; normally the
                output of ``VolumeAssertionDefinition.parse()``.
            dataset_urn: URN of the dataset, stored as the assertion's entity.
            filter: Optional dataset filter; set on the result only when given.

        Returns:
            The populated ``VolumeAssertionInfoClass``.

        Raises:
            SDKUsageError: If the definition type is not supported, or its
                parameters do not match its operator.
        """
        if not isinstance(definition, _VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES):
            raise SDKUsageError(
                f"Unsupported volume assertion definition type: {type(definition)}"
            )

        std_parameters = VolumeAssertionDefinition._build_assertion_parameters(
            definition.operator, definition.parameters
        )

        if isinstance(definition, RowCountTotal):
            volume_info = models.VolumeAssertionInfoClass(
                type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,
                entity=dataset_urn,
                rowCountTotal=models.RowCountTotalClass(
                    operator=definition.operator.value,
                    parameters=std_parameters,
                ),
            )
        else:
            # Map kind to the DataHub assertion value change type.
            change_type = (
                models.AssertionValueChangeTypeClass.ABSOLUTE
                if definition.kind == VolumeAssertionDefinitionChangeKind.ABSOLUTE
                else models.AssertionValueChangeTypeClass.PERCENTAGE
            )
            volume_info = models.VolumeAssertionInfoClass(
                type=models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE,
                entity=dataset_urn,
                rowCountChange=models.RowCountChangeClass(
                    type=change_type,
                    operator=definition.operator.value,
                    parameters=std_parameters,
                ),
            )

        if filter is not None:
            volume_info.filter = filter
        return volume_info

    @staticmethod
    def _format_number_value(value: Union[int, float]) -> str:
        """Format a number for a DataHub parameter string.

        Whole-number floats are rendered without a fractional part
        (100.0 -> "100"); other values use ``str()`` (100.5 -> "100.5").
        """
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    @staticmethod
    def _build_assertion_parameters(
        operator: VolumeAssertionOperator,
        parameters: VolumeAssertionDefinitionParameters,
    ) -> models.AssertionStdParametersClass:
        """Build the ``AssertionStdParametersClass`` for an operator/parameters pair.

        Args:
            operator: The volume assertion operator.
            parameters: A number for single-value operators, or a tuple of two
                numbers for BETWEEN.

        Returns:
            For BETWEEN, an object with ``minValue``/``maxValue`` set (the pair
            is sorted first); otherwise an object with ``value`` set.

        Raises:
            SDKUsageError: If ``parameters`` does not match ``operator``.
        """
        number_type = models.AssertionStdParameterTypeClass.NUMBER

        # Explicit raises rather than ``assert``: assertions are removed when
        # Python runs with -O, which would let a mismatched value be
        # stringified into the model.
        if operator == VolumeAssertionOperator.BETWEEN:
            if not isinstance(parameters, tuple) or len(parameters) != 2:
                raise SDKUsageError(
                    f"BETWEEN operator requires tuple of two numbers, got: {parameters}"
                )
            # Sort so minValue really is the smaller bound, whatever order
            # the caller supplied.
            min_val, max_val = sorted(parameters)
            return models.AssertionStdParametersClass(
                minValue=models.AssertionStdParameterClass(
                    value=VolumeAssertionDefinition._format_number_value(min_val),
                    type=number_type,
                ),
                maxValue=models.AssertionStdParameterClass(
                    value=VolumeAssertionDefinition._format_number_value(max_val),
                    type=number_type,
                ),
            )

        # Single value operators
        if not isinstance(parameters, (int, float)):
            raise SDKUsageError(
                f"Single value operator {operator} requires number parameter, got: {parameters}"
            )
        return models.AssertionStdParametersClass(
            value=models.AssertionStdParameterClass(
                value=VolumeAssertionDefinition._format_number_value(parameters),
                type=number_type,
            ),
        )

    @staticmethod
    def _extract_volume_parameters(
        assertion_urn: str,
        operator: VolumeAssertionOperator,
        parameters: models.AssertionStdParametersClass,
    ) -> VolumeAssertionDefinitionParameters:
        """Read definition parameters back out of stored assertion parameters.

        Returns:
            ``(min, max)`` as floats for BETWEEN, otherwise a single float.

        Raises:
            SDKNotYetSupportedError: If the values the operator needs are missing.
        """
        # Compare against the enum member, as the rest of this class does.
        if operator == VolumeAssertionOperator.BETWEEN:
            if parameters.minValue is None or parameters.maxValue is None:
                raise SDKNotYetSupportedError(
                    f"Volume assertion {assertion_urn} has BETWEEN operator but missing min/max values"
                )
            return (float(parameters.minValue.value), float(parameters.maxValue.value))

        if parameters.value is None:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion_urn} has {operator.value} operator but missing value"
            )
        return float(parameters.value.value)

    @staticmethod
    def _get_row_count_total(assertion: Assertion) -> RowCountTotal:
        """Extract a ``RowCountTotal`` definition from an assertion.

        Raises:
            SDKNotYetSupportedError: If the assertion has no ``rowCountTotal``.
        """
        # Type narrowing only; ``from_assertion`` checks this before calling.
        assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
        if assertion.info.rowCountTotal is None:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion.urn} has ROW_COUNT_TOTAL type but no rowCountTotal, which is not supported"
            )
        row_count_total = assertion.info.rowCountTotal
        operator = VolumeAssertionOperator(row_count_total.operator)
        parameters = VolumeAssertionDefinition._extract_volume_parameters(
            str(assertion.urn), operator, row_count_total.parameters
        )
        return RowCountTotal(operator=operator, parameters=parameters)

    @staticmethod
    def _get_row_count_change(assertion: Assertion) -> RowCountChange:
        """Extract a ``RowCountChange`` definition from an assertion.

        Any stored change type other than ABSOLUTE is read as PERCENTAGE.

        Raises:
            SDKNotYetSupportedError: If the assertion has no ``rowCountChange``.
        """
        # Type narrowing only; ``from_assertion`` checks this before calling.
        assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
        if assertion.info.rowCountChange is None:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion.urn} has ROW_COUNT_CHANGE type but no rowCountChange, which is not supported"
            )
        row_count_change = assertion.info.rowCountChange
        operator = VolumeAssertionOperator(row_count_change.operator)
        parameters = VolumeAssertionDefinition._extract_volume_parameters(
            str(assertion.urn), operator, row_count_change.parameters
        )
        kind: VolumeAssertionDefinitionChangeKind = (
            VolumeAssertionDefinitionChangeKind.ABSOLUTE
            if row_count_change.type == models.AssertionValueChangeTypeClass.ABSOLUTE
            else VolumeAssertionDefinitionChangeKind.PERCENTAGE
        )
        return RowCountChange(operator=operator, parameters=parameters, kind=kind)

    @staticmethod
    def from_assertion(assertion: Assertion) -> _VolumeAssertionDefinitionTypes:
        """Create a volume assertion definition from a DataHub assertion entity.

        Args:
            assertion: The assertion entity to extract the definition from.

        Returns:
            A ``RowCountTotal`` or ``RowCountChange``.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, is not a
                volume assertion, or has an unsupported volume type or
                configuration.
        """
        if assertion.info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a volume assertion info, which is not supported"
            )
        if not isinstance(assertion.info, models.VolumeAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a volume assertion"
            )

        if assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL:
            return VolumeAssertionDefinition._get_row_count_total(assertion)
        if assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE:
            return VolumeAssertionDefinition._get_row_count_change(assertion)
        raise SDKNotYetSupportedError(
            f"Volume assertion {assertion.urn} has unsupported type {assertion.info.type}"
        )
517
+
518
+
519
class _VolumeAssertionInput(_AssertionInput):
    """Assertion input for native volume assertions.

    Adds a parsed volume assertion definition on top of the shared
    ``_AssertionInput`` state and supplies the volume-specific pieces of the
    assertion and monitor models.
    """

    def __init__(
        self,
        *,
        # Required fields
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
        definition: VolumeAssertionDefinitionInputTypes,
        urn: Optional[Union[str, AssertionUrn]] = None,
        # Optional fields
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
    ):
        """Initialise the shared input state, then parse ``definition``.

        All arguments except ``definition`` are forwarded unchanged to the base
        class, together with a fixed NATIVE source type.

        Raises:
            SDKUsageError: If ``definition`` is not a valid volume assertion
                definition.
        """
        super().__init__(
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            # Native assertions are of type native, not inferred
            source_type=models.AssertionSourceTypeClass.NATIVE,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )

        # Parsed last, after the base class has been initialised.
        self.definition = VolumeAssertionDefinition.parse(definition)

    def _assertion_type(self) -> str:
        """Return the assertion type constant for volume assertions."""
        return models.AssertionTypeClass.VOLUME

    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """Build the volume assertion info for this input.

        Args:
            filter: Optional filter to apply to the assertion.

        Returns:
            The info object built from ``self.definition`` and the dataset URN.
        """
        dataset = str(self.dataset_urn)
        return VolumeAssertionDefinition.build_model_volume_info(
            self.definition, dataset, filter
        )

    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
    ) -> models.MonitorInfoClass:
        """Build an assertion-type ``MonitorInfoClass`` holding one evaluation spec.

        The spec ties ``assertion_urn`` to ``schedule`` and to evaluation
        parameters derived from the detection mechanism.
        """
        source_type, field = self._convert_assertion_source_type_and_field()
        evaluation_parameters = self._get_assertion_evaluation_parameters(
            str(source_type), field
        )
        evaluation_spec = models.AssertionEvaluationSpecClass(
            assertion=str(assertion_urn),
            schedule=schedule,
            parameters=evaluation_parameters,
        )
        return models.MonitorInfoClass(
            type=models.MonitorTypeClass.ASSERTION,
            status=status,
            assertionMonitor=models.AssertionMonitorClass(
                assertions=[evaluation_spec]
            ),
        )

    def _convert_schedule(self) -> models.CronScheduleClass:
        """Return the cron schedule to use for this assertion.

        Returns:
            A new ``CronScheduleClass`` copying cron and timezone from the
            configured schedule, or ``DEFAULT_EVERY_SIX_HOURS_SCHEDULE`` when
            no schedule is set.
        """
        if self.schedule is not None:
            return models.CronScheduleClass(
                cron=self.schedule.cron,
                timezone=self.schedule.timezone,
            )
        return DEFAULT_EVERY_SIX_HOURS_SCHEDULE

    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Build DATASET_VOLUME evaluation parameters for ``source_type``.

        ``field`` is accepted but not used by this implementation.
        """
        volume_parameters = models.DatasetVolumeAssertionParametersClass(
            sourceType=source_type
        )
        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            datasetVolumeParameters=volume_parameters,
        )

    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """Convert the detection mechanism to a volume source type and field.

        The field is always ``None``. Only the string "datahub_operation" maps
        to OPERATION; "information_schema", any other string, ``None`` and any
        non-string mechanism all yield INFORMATION_SCHEMA.
        """
        mechanism = self.detection_mechanism

        if isinstance(mechanism, str) and mechanism == "datahub_operation":
            return models.DatasetVolumeSourceTypeClass.OPERATION, None

        # NOTE(review): unknown strings and non-string mechanisms fall back
        # silently here; more complex mechanisms may need additional logic.
        return models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA, None