acryl-datahub-cloud 0.3.12.1rc3__py3-none-any.whl → 0.3.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/sdk/__init__.py +20 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +146 -97
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +10 -22
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +99 -19
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +965 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +191 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +60 -11
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +438 -347
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +105 -61
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +381 -392
- acryl_datahub_cloud/sdk/assertions_client.py +993 -314
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/METADATA +47 -47
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/RECORD +18 -15
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/top_level.txt +0 -0
|
@@ -33,7 +33,7 @@ from acryl_datahub_cloud.sdk.errors import (
|
|
|
33
33
|
from datahub.emitter.enum_helpers import get_enum_options
|
|
34
34
|
from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
|
|
35
35
|
from datahub.metadata import schema_classes as models
|
|
36
|
-
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
|
|
36
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
37
37
|
from datahub.sdk import Dataset
|
|
38
38
|
from datahub.sdk.entity_client import EntityClient
|
|
39
39
|
|
|
@@ -66,6 +66,15 @@ DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
|
|
|
66
66
|
), # User local timezone, matches the UI default
|
|
67
67
|
)
|
|
68
68
|
|
|
69
|
+
TYPE_CLASS_NAME_TO_TYPE_MAP = {
|
|
70
|
+
"StringTypeClass": "STRING",
|
|
71
|
+
"NumberTypeClass": "NUMBER",
|
|
72
|
+
"BooleanTypeClass": "BOOLEAN",
|
|
73
|
+
"DateTypeClass": "DATE",
|
|
74
|
+
"TimeTypeClass": "TIME",
|
|
75
|
+
"NullTypeClass": "NULL",
|
|
76
|
+
}
|
|
77
|
+
|
|
69
78
|
|
|
70
79
|
class AbstractDetectionMechanism(BaseModel, ABC):
|
|
71
80
|
type: str
|
|
@@ -318,8 +327,11 @@ class DetectionMechanism:
|
|
|
318
327
|
def _try_parse_from_dict(
|
|
319
328
|
detection_mechanism_config: dict[str, str],
|
|
320
329
|
) -> _DetectionMechanismTypes:
|
|
330
|
+
# Make a copy of the dictionary to avoid mutating the original
|
|
331
|
+
config_copy = detection_mechanism_config.copy()
|
|
332
|
+
|
|
321
333
|
try:
|
|
322
|
-
detection_mechanism_type =
|
|
334
|
+
detection_mechanism_type = config_copy.pop("type")
|
|
323
335
|
except KeyError as e:
|
|
324
336
|
raise SDKUsageErrorWithExamples(
|
|
325
337
|
msg="Detection mechanism type is required if using a dict to create a DetectionMechanism",
|
|
@@ -336,23 +348,23 @@ class DetectionMechanism:
|
|
|
336
348
|
) from e
|
|
337
349
|
|
|
338
350
|
try:
|
|
339
|
-
return detection_mechanism_obj(**
|
|
351
|
+
return detection_mechanism_obj(**config_copy)
|
|
340
352
|
except TypeError as e:
|
|
341
353
|
if "object is not callable" not in e.args[0]:
|
|
342
354
|
raise e
|
|
343
|
-
if
|
|
355
|
+
if config_copy:
|
|
344
356
|
# If we are here in the TypeError case, the detection mechanism is an instance of a class,
|
|
345
357
|
# not a class itself, so we can't instantiate it with the config dict.
|
|
346
358
|
# In this case, the config dict should be empty after the type is popped.
|
|
347
359
|
# If it is not empty, we raise an error.
|
|
348
360
|
raise SDKUsageErrorWithExamples(
|
|
349
|
-
msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {
|
|
361
|
+
msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {config_copy}",
|
|
350
362
|
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
351
363
|
) from e
|
|
352
364
|
return detection_mechanism_obj
|
|
353
365
|
except ValidationError as e:
|
|
354
366
|
raise SDKUsageErrorWithExamples(
|
|
355
|
-
msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {
|
|
367
|
+
msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {config_copy} {e}",
|
|
356
368
|
examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
|
|
357
369
|
) from e
|
|
358
370
|
|
|
@@ -426,12 +438,14 @@ class InferenceSensitivity(Enum):
|
|
|
426
438
|
DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
|
|
427
439
|
|
|
428
440
|
TIME_WINDOW_SIZE_EXAMPLES = {
|
|
429
|
-
"Time window size from
|
|
441
|
+
"Recommended: Time window size from objects": "TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10)",
|
|
430
442
|
"Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
|
|
443
|
+
"Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
|
|
444
|
+
"Time window size from dict": '{"unit": "MINUTE", "multiple": 10}',
|
|
431
445
|
}
|
|
432
446
|
|
|
433
447
|
|
|
434
|
-
class CalendarInterval(Enum):
|
|
448
|
+
class CalendarInterval(str, Enum):
|
|
435
449
|
MINUTE = "MINUTE"
|
|
436
450
|
HOUR = "HOUR"
|
|
437
451
|
DAY = "DAY"
|
|
@@ -446,9 +460,24 @@ TimeWindowSizeInputTypes: TypeAlias = Union[
|
|
|
446
460
|
models.TimeWindowSizeClass,
|
|
447
461
|
models.FixedIntervalScheduleClass,
|
|
448
462
|
TimeWindowSize,
|
|
463
|
+
dict[str, Union[str, int]], # {"unit": "MINUTE", "multiple": 10}
|
|
449
464
|
]
|
|
450
465
|
|
|
451
466
|
|
|
467
|
+
def _try_parse_calendar_interval(
|
|
468
|
+
config: Union[str, CalendarInterval],
|
|
469
|
+
) -> CalendarInterval:
|
|
470
|
+
if isinstance(config, CalendarInterval):
|
|
471
|
+
return config
|
|
472
|
+
try:
|
|
473
|
+
return CalendarInterval(config.upper())
|
|
474
|
+
except ValueError as e:
|
|
475
|
+
raise SDKUsageErrorWithExamples(
|
|
476
|
+
msg=f"Invalid calendar interval: {config}",
|
|
477
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
478
|
+
) from e
|
|
479
|
+
|
|
480
|
+
|
|
452
481
|
def _try_parse_time_window_size(
|
|
453
482
|
config: TimeWindowSizeInputTypes,
|
|
454
483
|
) -> models.TimeWindowSizeClass:
|
|
@@ -471,6 +500,23 @@ def _try_parse_time_window_size(
|
|
|
471
500
|
),
|
|
472
501
|
multiple=config.multiple,
|
|
473
502
|
)
|
|
503
|
+
elif isinstance(config, dict):
|
|
504
|
+
if "unit" not in config or "multiple" not in config:
|
|
505
|
+
raise SDKUsageErrorWithExamples(
|
|
506
|
+
msg=f"Invalid time window size: {config}",
|
|
507
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
508
|
+
)
|
|
509
|
+
try:
|
|
510
|
+
multiple = int(config["multiple"])
|
|
511
|
+
except ValueError as e:
|
|
512
|
+
raise SDKUsageErrorWithExamples(
|
|
513
|
+
msg=f"Invalid time window size: {config}",
|
|
514
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
515
|
+
) from e
|
|
516
|
+
return models.TimeWindowSizeClass(
|
|
517
|
+
unit=_try_parse_calendar_interval(str(config["unit"])),
|
|
518
|
+
multiple=multiple,
|
|
519
|
+
)
|
|
474
520
|
else:
|
|
475
521
|
raise SDKUsageErrorWithExamples(
|
|
476
522
|
msg=f"Invalid time window size: {config}",
|
|
@@ -1077,6 +1123,10 @@ class _AssertionInput(ABC):
|
|
|
1077
1123
|
"""
|
|
1078
1124
|
Convert the tags input into a standardized format.
|
|
1079
1125
|
|
|
1126
|
+
Tag names are automatically converted to tag URNs using TagUrn constructor. For example:
|
|
1127
|
+
- "my_tag" becomes "urn:li:tag:my_tag"
|
|
1128
|
+
- "urn:li:tag:my_tag" remains unchanged
|
|
1129
|
+
|
|
1080
1130
|
Returns:
|
|
1081
1131
|
A list of tags or None if no tags are provided.
|
|
1082
1132
|
|
|
@@ -1087,16 +1137,19 @@ class _AssertionInput(ABC):
|
|
|
1087
1137
|
return None
|
|
1088
1138
|
|
|
1089
1139
|
if isinstance(self.tags, str):
|
|
1090
|
-
return [self.tags]
|
|
1140
|
+
return [str(TagUrn(self.tags))]
|
|
1091
1141
|
elif isinstance(self.tags, list):
|
|
1092
|
-
return
|
|
1142
|
+
return [
|
|
1143
|
+
str(TagUrn(tag)) if isinstance(tag, str) else tag for tag in self.tags
|
|
1144
|
+
]
|
|
1093
1145
|
else:
|
|
1094
1146
|
raise SDKUsageErrorWithExamples(
|
|
1095
1147
|
msg=f"Invalid tags: {self.tags}",
|
|
1096
1148
|
examples={
|
|
1097
|
-
"Tags from string": "
|
|
1098
|
-
"Tags from
|
|
1099
|
-
|
|
1149
|
+
"Tags from string (tag name)": "my_tag_1",
|
|
1150
|
+
"Tags from string (tag URN)": "urn:li:tag:my_tag_1",
|
|
1151
|
+
"Tags from list (mixed)": [
|
|
1152
|
+
"my_tag_1",
|
|
1100
1153
|
"urn:li:tag:my_tag_2",
|
|
1101
1154
|
],
|
|
1102
1155
|
},
|
|
@@ -1173,16 +1226,40 @@ class _AssertionInput(ABC):
|
|
|
1173
1226
|
schema_fields = self.cached_dataset._schema_dict()
|
|
1174
1227
|
field = schema_fields.get(column_name)
|
|
1175
1228
|
if field:
|
|
1176
|
-
return
|
|
1177
|
-
path=field.fieldPath,
|
|
1178
|
-
type=field.type.type.__class__.__name__,
|
|
1179
|
-
nativeType=field.nativeDataType,
|
|
1180
|
-
)
|
|
1229
|
+
return self._convert_schema_field_to_schema_field_spec(field)
|
|
1181
1230
|
else:
|
|
1182
1231
|
raise SDKUsageError(
|
|
1183
1232
|
msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
|
|
1184
1233
|
)
|
|
1185
1234
|
|
|
1235
|
+
def _convert_schema_field_to_schema_field_spec(
|
|
1236
|
+
self, field: models.SchemaFieldClass
|
|
1237
|
+
) -> models.SchemaFieldSpecClass:
|
|
1238
|
+
"""
|
|
1239
|
+
Convert a SchemaFieldClass to a SchemaFieldSpecClass.
|
|
1240
|
+
"""
|
|
1241
|
+
type_class_name = field.type.type.__class__.__name__
|
|
1242
|
+
try:
|
|
1243
|
+
type = self._convert_schema_field_type_class_name_to_type(type_class_name)
|
|
1244
|
+
except KeyError as e:
|
|
1245
|
+
raise SDKUsageError(
|
|
1246
|
+
msg=f"Invalid type: {type_class_name}. Must be one of {list(TYPE_CLASS_NAME_TO_TYPE_MAP.keys())}",
|
|
1247
|
+
) from e
|
|
1248
|
+
|
|
1249
|
+
return models.SchemaFieldSpecClass(
|
|
1250
|
+
path=field.fieldPath,
|
|
1251
|
+
type=type,
|
|
1252
|
+
nativeType=field.nativeDataType,
|
|
1253
|
+
)
|
|
1254
|
+
|
|
1255
|
+
def _convert_schema_field_type_class_name_to_type(
|
|
1256
|
+
self, type_class_name: str
|
|
1257
|
+
) -> str:
|
|
1258
|
+
"""
|
|
1259
|
+
Convert a type class name to a type.
|
|
1260
|
+
"""
|
|
1261
|
+
return TYPE_CLASS_NAME_TO_TYPE_MAP[type_class_name]
|
|
1262
|
+
|
|
1186
1263
|
def _validate_field_type(
|
|
1187
1264
|
self,
|
|
1188
1265
|
field_spec: models.SchemaFieldSpecClass,
|
|
@@ -1202,7 +1279,10 @@ class _AssertionInput(ABC):
|
|
|
1202
1279
|
Raises:
|
|
1203
1280
|
SDKUsageError: If the field has an invalid type
|
|
1204
1281
|
"""
|
|
1205
|
-
allowed_type_names = [
|
|
1282
|
+
allowed_type_names = [
|
|
1283
|
+
self._convert_schema_field_type_class_name_to_type(t.__class__.__name__)
|
|
1284
|
+
for t in allowed_types
|
|
1285
|
+
]
|
|
1206
1286
|
if field_spec.type not in allowed_type_names:
|
|
1207
1287
|
raise SDKUsageError(
|
|
1208
1288
|
msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
|