acryl-datahub-cloud 0.3.12.1rc3__py3-none-any.whl → 0.3.12.2__py3-none-any.whl

This diff shows the changes between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -33,7 +33,7 @@ from acryl_datahub_cloud.sdk.errors import (
33
33
  from datahub.emitter.enum_helpers import get_enum_options
34
34
  from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
35
35
  from datahub.metadata import schema_classes as models
36
- from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
36
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
37
37
  from datahub.sdk import Dataset
38
38
  from datahub.sdk.entity_client import EntityClient
39
39
 
@@ -66,6 +66,15 @@ DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
66
66
  ), # User local timezone, matches the UI default
67
67
  )
68
68
 
69
+ TYPE_CLASS_NAME_TO_TYPE_MAP = {
70
+ "StringTypeClass": "STRING",
71
+ "NumberTypeClass": "NUMBER",
72
+ "BooleanTypeClass": "BOOLEAN",
73
+ "DateTypeClass": "DATE",
74
+ "TimeTypeClass": "TIME",
75
+ "NullTypeClass": "NULL",
76
+ }
77
+
69
78
 
70
79
  class AbstractDetectionMechanism(BaseModel, ABC):
71
80
  type: str
@@ -318,8 +327,11 @@ class DetectionMechanism:
318
327
  def _try_parse_from_dict(
319
328
  detection_mechanism_config: dict[str, str],
320
329
  ) -> _DetectionMechanismTypes:
330
+ # Make a copy of the dictionary to avoid mutating the original
331
+ config_copy = detection_mechanism_config.copy()
332
+
321
333
  try:
322
- detection_mechanism_type = detection_mechanism_config.pop("type")
334
+ detection_mechanism_type = config_copy.pop("type")
323
335
  except KeyError as e:
324
336
  raise SDKUsageErrorWithExamples(
325
337
  msg="Detection mechanism type is required if using a dict to create a DetectionMechanism",
@@ -336,23 +348,23 @@ class DetectionMechanism:
336
348
  ) from e
337
349
 
338
350
  try:
339
- return detection_mechanism_obj(**detection_mechanism_config)
351
+ return detection_mechanism_obj(**config_copy)
340
352
  except TypeError as e:
341
353
  if "object is not callable" not in e.args[0]:
342
354
  raise e
343
- if detection_mechanism_config:
355
+ if config_copy:
344
356
  # If we are here in the TypeError case, the detection mechanism is an instance of a class,
345
357
  # not a class itself, so we can't instantiate it with the config dict.
346
358
  # In this case, the config dict should be empty after the type is popped.
347
359
  # If it is not empty, we raise an error.
348
360
  raise SDKUsageErrorWithExamples(
349
- msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {detection_mechanism_config}",
361
+ msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {config_copy}",
350
362
  examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
351
363
  ) from e
352
364
  return detection_mechanism_obj
353
365
  except ValidationError as e:
354
366
  raise SDKUsageErrorWithExamples(
355
- msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {detection_mechanism_config} {e}",
367
+ msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {config_copy} {e}",
356
368
  examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
357
369
  ) from e
358
370
 
@@ -426,12 +438,14 @@ class InferenceSensitivity(Enum):
426
438
  DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
427
439
 
428
440
  TIME_WINDOW_SIZE_EXAMPLES = {
429
- "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
441
+ "Recommended: Time window size from objects": "TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10)",
430
442
  "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
443
+ "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
444
+ "Time window size from dict": '{"unit": "MINUTE", "multiple": 10}',
431
445
  }
432
446
 
433
447
 
434
- class CalendarInterval(Enum):
448
+ class CalendarInterval(str, Enum):
435
449
  MINUTE = "MINUTE"
436
450
  HOUR = "HOUR"
437
451
  DAY = "DAY"
@@ -446,9 +460,24 @@ TimeWindowSizeInputTypes: TypeAlias = Union[
446
460
  models.TimeWindowSizeClass,
447
461
  models.FixedIntervalScheduleClass,
448
462
  TimeWindowSize,
463
+ dict[str, Union[str, int]], # {"unit": "MINUTE", "multiple": 10}
449
464
  ]
450
465
 
451
466
 
467
+ def _try_parse_calendar_interval(
468
+ config: Union[str, CalendarInterval],
469
+ ) -> CalendarInterval:
470
+ if isinstance(config, CalendarInterval):
471
+ return config
472
+ try:
473
+ return CalendarInterval(config.upper())
474
+ except ValueError as e:
475
+ raise SDKUsageErrorWithExamples(
476
+ msg=f"Invalid calendar interval: {config}",
477
+ examples=TIME_WINDOW_SIZE_EXAMPLES,
478
+ ) from e
479
+
480
+
452
481
  def _try_parse_time_window_size(
453
482
  config: TimeWindowSizeInputTypes,
454
483
  ) -> models.TimeWindowSizeClass:
@@ -471,6 +500,23 @@ def _try_parse_time_window_size(
471
500
  ),
472
501
  multiple=config.multiple,
473
502
  )
503
+ elif isinstance(config, dict):
504
+ if "unit" not in config or "multiple" not in config:
505
+ raise SDKUsageErrorWithExamples(
506
+ msg=f"Invalid time window size: {config}",
507
+ examples=TIME_WINDOW_SIZE_EXAMPLES,
508
+ )
509
+ try:
510
+ multiple = int(config["multiple"])
511
+ except ValueError as e:
512
+ raise SDKUsageErrorWithExamples(
513
+ msg=f"Invalid time window size: {config}",
514
+ examples=TIME_WINDOW_SIZE_EXAMPLES,
515
+ ) from e
516
+ return models.TimeWindowSizeClass(
517
+ unit=_try_parse_calendar_interval(str(config["unit"])),
518
+ multiple=multiple,
519
+ )
474
520
  else:
475
521
  raise SDKUsageErrorWithExamples(
476
522
  msg=f"Invalid time window size: {config}",
@@ -1077,6 +1123,10 @@ class _AssertionInput(ABC):
1077
1123
  """
1078
1124
  Convert the tags input into a standardized format.
1079
1125
 
1126
+ Tag names are automatically converted to tag URNs using TagUrn constructor. For example:
1127
+ - "my_tag" becomes "urn:li:tag:my_tag"
1128
+ - "urn:li:tag:my_tag" remains unchanged
1129
+
1080
1130
  Returns:
1081
1131
  A list of tags or None if no tags are provided.
1082
1132
 
@@ -1087,16 +1137,19 @@ class _AssertionInput(ABC):
1087
1137
  return None
1088
1138
 
1089
1139
  if isinstance(self.tags, str):
1090
- return [self.tags]
1140
+ return [str(TagUrn(self.tags))]
1091
1141
  elif isinstance(self.tags, list):
1092
- return self.tags
1142
+ return [
1143
+ str(TagUrn(tag)) if isinstance(tag, str) else tag for tag in self.tags
1144
+ ]
1093
1145
  else:
1094
1146
  raise SDKUsageErrorWithExamples(
1095
1147
  msg=f"Invalid tags: {self.tags}",
1096
1148
  examples={
1097
- "Tags from string": "urn:li:tag:my_tag_1",
1098
- "Tags from list": [
1099
- "urn:li:tag:my_tag_1",
1149
+ "Tags from string (tag name)": "my_tag_1",
1150
+ "Tags from string (tag URN)": "urn:li:tag:my_tag_1",
1151
+ "Tags from list (mixed)": [
1152
+ "my_tag_1",
1100
1153
  "urn:li:tag:my_tag_2",
1101
1154
  ],
1102
1155
  },
@@ -1173,16 +1226,40 @@ class _AssertionInput(ABC):
1173
1226
  schema_fields = self.cached_dataset._schema_dict()
1174
1227
  field = schema_fields.get(column_name)
1175
1228
  if field:
1176
- return models.SchemaFieldSpecClass(
1177
- path=field.fieldPath,
1178
- type=field.type.type.__class__.__name__,
1179
- nativeType=field.nativeDataType,
1180
- )
1229
+ return self._convert_schema_field_to_schema_field_spec(field)
1181
1230
  else:
1182
1231
  raise SDKUsageError(
1183
1232
  msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
1184
1233
  )
1185
1234
 
1235
+ def _convert_schema_field_to_schema_field_spec(
1236
+ self, field: models.SchemaFieldClass
1237
+ ) -> models.SchemaFieldSpecClass:
1238
+ """
1239
+ Convert a SchemaFieldClass to a SchemaFieldSpecClass.
1240
+ """
1241
+ type_class_name = field.type.type.__class__.__name__
1242
+ try:
1243
+ type = self._convert_schema_field_type_class_name_to_type(type_class_name)
1244
+ except KeyError as e:
1245
+ raise SDKUsageError(
1246
+ msg=f"Invalid type: {type_class_name}. Must be one of {list(TYPE_CLASS_NAME_TO_TYPE_MAP.keys())}",
1247
+ ) from e
1248
+
1249
+ return models.SchemaFieldSpecClass(
1250
+ path=field.fieldPath,
1251
+ type=type,
1252
+ nativeType=field.nativeDataType,
1253
+ )
1254
+
1255
+ def _convert_schema_field_type_class_name_to_type(
1256
+ self, type_class_name: str
1257
+ ) -> str:
1258
+ """
1259
+ Convert a type class name to a type.
1260
+ """
1261
+ return TYPE_CLASS_NAME_TO_TYPE_MAP[type_class_name]
1262
+
1186
1263
  def _validate_field_type(
1187
1264
  self,
1188
1265
  field_spec: models.SchemaFieldSpecClass,
@@ -1202,7 +1279,10 @@ class _AssertionInput(ABC):
1202
1279
  Raises:
1203
1280
  SDKUsageError: If the field has an invalid type
1204
1281
  """
1205
- allowed_type_names = [t.__class__.__name__ for t in allowed_types]
1282
+ allowed_type_names = [
1283
+ self._convert_schema_field_type_class_name_to_type(t.__class__.__name__)
1284
+ for t in allowed_types
1285
+ ]
1206
1286
  if field_spec.type not in allowed_type_names:
1207
1287
  raise SDKUsageError(
1208
1288
  msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "