acryl-datahub-cloud 0.3.12rc1__py3-none-any.whl → 0.3.12rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (70) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +524 -0
  3. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  7. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  8. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  9. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1842 -1786
  10. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  11. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  14. acryl_datahub_cloud/metadata/schema.avsc +24747 -23945
  15. acryl_datahub_cloud/metadata/schema_classes.py +1031 -631
  16. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  17. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  18. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  19. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +31 -7
  20. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +27 -6
  21. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +31 -7
  22. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  23. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  24. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  25. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  26. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  27. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  28. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  29. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  30. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  31. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  32. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  34. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  35. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  36. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  37. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  39. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +3 -0
  40. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  41. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  42. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  44. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  45. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  46. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  47. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  48. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +27 -6
  49. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  51. acryl_datahub_cloud/notifications/__init__.py +0 -0
  52. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  53. acryl_datahub_cloud/sdk/__init__.py +25 -0
  54. acryl_datahub_cloud/{_sdk_extras → sdk}/assertion.py +202 -45
  55. acryl_datahub_cloud/{_sdk_extras → sdk}/assertion_input.py +344 -83
  56. acryl_datahub_cloud/{_sdk_extras → sdk}/assertions_client.py +635 -199
  57. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  58. acryl_datahub_cloud/{_sdk_extras → sdk}/entities/assertion.py +1 -1
  59. acryl_datahub_cloud/{_sdk_extras → sdk}/subscription_client.py +146 -33
  60. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/METADATA +48 -43
  61. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/RECORD +69 -54
  62. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/entry_points.txt +1 -0
  63. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -19
  64. /acryl_datahub_cloud/{_sdk_extras/entities → datahub_forms_notifications}/__init__.py +0 -0
  65. /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/monitor.py +0 -0
  66. /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/subscription.py +0 -0
  67. /acryl_datahub_cloud/{_sdk_extras → sdk}/errors.py +0 -0
  68. /acryl_datahub_cloud/{_sdk_extras → sdk}/resolver_client.py +0 -0
  69. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/WHEEL +0 -0
  70. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/top_level.txt +0 -0
@@ -11,17 +11,20 @@ from enum import Enum
11
11
  from typing import Literal, Optional, TypeAlias, Union
12
12
 
13
13
  import pydantic
14
+ import pytz
15
+ import tzlocal
14
16
  from avrogen.dict_wrapper import DictWrapper
17
+ from croniter import croniter
15
18
  from pydantic import BaseModel, Extra, ValidationError
16
19
 
17
- from acryl_datahub_cloud._sdk_extras.entities.assertion import (
20
+ from acryl_datahub_cloud.sdk.entities.assertion import (
18
21
  Assertion,
19
22
  AssertionActionsInputType,
20
23
  AssertionInfoInputType,
21
24
  TagsInputType,
22
25
  )
23
- from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
24
- from acryl_datahub_cloud._sdk_extras.errors import (
26
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
27
+ from acryl_datahub_cloud.sdk.errors import (
25
28
  SDKNotYetSupportedError,
26
29
  SDKUsageError,
27
30
  SDKUsageErrorWithExamples,
@@ -39,6 +42,13 @@ ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
39
42
  DEFAULT_NAME_PREFIX = "New Assertion"
40
43
  DEFAULT_NAME_SUFFIX_LENGTH = 8
41
44
 
45
# Default monitor schedule applied when the caller does not provide one.
DEFAULT_SCHEDULE = models.CronScheduleClass(
    cron="0 * * * *",  # Every hour, matches the UI default
    timezone=str(
        tzlocal.get_localzone()
    ),  # User local timezone, matches the UI default
)
42
52
 
43
53
  class AbstractDetectionMechanism(BaseModel, ABC):
44
54
  type: str
@@ -85,6 +95,16 @@ class _DataHubOperation(AbstractDetectionMechanism):
85
95
  type: Literal["datahub_operation"] = "datahub_operation"
86
96
 
87
97
 
98
class _Query(AbstractDetectionMechanism):
    """Detection mechanism that evaluates the dataset via a COUNT(*) query."""

    # COUNT(*) query
    type: Literal["query"] = "query"
    # Optional SQL predicate to narrow the query, e.g. "id > 1000".
    additional_filter: Optional[str] = None
104
class _DatasetProfile(AbstractDetectionMechanism):
    """Detection mechanism backed by the DataHub dataset profile (maps to the
    DATAHUB_DATASET_PROFILE volume source type)."""

    type: Literal["dataset_profile"] = "dataset_profile"
88
108
  # Keep these two lists in sync:
89
109
  _DETECTION_MECHANISM_CONCRETE_TYPES = (
90
110
  _InformationSchema,
@@ -92,6 +112,8 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
92
112
  _LastModifiedColumn,
93
113
  _HighWatermarkColumn,
94
114
  _DataHubOperation,
115
+ _Query,
116
+ _DatasetProfile,
95
117
  )
96
118
  _DetectionMechanismTypes = Union[
97
119
  _InformationSchema,
@@ -99,8 +121,16 @@ _DetectionMechanismTypes = Union[
99
121
  _LastModifiedColumn,
100
122
  _HighWatermarkColumn,
101
123
  _DataHubOperation,
124
+ _Query,
125
+ _DatasetProfile,
102
126
  ]
103
127
 
128
+ _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
129
+ _LastModifiedColumn,
130
+ _HighWatermarkColumn,
131
+ _Query,
132
+ )
133
+
104
134
 
105
135
  class DetectionMechanism:
106
136
  # To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
@@ -110,6 +140,8 @@ class DetectionMechanism:
110
140
  LAST_MODIFIED_COLUMN = _LastModifiedColumn
111
141
  HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
112
142
  DATAHUB_OPERATION = _DataHubOperation()
143
+ QUERY = _Query
144
+ DATASET_PROFILE = _DatasetProfile()
113
145
 
114
146
  _DETECTION_MECHANISM_EXAMPLES = {
115
147
  "Information Schema from string": "information_schema",
@@ -130,6 +162,14 @@ class DetectionMechanism:
130
162
  "High Watermark Column from DetectionMechanism": "DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')",
131
163
  "DataHub Operation from string": "datahub_operation",
132
164
  "DataHub Operation from DetectionMechanism": "DetectionMechanism.DATAHUB_OPERATION",
165
+ "Query from string": "query",
166
+ "Query from dict": {
167
+ "type": "query",
168
+ "additional_filter": "id > 1000",
169
+ },
170
+ "Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
171
+ "Dataset Profile from string": "dataset_profile",
172
+ "Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
133
173
  }
134
174
 
135
175
  @staticmethod
@@ -496,6 +536,64 @@ def _try_parse_training_data_lookback_days(
496
536
  return training_data_lookback_days
497
537
 
498
538
 
539
def _validate_cron_schedule(schedule: str, timezone: str) -> None:
    """Validate a cron expression and timezone against the POSIX.1-2017 standard.

    Note: We are using the croniter library for cron parsing which is different
    from executor, which uses apscheduler, so there is a risk of mismatch here.

    Args:
        schedule: A 5-field cron expression, e.g. "0 * * * *".
        timezone: An IANA timezone name, e.g. "UTC". An empty string skips
            timezone validation.

    Raises:
        SDKUsageError: If the cron expression or the timezone is invalid.
    """
    import re  # Local import, only needed for the day-of-week check below.

    try:
        # Validate timezone - pytz.timezone() raises UnknownTimeZoneError for invalid timezones
        # Skip timezone validation when empty
        if timezone:
            pytz.timezone(timezone)

        # Validate 5-field cron expression only (POSIX.1-2017 standard)
        fields = schedule.strip().split()
        if len(fields) != 5:
            raise ValueError("POSIX.1-2017 requires exactly 5 fields")

        # POSIX.1-2017 specific validation: Sunday must be 0, not 7.
        # However croniter accepts 7 as Sunday, so a custom check is needed here.
        # A single \b7\b search matches exactly the standalone "7" tokens
        # (alone, in lists, in ranges, or as a step value); the previous
        # substring alternatives such as "7-" were redundant and also matched
        # inside multi-digit values like "17-", which croniter rejects anyway.
        dow_field = fields[4]
        if re.search(r"\b7\b", dow_field):
            raise ValueError(
                "POSIX.1-2017 standard: Sunday must be represented as 0, not 7"
            )

        # Validate cron expression - croniter constructor validates the expression
        croniter(schedule)

    except Exception as e:
        raise SDKUsageError(
            f"Invalid cron expression or timezone: {schedule} {timezone}, please use a POSIX.1-2017 compatible cron expression and timezone."
        ) from e
578
def _try_parse_schedule(
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Optional[models.CronScheduleClass]:
    """Normalize a user-supplied schedule into a validated CronScheduleClass.

    Args:
        schedule: None, a 5-field cron string (interpreted in UTC), or an
            already-built CronScheduleClass.

    Returns:
        None when no schedule was given, otherwise a validated
        models.CronScheduleClass.

    Raises:
        SDKUsageError: If the cron expression or timezone is invalid, or if
            the input is not one of the supported types.
    """
    if schedule is None:
        return None
    if isinstance(schedule, str):
        # Bare cron strings are interpreted in UTC.
        _validate_cron_schedule(schedule, "UTC")
        return models.CronScheduleClass(
            cron=schedule,
            timezone="UTC",
        )
    if isinstance(schedule, models.CronScheduleClass):
        _validate_cron_schedule(schedule.cron, schedule.timezone)
        return schedule
    # Previously an unsupported type fell through and implicitly returned None,
    # silently treating a caller mistake (e.g. passing a dict) as "no schedule".
    raise SDKUsageError(
        f"Invalid schedule type: {type(schedule).__name__}, expected a cron string or models.CronScheduleClass."
    )
594
# Union of the field-spec classes a monitor's evaluation parameters may carry:
# freshness-specific specs vs. plain schema-field specs.
FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
499
597
  class _AssertionInput(ABC):
500
598
  def __init__(
501
599
  self,
@@ -509,6 +607,7 @@ class _AssertionInput(ABC):
509
607
  ] = None, # Can be None if the assertion is not yet created
510
608
  display_name: Optional[str] = None,
511
609
  enabled: bool = True,
610
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
512
611
  detection_mechanism: DetectionMechanismInputTypes = None,
513
612
  sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
514
613
  exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
@@ -553,7 +652,7 @@ class _AssertionInput(ABC):
553
652
  else _generate_default_name(DEFAULT_NAME_PREFIX, DEFAULT_NAME_SUFFIX_LENGTH)
554
653
  )
555
654
  self.enabled = enabled
556
-
655
+ self.schedule = _try_parse_schedule(schedule)
557
656
  self.detection_mechanism = DetectionMechanism.parse(detection_mechanism)
558
657
  self.sensitivity = InferenceSensitivity.parse(sensitivity)
559
658
  self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
@@ -656,10 +755,7 @@ class _AssertionInput(ABC):
656
755
  """
657
756
  if not isinstance(
658
757
  self.detection_mechanism,
659
- (
660
- DetectionMechanism.LAST_MODIFIED_COLUMN,
661
- DetectionMechanism.HIGH_WATERMARK_COLUMN,
662
- ),
758
+ _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER,
663
759
  ):
664
760
  return None
665
761
 
@@ -672,12 +768,6 @@ class _AssertionInput(ABC):
672
768
  sql=additional_filter,
673
769
  )
674
770
 
675
- @abstractmethod
676
- def _create_assertion_info(
677
- self, filter: Optional[models.DatasetFilterClass]
678
- ) -> AssertionInfoInputType:
679
- pass
680
-
681
771
  def _convert_tags(self) -> Optional[TagsInputType]:
682
772
  """
683
773
  Convert the tags input into a standardized format.
@@ -800,30 +890,6 @@ class _AssertionInput(ABC):
800
890
  )
801
891
  return exclusion_windows
802
892
 
803
- @abstractmethod
804
- def _convert_assertion_source_type_and_field(
805
- self,
806
- ) -> tuple[str, Optional[models.FreshnessFieldSpecClass]]:
807
- """
808
- Convert detection mechanism into source type and field specification for freshness assertions.
809
-
810
- Returns:
811
- A tuple of (source_type, field) where field may be None.
812
- Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
813
- the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
814
-
815
- Raises:
816
- SDKNotYetSupportedError: If the detection mechanism is not supported.
817
- SDKUsageError: If the field (column) is not found in the dataset,
818
- and the detection mechanism requires a field. Also if the field
819
- is not an allowed type for the detection mechanism.
820
- """
821
- pass
822
-
823
- @abstractmethod
824
- def _convert_schedule(self) -> models.CronScheduleClass:
825
- pass
826
-
827
893
  def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
828
894
  """
829
895
  Convert sensitivity into an AssertionMonitorSensitivityClass.
@@ -835,13 +901,68 @@ class _AssertionInput(ABC):
835
901
  level=InferenceSensitivity.to_int(self.sensitivity),
836
902
  )
837
903
 
904
    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
        """
        Get the schema field spec for the detection mechanism if needed.

        Args:
            column_name: Field path of the column to look up in the dataset schema.

        Returns:
            A SchemaFieldSpecClass describing the matching schema field.

        Raises:
            SDKUsageError: If the dataset cannot be fetched, or the column is
                not present in the dataset's schema.
        """
        # Only fetch the dataset if it's not already cached.
        # Also we only fetch the dataset if it's needed for the detection mechanism.
        if self.cached_dataset is None:
            self.cached_dataset = self.entity_client.get(self.dataset_urn)

        # Handle case where dataset doesn't exist
        if self.cached_dataset is None:
            raise SDKUsageError(
                f"Dataset {self.dataset_urn} not found. Cannot validate column {column_name}."
            )

        # TODO: Make a public accessor for _schema_dict in the SDK
        schema_fields = self.cached_dataset._schema_dict()
        field = schema_fields.get(column_name)
        if field:
            return models.SchemaFieldSpecClass(
                path=field.fieldPath,
                # Stores the class name of the nested type object as a string;
                # _validate_field_type compares against these names.
                type=field.type.type.__class__.__name__,
                nativeType=field.nativeDataType,
            )
        else:
            raise SDKUsageError(
                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
            )
932
+
933
    def _validate_field_type(
        self,
        field_spec: models.SchemaFieldSpecClass,
        column_name: str,
        allowed_types: list[DictWrapper],
        field_type_name: str,
    ) -> None:
        """
        Validate that a field has an allowed type.

        Args:
            field_spec: The field specification to validate
            column_name: The name of the column for error messages
            allowed_types: List of allowed field types
            field_type_name: Human-readable name of the field type for error messages

        Raises:
            SDKUsageError: If the field has an invalid type
        """
        # field_spec.type holds a class-name string (set by
        # _get_schema_field_spec), so comparison is by class name.
        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
        if field_spec.type not in allowed_type_names:
            raise SDKUsageError(
                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
                f"Allowed types are {allowed_type_names}.",
            )
838
959
  def _create_monitor_info(
839
960
  self,
840
961
  assertion_urn: AssertionUrn,
841
962
  status: models.MonitorStatusClass,
842
963
  schedule: models.CronScheduleClass,
843
964
  source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
844
- field: Optional[models.FreshnessFieldSpecClass],
965
+ field: Optional[FieldSpecType],
845
966
  sensitivity: models.AssertionMonitorSensitivityClass,
846
967
  exclusion_windows: list[models.AssertionExclusionWindowClass],
847
968
  ) -> models.MonitorInfoClass:
@@ -851,7 +972,7 @@ class _AssertionInput(ABC):
851
972
  Args:
852
973
  status: The monitor status.
853
974
  schedule: The monitor schedule.
854
- source_type: The freshness source type.
975
+ source_type: The source type.
855
976
  field: Optional field specification.
856
977
  sensitivity: The monitor sensitivity.
857
978
  exclusion_windows: List of exclusion windows.
@@ -867,12 +988,8 @@ class _AssertionInput(ABC):
867
988
  models.AssertionEvaluationSpecClass(
868
989
  assertion=str(assertion_urn),
869
990
  schedule=schedule,
870
- parameters=models.AssertionEvaluationParametersClass(
871
- type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
872
- datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
873
- sourceType=source_type,
874
- field=field,
875
- ),
991
+ parameters=self._get_assertion_evaluation_parameters(
992
+ str(source_type), field
876
993
  ),
877
994
  )
878
995
  ],
@@ -886,36 +1003,34 @@ class _AssertionInput(ABC):
886
1003
  ),
887
1004
  )
888
1005
 
889
- def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
890
- """
891
- Get the schema field spec for the detection mechanism if needed.
892
- """
893
- # Only fetch the dataset if it's not already cached.
894
- # Also we only fetch the dataset if it's needed for the detection mechanism.
895
- if self.cached_dataset is None:
896
- self.cached_dataset = self.entity_client.get(self.dataset_urn)
1006
+ @abstractmethod
1007
+ def _create_assertion_info(
1008
+ self, filter: Optional[models.DatasetFilterClass]
1009
+ ) -> AssertionInfoInputType:
1010
+ """Create assertion info specific to the assertion type."""
1011
+ pass
897
1012
 
898
- # TODO: Make a public accessor for _schema_dict in the SDK
899
- schema_fields = self.cached_dataset._schema_dict()
900
- field = schema_fields.get(column_name)
901
- if field:
902
- return models.SchemaFieldSpecClass(
903
- path=field.fieldPath,
904
- type=field.type.type.__class__.__name__,
905
- nativeType=field.nativeDataType,
906
- )
907
- else:
908
- raise SDKUsageError(
909
- msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
910
- )
1013
+ @abstractmethod
1014
+ def _convert_schedule(self) -> models.CronScheduleClass:
1015
+ """Convert schedule to appropriate format for the assertion type."""
1016
+ pass
911
1017
 
1018
+ @abstractmethod
1019
+ def _get_assertion_evaluation_parameters(
1020
+ self, source_type: str, field: Optional[FieldSpecType]
1021
+ ) -> models.AssertionEvaluationParametersClass:
1022
+ """Get evaluation parameters specific to the assertion type."""
1023
+ pass
1024
+
1025
+ @abstractmethod
1026
+ def _convert_assertion_source_type_and_field(
1027
+ self,
1028
+ ) -> tuple[str, Optional[FieldSpecType]]:
1029
+ """Convert detection mechanism to source type and field spec."""
1030
+ pass
912
1031
 
913
- class _SmartFreshnessAssertionInput(_AssertionInput):
914
- DEFAULT_SCHEDULE = models.CronScheduleClass(
915
- cron="0 0 * * *",
916
- timezone="UTC",
917
- )
918
1032
 
1033
+ class _SmartFreshnessAssertionInput(_AssertionInput):
919
1034
  def __init__(
920
1035
  self,
921
1036
  *,
@@ -926,6 +1041,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
926
1041
  urn: Optional[Union[str, AssertionUrn]] = None,
927
1042
  display_name: Optional[str] = None,
928
1043
  enabled: bool = True,
1044
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
929
1045
  detection_mechanism: DetectionMechanismInputTypes = None,
930
1046
  sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
931
1047
  exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
@@ -945,6 +1061,9 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
945
1061
  urn=urn,
946
1062
  display_name=display_name,
947
1063
  enabled=enabled,
1064
+ schedule=schedule
1065
+ if schedule is not None
1066
+ else DEFAULT_SCHEDULE, # Use provided schedule or default for create case
948
1067
  detection_mechanism=detection_mechanism,
949
1068
  sensitivity=sensitivity,
950
1069
  exclusion_windows=exclusion_windows,
@@ -973,29 +1092,51 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
973
1092
  return models.FreshnessAssertionInfoClass(
974
1093
  type=models.FreshnessAssertionTypeClass.DATASET_CHANGE, # Currently only dataset change is supported
975
1094
  entity=str(self.dataset_urn),
976
- # schedule (optional, not used for smart freshness assertions)
1095
+ # schedule (optional, must be left empty for smart freshness assertions - managed by the AI inference engine)
977
1096
  filter=filter,
978
1097
  )
979
1098
 
980
1099
  def _convert_schedule(self) -> models.CronScheduleClass:
981
1100
  """Create a schedule for a smart freshness assertion.
982
1101
 
983
- Since the schedule is not used for smart freshness assertions, we return a default schedule.
1102
+ For create case, uses DEFAULT_SCHEDULE. For update case, preserves existing schedule.
984
1103
 
985
1104
  Returns:
986
1105
  A CronScheduleClass with appropriate schedule settings.
987
1106
  """
988
- return self.DEFAULT_SCHEDULE
1107
+ assert self.schedule is not None, (
1108
+ "Schedule should never be None due to constructor logic"
1109
+ )
1110
+ return self.schedule
1111
+
1112
+ def _get_assertion_evaluation_parameters(
1113
+ self, source_type: str, field: Optional[FieldSpecType]
1114
+ ) -> models.AssertionEvaluationParametersClass:
1115
+ # Ensure field is either None or FreshnessFieldSpecClass
1116
+ freshness_field = None
1117
+ if field is not None:
1118
+ if not isinstance(field, models.FreshnessFieldSpecClass):
1119
+ raise SDKUsageError(
1120
+ f"Expected FreshnessFieldSpecClass for freshness assertion, got {type(field).__name__}"
1121
+ )
1122
+ freshness_field = field
1123
+
1124
+ return models.AssertionEvaluationParametersClass(
1125
+ type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
1126
+ datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
1127
+ sourceType=source_type, field=freshness_field
1128
+ ),
1129
+ )
989
1130
 
990
1131
  def _convert_assertion_source_type_and_field(
991
1132
  self,
992
- ) -> tuple[str, Optional[models.FreshnessFieldSpecClass]]:
1133
+ ) -> tuple[str, Optional[FieldSpecType]]:
993
1134
  """
994
1135
  Convert detection mechanism into source type and field specification for freshness assertions.
995
1136
 
996
1137
  Returns:
997
1138
  A tuple of (source_type, field) where field may be None.
998
- Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
1139
+ Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
999
1140
  the source type is not an enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
1000
1141
 
1001
1142
  Raises:
@@ -1060,15 +1201,135 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1060
1201
  )
1061
1202
 
1062
1203
  field_spec = self._get_schema_field_spec(column_name)
1063
- allowed_type_names = [t.__class__.__name__ for t in allowed_types]
1064
- if field_spec.type not in allowed_type_names:
1065
- raise SDKUsageError(
1066
- msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
1067
- f"Allowed types are {allowed_type_names}.",
1068
- )
1204
+ self._validate_field_type(
1205
+ field_spec, column_name, allowed_types, field_type_name
1206
+ )
1069
1207
  return models.FreshnessFieldSpecClass(
1070
1208
  path=field_spec.path,
1071
1209
  type=field_spec.type,
1072
1210
  nativeType=field_spec.nativeType,
1073
1211
  kind=kind,
1074
1212
  )
1213
+
1214
+
1215
+ class _SmartVolumeAssertionInput(_AssertionInput):
1216
    def __init__(
        self,
        *,
        # Required fields
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
        # Optional fields
        urn: Optional[Union[str, AssertionUrn]] = None,
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
        training_data_lookback_days: Optional[int] = None,
        incident_behavior: Optional[
            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
        ] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
    ):
        """Collect and normalize inputs for a smart volume assertion.

        All parameters are keyword-only; parsing and validation of each input
        is delegated to the _AssertionInput base constructor.
        """
        super().__init__(
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
+
1260
    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """
        Create a VolumeAssertionInfoClass for a smart volume assertion.

        Args:
            filter: Optional filter to apply to the assertion.

        Returns:
            A VolumeAssertionInfoClass configured for smart volume.
        """
        return models.VolumeAssertionInfoClass(
            type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,  # Currently only ROW_COUNT_TOTAL is supported for smart volume
            entity=str(self.dataset_urn),
            filter=filter,
        )
1277
+
1278
    def _convert_schedule(self) -> models.CronScheduleClass:
        """Create a schedule for a smart volume assertion.

        Falls back to DEFAULT_SCHEDULE when no schedule was provided;
        otherwise builds a fresh CronScheduleClass from the parsed schedule.

        Returns:
            A CronScheduleClass with appropriate schedule settings.
        """
        if self.schedule is None:
            return DEFAULT_SCHEDULE

        return models.CronScheduleClass(
            cron=self.schedule.cron,
            timezone=self.schedule.timezone,
        )
1293
+
1294
    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Build DATASET_VOLUME evaluation parameters for the monitor.

        ``field`` is accepted for interface parity with the other assertion
        types but is not used for volume assertions.
        """
        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            datasetVolumeParameters=models.DatasetVolumeAssertionParametersClass(
                sourceType=source_type,
            ),
        )
1303
+
1304
+ def _convert_assertion_source_type_and_field(
1305
+ self,
1306
+ ) -> tuple[str, Optional[FieldSpecType]]:
1307
+ """
1308
+ Convert detection mechanism into source type and field specification for volume assertions.
1309
+
1310
+ Returns:
1311
+ A tuple of (source_type, field) where field may be None.
1312
+ Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
1313
+ the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
1314
+
1315
+ Raises:
1316
+ SDKNotYetSupportedError: If the detection mechanism is not supported.
1317
+ SDKUsageError: If the field (column) is not found in the dataset,
1318
+ and the detection mechanism requires a field. Also if the field
1319
+ is not an allowed type for the detection mechanism.
1320
+ """
1321
+ source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
1322
+ field = None
1323
+
1324
+ if isinstance(self.detection_mechanism, _Query):
1325
+ source_type = models.DatasetVolumeSourceTypeClass.QUERY
1326
+ elif isinstance(self.detection_mechanism, _InformationSchema):
1327
+ source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
1328
+ elif isinstance(self.detection_mechanism, _DatasetProfile):
1329
+ source_type = models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE
1330
+ else:
1331
+ raise SDKNotYetSupportedError(
1332
+ f"Detection mechanism {self.detection_mechanism} not yet supported for smart volume assertions"
1333
+ )
1334
+
1335
+ return source_type, field