duckguard 2.2.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. duckguard/__init__.py +1 -1
  2. duckguard/anomaly/__init__.py +28 -0
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/methods.py +16 -2
  5. duckguard/anomaly/ml_methods.py +724 -0
  6. duckguard/checks/__init__.py +26 -0
  7. duckguard/checks/conditional.py +796 -0
  8. duckguard/checks/distributional.py +524 -0
  9. duckguard/checks/multicolumn.py +726 -0
  10. duckguard/checks/query_based.py +643 -0
  11. duckguard/cli/main.py +257 -2
  12. duckguard/connectors/factory.py +30 -2
  13. duckguard/connectors/files.py +7 -3
  14. duckguard/core/column.py +851 -1
  15. duckguard/core/dataset.py +1035 -0
  16. duckguard/core/result.py +236 -0
  17. duckguard/freshness/__init__.py +33 -0
  18. duckguard/freshness/monitor.py +429 -0
  19. duckguard/history/schema.py +119 -1
  20. duckguard/notifications/__init__.py +20 -2
  21. duckguard/notifications/email.py +508 -0
  22. duckguard/profiler/distribution_analyzer.py +384 -0
  23. duckguard/profiler/outlier_detector.py +497 -0
  24. duckguard/profiler/pattern_matcher.py +301 -0
  25. duckguard/profiler/quality_scorer.py +445 -0
  26. duckguard/reports/html_reporter.py +1 -2
  27. duckguard/rules/executor.py +642 -0
  28. duckguard/rules/generator.py +4 -1
  29. duckguard/rules/schema.py +54 -0
  30. duckguard/schema_history/__init__.py +40 -0
  31. duckguard/schema_history/analyzer.py +414 -0
  32. duckguard/schema_history/tracker.py +288 -0
  33. duckguard/semantic/detector.py +17 -1
  34. duckguard-3.0.0.dist-info/METADATA +1072 -0
  35. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/RECORD +38 -21
  36. duckguard-2.2.0.dist-info/METADATA +0 -351
  37. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/WHEEL +0 -0
  38. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/entry_points.txt +0 -0
  39. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -248,6 +248,26 @@ class RuleExecutor:
248
248
  CheckType.MAX_LENGTH: self._check_max_length,
249
249
  CheckType.ALLOWED_VALUES: self._check_allowed_values,
250
250
  CheckType.ISIN: self._check_allowed_values,
251
+ # Conditional checks (DuckGuard 3.0)
252
+ CheckType.NOT_NULL_WHEN: self._check_not_null_when,
253
+ CheckType.UNIQUE_WHEN: self._check_unique_when,
254
+ CheckType.BETWEEN_WHEN: self._check_between_when,
255
+ CheckType.ISIN_WHEN: self._check_isin_when,
256
+ CheckType.PATTERN_WHEN: self._check_pattern_when,
257
+ # Multi-column checks (DuckGuard 3.0)
258
+ CheckType.COLUMN_PAIR_SATISFY: self._check_column_pair_satisfy,
259
+ CheckType.MULTICOLUMN_UNIQUE: self._check_multicolumn_unique,
260
+ CheckType.MULTICOLUMN_SUM: self._check_multicolumn_sum,
261
+ # Query-based checks (DuckGuard 3.0)
262
+ CheckType.QUERY_NO_ROWS: self._check_query_no_rows,
263
+ CheckType.QUERY_RETURNS_ROWS: self._check_query_returns_rows,
264
+ CheckType.QUERY_RESULT_EQUALS: self._check_query_result_equals,
265
+ CheckType.QUERY_RESULT_BETWEEN: self._check_query_result_between,
266
+ # Distributional checks (DuckGuard 3.0)
267
+ CheckType.DISTRIBUTION_NORMAL: self._check_distribution_normal,
268
+ CheckType.DISTRIBUTION_UNIFORM: self._check_distribution_uniform,
269
+ CheckType.DISTRIBUTION_KS_TEST: self._check_ks_test,
270
+ CheckType.DISTRIBUTION_CHI_SQUARE: self._check_chi_square_test,
251
271
  }
252
272
 
253
273
  handler = check_handlers.get(check.type)
@@ -576,6 +596,628 @@ class RuleExecutor:
576
596
  details=result.details or {},
577
597
  )
578
598
 
599
+ # =================================================================
600
+ # Conditional Check Handlers (DuckGuard 3.0)
601
+ # =================================================================
602
+
603
def _check_not_null_when(self, col, check: Check) -> CheckResult:
    """Run the conditional not-null check for a single column.

    Reads ``condition`` (required) and ``threshold`` (defaults to ``1.0``)
    from ``check.params`` and delegates to ``col.not_null_when``.

    Args:
        col: Column object exposing ``name`` and ``not_null_when``.
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome, or a failed
        result when no ``condition`` was supplied.
    """
    when_clause = check.params.get("condition")
    if when_clause:
        cutoff = check.params.get("threshold", 1.0)
        outcome = col.not_null_when(condition=when_clause, threshold=cutoff)
        return CheckResult(
            check=check,
            column=col.name,
            passed=outcome.passed,
            actual_value=outcome.actual_value,
            expected_value=outcome.expected_value,
            message=outcome.message,
            severity=check.severity,
            details=outcome.details or {},
        )

    # Without a condition there is nothing to evaluate; report a failure.
    return CheckResult(
        check=check,
        column=col.name,
        passed=False,
        actual_value=None,
        expected_value="not null when condition",
        message="Missing 'condition' parameter for not_null_when check",
        severity=check.severity,
    )
630
+
631
def _check_unique_when(self, col, check: Check) -> CheckResult:
    """Run the conditional uniqueness check for a single column.

    Reads ``condition`` (required) and ``threshold`` (defaults to ``1.0``)
    from ``check.params`` and delegates to ``col.unique_when``.

    Args:
        col: Column object exposing ``name`` and ``unique_when``.
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome, or a failed
        result when no ``condition`` was supplied.
    """
    when_clause = check.params.get("condition")
    if when_clause:
        cutoff = check.params.get("threshold", 1.0)
        outcome = col.unique_when(condition=when_clause, threshold=cutoff)
        return CheckResult(
            check=check,
            column=col.name,
            passed=outcome.passed,
            actual_value=outcome.actual_value,
            expected_value=outcome.expected_value,
            message=outcome.message,
            severity=check.severity,
            details=outcome.details or {},
        )

    # Without a condition there is nothing to evaluate; report a failure.
    return CheckResult(
        check=check,
        column=col.name,
        passed=False,
        actual_value=None,
        expected_value="unique when condition",
        message="Missing 'condition' parameter for unique_when check",
        severity=check.severity,
    )
658
+
659
def _check_between_when(self, col, check: Check) -> CheckResult:
    """Run the conditional range check for a single column.

    The bounds come from ``check.value`` when it is a two-element list or
    tuple; otherwise from the ``min_value`` / ``max_value`` params.
    ``condition`` is required and ``threshold`` defaults to ``1.0``.

    Args:
        col: Column object exposing ``name`` and ``between_when``.
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome, or a failed
        result when the condition or either bound is missing.
    """
    params = check.params

    when_clause = params.get("condition")
    if not when_clause:
        return CheckResult(
            check=check,
            column=col.name,
            passed=False,
            actual_value=None,
            expected_value="between when condition",
            message="Missing 'condition' parameter for between_when check",
            severity=check.severity,
        )

    # A (min, max) pair in check.value takes precedence over the params.
    has_pair = isinstance(check.value, (list, tuple)) and len(check.value) == 2
    if has_pair:
        lower, upper = check.value
    else:
        lower, upper = params.get("min_value"), params.get("max_value")

    if lower is None or upper is None:
        return CheckResult(
            check=check,
            column=col.name,
            passed=False,
            actual_value=None,
            expected_value=f"between {lower} and {upper} when condition",
            message="Missing 'min_value' or 'max_value' for between_when check",
            severity=check.severity,
        )

    outcome = col.between_when(
        min_val=lower,
        max_val=upper,
        condition=when_clause,
        threshold=params.get("threshold", 1.0),
    )

    return CheckResult(
        check=check,
        column=col.name,
        passed=outcome.passed,
        actual_value=outcome.actual_value,
        expected_value=outcome.expected_value,
        message=outcome.message,
        severity=check.severity,
        details=outcome.details or {},
    )
709
+
710
def _check_isin_when(self, col, check: Check) -> CheckResult:
    """Check column is in allowed values when condition is true.

    Reads ``condition`` (required) and ``threshold`` (defaults to ``1.0``)
    from ``check.params``, takes the allowed values from ``check.value``
    and delegates to ``col.isin_when``.

    Args:
        col: Column object exposing ``name`` and ``isin_when``.
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome, or a failed
        result when the condition or the allowed values are missing.
    """
    condition = check.params.get("condition")
    if not condition:
        return CheckResult(
            check=check,
            column=col.name,
            passed=False,
            actual_value=None,
            expected_value="isin when condition",
            message="Missing 'condition' parameter for isin_when check",
            severity=check.severity,
        )

    allowed_values = check.value
    if allowed_values is None:
        # Previously a missing value was silently turned into [None];
        # report the misconfiguration like the other *_when handlers do.
        return CheckResult(
            check=check,
            column=col.name,
            passed=False,
            actual_value=None,
            expected_value="isin when condition",
            message="Missing allowed values for isin_when check",
            severity=check.severity,
        )

    if isinstance(allowed_values, (tuple, set, frozenset)):
        # Other collections are value lists too; do not wrap them as a
        # single element.
        allowed_values = list(allowed_values)
    elif not isinstance(allowed_values, list):
        # A lone scalar means a one-element list of allowed values.
        allowed_values = [allowed_values]

    threshold = check.params.get("threshold", 1.0)
    result = col.isin_when(
        allowed_values=allowed_values,
        condition=condition,
        threshold=threshold,
    )

    return CheckResult(
        check=check,
        column=col.name,
        passed=result.passed,
        actual_value=result.actual_value,
        expected_value=result.expected_value,
        message=result.message,
        severity=check.severity,
        details=result.details or {},
    )
745
+
746
def _check_pattern_when(self, col, check: Check) -> CheckResult:
    """Run the conditional pattern check for a single column.

    Reads ``condition`` (required) and ``threshold`` (defaults to ``1.0``)
    from ``check.params``, takes the pattern from ``check.value`` and
    delegates to ``col.matches_when``.

    Args:
        col: Column object exposing ``name`` and ``matches_when``.
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome, or a failed
        result when the condition or the pattern is missing.
    """

    def _invalid(reason):
        # Both misconfiguration paths differ only in their message.
        return CheckResult(
            check=check,
            column=col.name,
            passed=False,
            actual_value=None,
            expected_value="matches pattern when condition",
            message=reason,
            severity=check.severity,
        )

    when_clause = check.params.get("condition")
    if not when_clause:
        return _invalid("Missing 'condition' parameter for pattern_when check")

    regex = check.value
    if not regex:
        return _invalid("Missing pattern value for pattern_when check")

    outcome = col.matches_when(
        pattern=regex,
        condition=when_clause,
        threshold=check.params.get("threshold", 1.0),
    )

    return CheckResult(
        check=check,
        column=col.name,
        passed=outcome.passed,
        actual_value=outcome.actual_value,
        expected_value=outcome.expected_value,
        message=outcome.message,
        severity=check.severity,
        details=outcome.details or {},
    )
789
+
790
+ # =================================================================
791
+ # Multi-Column Check Handlers (DuckGuard 3.0)
792
+ # =================================================================
793
+
794
def _check_column_pair_satisfy(self, col, check: Check) -> CheckResult:
    """Check that column pair satisfies expression.

    Note: Multi-column checks are dataset-level, but called with col context.
    The dataset is taken from ``col._dataset``.

    Reads ``column_a`` and ``column_b`` (required), ``expression`` (falls
    back to ``check.value``) and ``threshold`` (defaults to ``1.0``) from
    ``check.params`` and delegates to
    ``dataset.expect_column_pair_satisfy``.

    Args:
        col: Column context used only to reach the owning dataset.
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``), failed when a
        required parameter or the dataset is unavailable.
    """
    column_a = check.params.get("column_a")
    column_b = check.params.get("column_b")
    expression = check.params.get("expression") or check.value

    if not column_a or not column_b:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="column pair satisfaction",
            message="Missing 'column_a' or 'column_b' parameter",
            severity=check.severity,
        )

    if not expression:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="column pair satisfaction",
            message="Missing 'expression' parameter",
            severity=check.severity,
        )

    # Get dataset from column context. Guard against a missing context so a
    # dataset-level invocation yields a failed result instead of raising
    # AttributeError (the query-based handlers guard this as well).
    dataset = getattr(col, "_dataset", None)
    if dataset is None:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="column pair satisfaction",
            message="Dataset not available for column pair check",
            severity=check.severity,
        )

    threshold = check.params.get("threshold", 1.0)

    result = dataset.expect_column_pair_satisfy(
        column_a=column_a,
        column_b=column_b,
        expression=expression,
        threshold=threshold,
    )

    return CheckResult(
        check=check,
        column=None,
        passed=result.passed,
        actual_value=result.actual_value,
        expected_value=result.expected_value,
        message=result.message,
        severity=check.severity,
        details=result.details or {},
    )
847
+
848
def _check_multicolumn_unique(self, col, check: Check) -> CheckResult:
    """Check that combination of columns is unique.

    Reads ``columns`` from ``check.params`` (falling back to
    ``check.value``) and ``threshold`` (defaults to ``1.0``), then
    delegates to ``dataset.expect_columns_unique`` on the dataset reached
    through ``col._dataset``.

    Args:
        col: Column context used only to reach the owning dataset.
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``), failed when
        ``columns`` is not a non-empty list or the dataset is unavailable.
    """
    columns = check.params.get("columns") or check.value

    if not columns or not isinstance(columns, list):
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="composite uniqueness",
            message="Missing or invalid 'columns' parameter (expected list)",
            severity=check.severity,
        )

    # Guard against a missing column context so a dataset-level invocation
    # yields a failed result instead of raising AttributeError.
    dataset = getattr(col, "_dataset", None)
    if dataset is None:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="composite uniqueness",
            message="Dataset not available for multicolumn unique check",
            severity=check.severity,
        )

    threshold = check.params.get("threshold", 1.0)

    result = dataset.expect_columns_unique(
        columns=columns,
        threshold=threshold,
    )

    return CheckResult(
        check=check,
        column=None,
        passed=result.passed,
        actual_value=result.actual_value,
        expected_value=result.expected_value,
        message=result.message,
        severity=check.severity,
        details=result.details or {},
    )
881
+
882
def _check_multicolumn_sum(self, col, check: Check) -> CheckResult:
    """Check that sum of columns equals expected value.

    Reads ``columns`` (required list), ``expected_sum`` (falls back to
    ``check.value``) and ``threshold`` (defaults to ``0.01``) from
    ``check.params`` and delegates to
    ``dataset.expect_multicolumn_sum_to_equal`` on the dataset reached
    through ``col._dataset``.

    Args:
        col: Column context used only to reach the owning dataset.
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``), failed when a
        required parameter or the dataset is unavailable.
    """
    columns = check.params.get("columns")

    # Use an explicit None test: `params.get(...) or check.value` would
    # discard a legitimate expected sum of 0.
    expected_sum = check.params.get("expected_sum")
    if expected_sum is None:
        expected_sum = check.value

    if not columns or not isinstance(columns, list):
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="multicolumn sum",
            message="Missing or invalid 'columns' parameter (expected list)",
            severity=check.severity,
        )

    if expected_sum is None:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="multicolumn sum",
            message="Missing 'expected_sum' parameter",
            severity=check.severity,
        )

    # Guard against a missing column context so a dataset-level invocation
    # yields a failed result instead of raising AttributeError.
    dataset = getattr(col, "_dataset", None)
    if dataset is None:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="multicolumn sum",
            message="Dataset not available for multicolumn sum check",
            severity=check.severity,
        )

    threshold = check.params.get("threshold", 0.01)

    result = dataset.expect_multicolumn_sum_to_equal(
        columns=columns,
        expected_sum=expected_sum,
        threshold=threshold,
    )

    return CheckResult(
        check=check,
        column=None,
        passed=result.passed,
        actual_value=result.actual_value,
        expected_value=result.expected_value,
        message=result.message,
        severity=check.severity,
        details=result.details or {},
    )
928
+
929
+ # Query-based check handlers (DuckGuard 3.0)
930
def _check_query_no_rows(self, col, check: Check) -> CheckResult:
    """Run a custom SQL query that is expected to return no rows.

    The query comes from the ``query`` param, falling back to
    ``check.value``; an optional ``message`` param is forwarded to
    ``dataset.expect_query_to_return_no_rows``.

    Args:
        col: Column context used only to reach the owning dataset
            (may be falsy, in which case the check fails).
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``).
    """

    def _fail(reason):
        # Shared shape of the two pre-execution failures.
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="no rows",
            message=reason,
            severity=check.severity,
        )

    sql = check.params.get("query") or check.value
    if not sql:
        return _fail("Missing 'query' parameter")

    target = col._dataset if col else None
    if not target:
        return _fail("Dataset not available for query execution")

    outcome = target.expect_query_to_return_no_rows(
        query=sql,
        message=check.params.get("message"),
    )

    return CheckResult(
        check=check,
        column=None,
        passed=outcome.passed,
        actual_value=outcome.actual_value,
        expected_value=outcome.expected_value,
        message=outcome.message,
        severity=check.severity,
        details=outcome.details or {},
    )
974
+
975
def _check_query_returns_rows(self, col, check: Check) -> CheckResult:
    """Run a custom SQL query that is expected to return at least one row.

    The query comes from the ``query`` param, falling back to
    ``check.value``; an optional ``message`` param is forwarded to
    ``dataset.expect_query_to_return_rows``.

    Args:
        col: Column context used only to reach the owning dataset
            (may be falsy, in which case the check fails).
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``).
    """

    def _fail(reason):
        # Shared shape of the two pre-execution failures.
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value="> 0 rows",
            message=reason,
            severity=check.severity,
        )

    sql = check.params.get("query") or check.value
    if not sql:
        return _fail("Missing 'query' parameter")

    target = col._dataset if col else None
    if not target:
        return _fail("Dataset not available for query execution")

    outcome = target.expect_query_to_return_rows(
        query=sql,
        message=check.params.get("message"),
    )

    return CheckResult(
        check=check,
        column=None,
        passed=outcome.passed,
        actual_value=outcome.actual_value,
        expected_value=outcome.expected_value,
        message=outcome.message,
        severity=check.severity,
        details=outcome.details or {},
    )
1019
+
1020
def _check_query_result_equals(self, col, check: Check) -> CheckResult:
    """Check that custom SQL query result equals expected value.

    Reads ``query`` (required), ``expected`` (falls back to
    ``check.value``), ``tolerance`` and ``message`` from ``check.params``
    and delegates to ``dataset.expect_query_result_to_equal``.

    Args:
        col: Column context used only to reach the owning dataset
            (may be falsy, in which case the check fails).
        check: Rule definition supplying ``value``, ``params`` and
            ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``), failed when a
        required parameter or the dataset is unavailable.
    """
    query = check.params.get("query")

    # Use an explicit None test: `params.get(...) or check.value` would
    # discard falsy but valid expectations such as 0, False or "".
    expected = check.params.get("expected")
    if expected is None:
        expected = check.value

    if not query:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value=expected,
            message="Missing 'query' parameter",
            severity=check.severity,
        )

    if expected is None:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value=None,
            message="Missing 'expected' parameter",
            severity=check.severity,
        )

    dataset = col._dataset if col else None
    if not dataset:
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value=expected,
            message="Dataset not available for query execution",
            severity=check.severity,
        )

    tolerance = check.params.get("tolerance")
    message = check.params.get("message")

    result = dataset.expect_query_result_to_equal(
        query=query,
        expected=expected,
        tolerance=tolerance,
        message=message,
    )

    return CheckResult(
        check=check,
        column=None,
        passed=result.passed,
        actual_value=result.actual_value,
        expected_value=result.expected_value,
        message=result.message,
        severity=check.severity,
        details=result.details or {},
    )
1079
+
1080
def _check_query_result_between(self, col, check: Check) -> CheckResult:
    """Run a custom SQL query whose result must fall within a range.

    Reads ``query``, ``min_value`` and ``max_value`` (all required) plus
    an optional ``message`` from ``check.params`` and delegates to
    ``dataset.expect_query_result_to_be_between``.

    Args:
        col: Column context used only to reach the owning dataset
            (may be falsy, in which case the check fails).
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A dataset-level ``CheckResult`` (``column=None``), failed when a
        required parameter or the dataset is unavailable.
    """
    params = check.params
    sql = params.get("query")
    min_value = params.get("min_value")
    max_value = params.get("max_value")

    def _fail(reason):
        # All pre-execution failures share everything but the message.
        return CheckResult(
            check=check,
            column=None,
            passed=False,
            actual_value=None,
            expected_value=f"between {min_value} and {max_value}",
            message=reason,
            severity=check.severity,
        )

    if not sql:
        return _fail("Missing 'query' parameter")

    if min_value is None or max_value is None:
        return _fail("Missing 'min_value' or 'max_value' parameter")

    target = col._dataset if col else None
    if not target:
        return _fail("Dataset not available for query execution")

    outcome = target.expect_query_result_to_be_between(
        query=sql,
        min_value=min_value,
        max_value=max_value,
        message=params.get("message"),
    )

    return CheckResult(
        check=check,
        column=None,
        passed=outcome.passed,
        actual_value=outcome.actual_value,
        expected_value=outcome.expected_value,
        message=outcome.message,
        severity=check.severity,
        details=outcome.details or {},
    )
1139
+
1140
+ # Distributional check handlers (DuckGuard 3.0)
1141
def _check_distribution_normal(self, col, check: Check) -> CheckResult:
    """Run the normal-distribution check for a single column.

    Forwards the ``significance_level`` param (defaults to ``0.05``) to
    ``col.expect_distribution_normal`` and repackages its outcome.

    Args:
        col: Column object exposing ``name`` and
            ``expect_distribution_normal``.
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome.
    """
    alpha = check.params.get("significance_level", 0.05)
    outcome = col.expect_distribution_normal(significance_level=alpha)

    fields = {
        "check": check,
        "column": col.name,
        "passed": outcome.passed,
        "actual_value": outcome.actual_value,
        "expected_value": outcome.expected_value,
        "message": outcome.message,
        "severity": check.severity,
        "details": outcome.details or {},
    }
    return CheckResult(**fields)
1159
+
1160
def _check_distribution_uniform(self, col, check: Check) -> CheckResult:
    """Run the uniform-distribution check for a single column.

    Forwards the ``significance_level`` param (defaults to ``0.05``) to
    ``col.expect_distribution_uniform`` and repackages its outcome.

    Args:
        col: Column object exposing ``name`` and
            ``expect_distribution_uniform``.
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome.
    """
    alpha = check.params.get("significance_level", 0.05)
    outcome = col.expect_distribution_uniform(significance_level=alpha)

    fields = {
        "check": check,
        "column": col.name,
        "passed": outcome.passed,
        "actual_value": outcome.actual_value,
        "expected_value": outcome.expected_value,
        "message": outcome.message,
        "severity": check.severity,
        "details": outcome.details or {},
    }
    return CheckResult(**fields)
1178
+
1179
def _check_ks_test(self, col, check: Check) -> CheckResult:
    """Run the Kolmogorov-Smirnov check for a single column.

    Forwards the ``distribution`` param (defaults to ``"norm"``) and the
    ``significance_level`` param (defaults to ``0.05``) to
    ``col.expect_ks_test`` and repackages its outcome.

    Args:
        col: Column object exposing ``name`` and ``expect_ks_test``.
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome.
    """
    params = check.params
    outcome = col.expect_ks_test(
        distribution=params.get("distribution", "norm"),
        significance_level=params.get("significance_level", 0.05),
    )

    fields = {
        "check": check,
        "column": col.name,
        "passed": outcome.passed,
        "actual_value": outcome.actual_value,
        "expected_value": outcome.expected_value,
        "message": outcome.message,
        "severity": check.severity,
        "details": outcome.details or {},
    }
    return CheckResult(**fields)
1199
+
1200
def _check_chi_square_test(self, col, check: Check) -> CheckResult:
    """Run the chi-square goodness-of-fit check for a single column.

    Forwards the ``expected_frequencies`` param (``None`` when absent) and
    the ``significance_level`` param (defaults to ``0.05``) to
    ``col.expect_chi_square_test`` and repackages its outcome.

    Args:
        col: Column object exposing ``name`` and
            ``expect_chi_square_test``.
        check: Rule definition supplying ``params`` and ``severity``.

    Returns:
        A ``CheckResult`` mirroring the column-level outcome.
    """
    params = check.params
    outcome = col.expect_chi_square_test(
        expected_frequencies=params.get("expected_frequencies"),
        significance_level=params.get("significance_level", 0.05),
    )

    fields = {
        "check": check,
        "column": col.name,
        "passed": outcome.passed,
        "actual_value": outcome.actual_value,
        "expected_value": outcome.expected_value,
        "message": outcome.message,
        "severity": check.severity,
        "details": outcome.details or {},
    }
    return CheckResult(**fields)
1220
+
579
1221
  def _compare(self, actual: Any, expected: Any, operator: str) -> bool:
580
1222
  """Compare actual value to expected using operator."""
581
1223
  if actual is None or expected is None:
@@ -14,6 +14,7 @@ from duckguard.connectors import connect
14
14
  from duckguard.core.dataset import Dataset
15
15
  from duckguard.rules.schema import (
16
16
  BUILTIN_PATTERNS,
17
+ CASE_SENSITIVE_PATTERNS,
17
18
  Check,
18
19
  CheckType,
19
20
  ColumnRules,
@@ -215,9 +216,11 @@ class RuleGenerator:
215
216
 
216
217
  for pattern_name, pattern in self._patterns.items():
217
218
  try:
219
+ # Use case-sensitive matching for certain patterns (slug, identifier)
220
+ flags = 0 if pattern_name in CASE_SENSITIVE_PATTERNS else re.IGNORECASE
218
221
  matches = sum(
219
222
  1 for v in sample
220
- if re.match(pattern, str(v), re.IGNORECASE)
223
+ if re.match(pattern, str(v), flags)
221
224
  )
222
225
  match_rate = matches / len(sample)
223
226