pointblank 0.13.4__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. pointblank/__init__.py +4 -0
  2. pointblank/_constants.py +117 -0
  3. pointblank/_constants_translations.py +487 -2
  4. pointblank/_interrogation.py +1065 -12
  5. pointblank/_spec_utils.py +1015 -0
  6. pointblank/_utils.py +17 -7
  7. pointblank/_utils_ai.py +875 -0
  8. pointblank/assistant.py +1 -1
  9. pointblank/cli.py +128 -115
  10. pointblank/column.py +1 -1
  11. pointblank/data/api-docs.txt +1838 -130
  12. pointblank/data/validations/README.md +108 -0
  13. pointblank/data/validations/complex_preprocessing.json +54 -0
  14. pointblank/data/validations/complex_preprocessing.pkl +0 -0
  15. pointblank/data/validations/generate_test_files.py +127 -0
  16. pointblank/data/validations/multiple_steps.json +83 -0
  17. pointblank/data/validations/multiple_steps.pkl +0 -0
  18. pointblank/data/validations/narwhals_function.json +28 -0
  19. pointblank/data/validations/narwhals_function.pkl +0 -0
  20. pointblank/data/validations/no_preprocessing.json +83 -0
  21. pointblank/data/validations/no_preprocessing.pkl +0 -0
  22. pointblank/data/validations/pandas_compatible.json +28 -0
  23. pointblank/data/validations/pandas_compatible.pkl +0 -0
  24. pointblank/data/validations/preprocessing_functions.py +46 -0
  25. pointblank/data/validations/simple_preprocessing.json +57 -0
  26. pointblank/data/validations/simple_preprocessing.pkl +0 -0
  27. pointblank/datascan.py +4 -4
  28. pointblank/draft.py +52 -3
  29. pointblank/scan_profile.py +6 -6
  30. pointblank/schema.py +8 -82
  31. pointblank/thresholds.py +1 -1
  32. pointblank/validate.py +3069 -437
  33. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/METADATA +67 -8
  34. pointblank-0.15.0.dist-info/RECORD +56 -0
  35. pointblank-0.13.4.dist-info/RECORD +0 -39
  36. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/WHEEL +0 -0
  37. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/entry_points.txt +0 -0
  38. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/licenses/LICENSE +0 -0
  39. {pointblank-0.13.4.dist-info → pointblank-0.15.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1015 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+
6
def regex_email() -> str:
    """
    Return the anchored regular expression used for email validation.

    The local part is one or more letters, digits or characters from ``.!#$%&'*+/=?^_`{|}~-``.
    The domain needs at least two dot-separated labels; each label is 1-63 characters long and
    neither starts nor ends with a hyphen.
    """
    local_part = r"[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+"
    label = r"[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"

    # First label, then one or more ".label" groups (so a bare host name is rejected)
    return "^" + local_part + "@" + label + r"(?:\." + label + ")+$"
10
+
11
+
12
def regex_url() -> str:
    """
    Return the anchored regular expression used for URL validation.

    A match needs an ``http``, ``https`` or ``ftp`` scheme followed by ``://``. The first
    character after that may not be whitespace or one of ``/$.?#``; it must be followed by at
    least one more character, and the remainder may not contain whitespace.
    """
    scheme = r"(https?|ftp)"
    remainder = r"[^\s/$.?#].[^\s]*"
    return "^" + scheme + r":\/\/" + remainder + "$"
16
+
17
+
18
def regex_phone() -> str:
    """
    Return the anchored regular expression used for phone number validation.

    The pattern accepts an optional leading ``+``, two digit groups of 1-4 digits (each
    optionally wrapped in parentheses), then two or more groups of 1-9 digits. Groups may be
    separated by a single hyphen, whitespace character or dot.

    NOTE: every group may be a single digit, so the shortest accepted input has four digits.
    """
    separator = r"[-\s\.]?"
    short_group = r"[(]?[0-9]{1,4}[)]?"
    long_group = r"[0-9]{1,9}"

    return (
        r"^[\+]?"
        + short_group
        + separator
        + short_group
        + separator
        + long_group
        + "("
        + separator
        + long_group
        + ")+$"
    )
22
+
23
+
24
def regex_ipv4_address() -> str:
    """
    Return the anchored regular expression used for IPv4 address validation.

    The pattern is four dot-separated octets, each in the range 0-255 (leading zeros such as
    ``001`` are accepted).
    """
    octet = r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"

    # Three "octet." groups followed by the final octet
    return "^(?:" + octet + r"\.){3}" + octet + "$"
27
+
28
+
29
def regex_ipv6_address() -> str:
    """
    Return the anchored regular expression used for IPv6 address validation.

    The pattern is an alternation covering the full eight-group form and the ``::``-compressed
    forms. Groups are 1-4 hexadecimal digits.
    """
    hextet = "[0-9a-fA-F]{1,4}"
    leading = "(" + hextet + ":)"  # a group followed by its colon
    trailing = "(:" + hextet + ")"  # a group preceded by its colon

    alternatives = [
        leading + "{7}" + hextet,  # no compression
        leading + "{1,7}:",  # compression at the very end
        leading + "{1,6}:" + hextet,
        leading + "{1,5}" + trailing + "{1,2}",
        leading + "{1,4}" + trailing + "{1,3}",
        leading + "{1,3}" + trailing + "{1,4}",
        leading + "{1,2}" + trailing + "{1,5}",
        hextet + ":(" + trailing + "{1,6})",
        ":(" + trailing + "{1,7}|:)",  # compression at the very start, or "::" alone
    ]

    return "^(" + "|".join(alternatives) + ")$"
32
+
33
+
34
def regex_mac() -> str:
    """
    Return the anchored regular expression used for MAC address validation.

    The pattern is six pairs of hexadecimal digits; each of the first five pairs is followed by
    a ``:`` or ``-`` separator.
    """
    hex_pair = "[0-9A-Fa-f]{2}"
    return "^(" + hex_pair + "[:-]){5}(" + hex_pair + ")$"
37
+
38
+
39
def regex_swift_bic() -> str:
    """
    Return the anchored regular expression used for SWIFT/BIC code validation.

    The pattern only matches upper-case input, so callers are expected to upper-case values
    before matching.
    """
    segments = (
        "[A-Z]{4}",  # bank code: 4 letters
        "[A-Z]{2}",  # country code: 2 letters
        "[A-Z0-9]{2}",  # location code: 2 letters/digits
        "([A-Z0-9]{3})?",  # optional branch code: 3 letters/digits
    )
    return "^" + "".join(segments) + "$"
43
+
44
+
45
def regex_vin() -> str:
    """
    Return the anchored regular expression for the basic VIN format check.

    The pattern is exactly 17 characters, each a digit or an upper-case letter other than
    ``I``, ``O`` and ``Q``. The check digit is not verified by this pattern.
    """
    allowed = "A-HJ-NPR-Z0-9"
    return "^[" + allowed + "]{17}$"
48
+
49
+
50
def regex_credit_card_1() -> str:
    """
    Return the first-pass credit card pattern, applied to the raw value.

    It accepts any non-empty run of digits, whitespace characters and hyphens.
    """
    raw_value_pattern = r"^[0-9\s\-]+$"
    return raw_value_pattern
53
+
54
+
55
def regex_credit_card_2() -> str:
    """
    Return the second-pass credit card pattern, applied to the separator-free value.

    It accepts 13 to 19 digits and nothing else.
    """
    digits_only_pattern = r"^[0-9]{13,19}$"
    return digits_only_pattern
58
+
59
+
60
def regex_iban(country: str | None = None) -> str:
    """
    Return the anchored regular expression used for IBAN validation.

    Parameters
    ----------
    country
        Optional two- or three-letter country code (case-insensitive). When given and known,
        the country-specific pattern is returned; otherwise the generic pattern is used.

    Returns
    -------
    str
        The regex pattern. All patterns expect upper-case input without separators.
    """
    # Generic IBAN pattern: 2 letters, 2 digits, up to 30 alphanumeric
    generic = r"^[A-Z]{2}[0-9]{2}[A-Z0-9]{1,30}$"

    if country is None:
        return generic

    # One row per country: (alpha-2 code, alpha-3 code, pattern)
    rows = (
        ("AL", "ALB", r"^AL[0-9]{10}[A-Z0-9]{16}$"),
        ("AD", "AND", r"^AD[0-9]{10}[A-Z0-9]{12}$"),
        ("AT", "AUT", r"^AT[0-9]{18}$"),
        ("BE", "BEL", r"^BE[0-9]{14}$"),
        ("BA", "BIH", r"^BA[0-9]{18}$"),
        ("BG", "BGR", r"^BG[0-9]{2}[A-Z]{4}[0-9]{6}[A-Z0-9]{8}$"),
        ("BR", "BRA", r"^BR[0-9]{25}[A-Z]{1}[A-Z0-9]{1}$"),
        ("HR", "HRV", r"^HR[0-9]{19}$"),
        ("CY", "CYP", r"^CY[0-9]{10}[A-Z0-9]{16}$"),
        ("CZ", "CZE", r"^CZ[0-9]{22}$"),
        ("DK", "DNK", r"^DK[0-9]{16}$"),
        ("EE", "EST", r"^EE[0-9]{18}$"),
        ("FO", "FRO", r"^FO[0-9]{16}$"),
        ("FI", "FIN", r"^FI[0-9]{16}$"),
        ("FR", "FRA", r"^FR[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("PF", "PYF", r"^PF[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("TF", "ATF", r"^TF[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("DE", "DEU", r"^DE[0-9]{20}$"),
        ("GI", "GIB", r"^GI[0-9]{2}[A-Z]{4}[A-Z0-9]{15}$"),
        ("GE", "GEO", r"^GE[0-9]{2}[A-Z]{2}[0-9]{16}$"),
        ("GR", "GRC", r"^GR[0-9]{9}[A-Z0-9]{16}$"),
        ("GL", "GRL", r"^GL[0-9]{16}$"),
        ("HU", "HUN", r"^HU[0-9]{26}$"),
        ("IS", "ISL", r"^IS[0-9]{24}$"),
        ("IE", "IRL", r"^IE[0-9]{2}[A-Z]{4}[0-9]{14}$"),
        ("IL", "ISR", r"^IL[0-9]{21}$"),
        ("IT", "ITA", r"^IT[0-9]{2}[A-Z]{1}[0-9]{10}[A-Z0-9]{12}$"),
        ("LV", "LVA", r"^LV[0-9]{2}[A-Z]{4}[A-Z0-9]{13}$"),
        ("LB", "LBN", r"^LB[0-9]{6}[A-Z0-9]{20}$"),
        ("LI", "LIE", r"^LI[0-9]{7}[A-Z0-9]{12}$"),
        ("LT", "LTU", r"^LT[0-9]{18}$"),
        ("LU", "LUX", r"^LU[0-9]{5}[A-Z0-9]{13}$"),
        ("MK", "MKD", r"^MK[0-9]{5}[A-Z0-9]{10}[0-9]{2}$"),
        ("MT", "MLT", r"^MT[0-9]{2}[A-Z]{4}[0-9]{5}[A-Z0-9]{18}$"),
        ("MU", "MUS", r"^MU[0-9]{2}[A-Z]{4}[0-9]{19}[A-Z]{3}$"),
        ("YT", "MYT", r"^YT[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("MC", "MCO", r"^MC[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("ME", "MNE", r"^ME[0-9]{20}$"),
        ("NL", "NLD", r"^NL[0-9]{2}[A-Z]{4}[0-9]{10}$"),
        ("NC", "NCL", r"^NC[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("NO", "NOR", r"^NO[0-9]{13}$"),
        ("PL", "POL", r"^PL[0-9]{26}$"),
        ("PT", "PRT", r"^PT[0-9]{23}$"),
        ("RO", "ROU", r"^RO[0-9]{2}[A-Z]{4}[A-Z0-9]{16}$"),
        ("PM", "SPM", r"^PM[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
        ("SM", "SMR", r"^SM[0-9]{2}[A-Z]{1}[0-9]{10}[A-Z0-9]{12}$"),
        ("SA", "SAU", r"^SA[0-9]{4}[A-Z0-9]{18}$"),
        ("RS", "SRB", r"^RS[0-9]{20}$"),
        ("SK", "SVK", r"^SK[0-9]{22}$"),
        ("SI", "SVN", r"^SI[0-9]{17}$"),
        ("ES", "ESP", r"^ES[0-9]{22}$"),
        ("SE", "SWE", r"^SE[0-9]{22}$"),
        ("CH", "CHE", r"^CH[0-9]{7}[A-Z0-9]{12}$"),
        ("TN", "TUN", r"^TN[0-9]{22}$"),
        ("TR", "TUR", r"^TR[0-9]{8}[A-Z0-9]{16}$"),
        ("GB", "GBR", r"^GB[0-9]{2}[A-Z]{4}[0-9]{14}$"),
        ("WF", "WLF", r"^WF[0-9]{12}[A-Z0-9]{11}[0-9]{2}$"),
    )

    # Both code forms of a country map to the same pattern
    by_code = {}
    for alpha2, alpha3, pattern in rows:
        by_code[alpha2] = pattern
        by_code[alpha3] = pattern

    # Unknown codes fall back to the generic pattern
    return by_code.get(country.upper(), generic)
192
+
193
+
194
def regex_postal_code(country: str) -> str:
    """
    Get regex pattern for postal code validation for a specific country.

    Parameters
    ----------
    country
        Two or three-letter country code (case-insensitive). ``"ZIP"`` is accepted as an alias
        for the United States.

    Returns
    -------
    str
        The anchored regex pattern for the country. Unknown codes get a permissive fallback
        that accepts any non-empty run of digits, upper-case letters, whitespace and hyphens.
        All patterns expect upper-case input.
    """
    # One row per country: (alpha-2 code, alpha-3 code, pattern)
    rows = (
        ("AD", "AND", r"^AD[0-9]{3}$"),
        ("AF", "AFG", r"^[0-9]{4}$"),
        ("AI", "AIA", r"^AI-2640$"),
        ("AL", "ALB", r"^[0-9]{4}$"),
        ("AM", "ARM", r"^[0-9]{4}$"),
        ("AR", "ARG", r"^([A-Z][0-9]{4}[A-Z]{3}|[A-Z][0-9]{4})$"),
        ("AS", "ASM", r"^96799$"),
        ("AT", "AUT", r"^[0-9]{4}$"),
        ("AU", "AUS", r"^[0-9]{4}$"),
        ("AZ", "AZE", r"^AZ[0-9]{4}$"),
        ("BA", "BIH", r"^[0-9]{5}$"),
        ("BB", "BRB", r"^BB[0-9]{5}$"),
        ("BD", "BGD", r"^[0-9]{4}$"),
        ("BE", "BEL", r"^[0-9]{4}$"),
        ("BG", "BGR", r"^[0-9]{4}$"),
        ("BH", "BHR", r"^[0-9]{3,4}$"),
        ("BL", "BLM", r"^97133$"),
        ("BM", "BMU", r"^[A-Z]{2}\s?[0-9]{2}$"),
        ("BN", "BRN", r"^[A-Z]{2}\s?[0-9]{4}$"),
        ("BR", "BRA", r"^[0-9]{5}-?[0-9]{3}$"),
        ("BT", "BTN", r"^[0-9]{5}$"),
        ("BY", "BLR", r"^[0-9]{6}$"),
        ("CA", "CAN", r"^[A-Z][0-9][A-Z]\s?[0-9][A-Z][0-9]$"),
        ("CC", "CCK", r"^6799$"),
        ("CH", "CHE", r"^[0-9]{4}$"),
        ("CL", "CHL", r"^[0-9]{7}$"),
        ("CN", "CHN", r"^[0-9]{6}$"),
        ("CO", "COL", r"^[0-9]{6}$"),
        ("CR", "CRI", r"^[0-9]{5}$"),
        ("CU", "CUB", r"^[0-9]{5}$"),
        ("CV", "CPV", r"^[0-9]{4}$"),
        ("CX", "CXR", r"^6798$"),
        ("CY", "CYP", r"^[0-9]{4}$"),
        ("CZ", "CZE", r"^[0-9]{3}\s?[0-9]{2}$"),
        ("DE", "DEU", r"^[0-9]{5}$"),
        ("DK", "DNK", r"^[0-9]{4}$"),
        ("DO", "DOM", r"^[0-9]{5}$"),
        ("DZ", "DZA", r"^[0-9]{5}$"),
        ("EC", "ECU", r"^[0-9]{6}$"),
        ("EE", "EST", r"^[0-9]{5}$"),
        ("EG", "EGY", r"^[0-9]{5}$"),
        ("ES", "ESP", r"^[0-9]{5}$"),
        ("ET", "ETH", r"^[0-9]{4}$"),
        ("FI", "FIN", r"^[0-9]{5}$"),
        ("FK", "FLK", r"^FIQQ 1ZZ$"),
        ("FM", "FSM", r"^(96941|96942|96943|96944)$"),
        ("FO", "FRO", r"^[0-9]{3}$"),
        ("FR", "FRA", r"^[0-9]{5}$"),
        ("GB", "GBR", r"^([A-Z]{1,2}[0-9]{1,2}[A-Z]?)\s?([0-9][A-Z]{2})$"),
        ("GF", "GUF", r"^973[0-9]{2}$"),
        ("GI", "GIB", r"^GX11 1AA$"),
        ("GL", "GRL", r"^39[0-9]{2}$"),
        ("GP", "GLP", r"^971[0-9]{2}$"),
        ("GR", "GRC", r"^[0-9]{3}\s?[0-9]{2}$"),
        ("GT", "GTM", r"^[0-9]{5}$"),
        ("GU", "GUM", r"^969[0-9]{2}$"),
        ("HR", "HRV", r"^[0-9]{5}$"),
        ("HT", "HTI", r"^[0-9]{4}$"),
        ("HU", "HUN", r"^[0-9]{4}$"),
        ("ID", "IDN", r"^[0-9]{5}$"),
        ("IE", "IRL", r"^[A-Z][0-9]{2}\s?[A-Z0-9]{4}$"),
        ("IN", "IND", r"^[0-9]{6}$"),
        ("IO", "IOT", r"^BBND 1ZZ$"),
        ("IQ", "IRQ", r"^[0-9]{5}$"),
        ("IR", "IRN", r"^[0-9]{10}$"),
        ("IS", "ISL", r"^[0-9]{3}$"),
        ("IT", "ITA", r"^[0-9]{5}$"),
        ("JP", "JPN", r"^[0-9]{3}-?[0-9]{4}$"),
        ("KR", "KOR", r"^[0-9]{5}$"),
        ("KY", "CYM", r"^KY[0-9]-[0-9]{4}$"),
        # The alternation is grouped so that both anchors apply to both branches; without the
        # group, "^" bound only to the first branch and "$" only to the second.
        ("LI", "LIE", r"^(948[5-9]|949[0-7])$"),
        ("LK", "LKA", r"^[0-9]{5}$"),
        ("LT", "LTU", r"^LT-?[0-9]{5}$"),
        ("LU", "LUX", r"^[0-9]{4}$"),
        ("LV", "LVA", r"^LV-?[0-9]{4}$"),
        ("MC", "MCO", r"^980[0-9]{2}$"),
        ("MD", "MDA", r"^MD-?[0-9]{4}$"),
        ("MH", "MHL", r"^(96960|96970)$"),
        ("MK", "MKD", r"^[0-9]{4}$"),
        ("MP", "MNP", r"^9695[0-2]$"),
        ("MQ", "MTQ", r"^972[0-9]{2}$"),
        ("MX", "MEX", r"^[0-9]{5}$"),
        ("MY", "MYS", r"^[0-9]{5}$"),
        ("NC", "NCL", r"^988[0-9]{2}$"),
        ("NE", "NER", r"^[0-9]{4}$"),
        ("NF", "NFK", r"^2899$"),
        ("NG", "NGA", r"^[0-9]{6}$"),
        ("NI", "NIC", r"^[0-9]{5}$"),
        ("NL", "NLD", r"^[0-9]{4}\s?[A-Z]{2}$"),
        ("NO", "NOR", r"^[0-9]{4}$"),
        ("NP", "NPL", r"^[0-9]{5}$"),
        ("NZ", "NZL", r"^[0-9]{4}$"),
        ("OM", "OMN", r"^[0-9]{3}$"),
        ("PE", "PER", r"^([A-Z]{4,5}\s?[0-9]{2}|[0-9]{5})$"),
        ("PF", "PYF", r"^987[0-9]{2}$"),
        ("PG", "PNG", r"^[0-9]{3}$"),
        ("PH", "PHL", r"^[0-9]{4}$"),
        ("PK", "PAK", r"^[0-9]{5}$"),
        ("PL", "POL", r"^[0-9]{2}-?[0-9]{3}$"),
        ("PM", "SPM", r"^97500$"),
        ("PN", "PCN", r"^PCRN 1ZZ$"),
        ("PR", "PRI", r"^00[679][0-9]{2}$"),
        ("PT", "PRT", r"^[0-9]{4}-?[0-9]{3}$"),
        ("PW", "PLW", r"^96940$"),
        ("PY", "PRY", r"^[0-9]{4}$"),
        ("RE", "REU", r"^974[0-9]{2}$"),
        ("RO", "ROU", r"^[0-9]{6}$"),
        ("RS", "SRB", r"^[0-9]{5}$"),
        ("RU", "RUS", r"^[0-9]{6}$"),
        ("SA", "SAU", r"^[0-9]{5}$"),
        ("SD", "SDN", r"^[0-9]{5}$"),
        ("SE", "SWE", r"^[0-9]{3}\s?[0-9]{2}$"),
        ("SG", "SGP", r"^[0-9]{6}$"),
        ("SH", "SHN", r"^(STHL 1ZZ|ASCN 1ZZ)$"),
        ("SI", "SVN", r"^[0-9]{4}$"),
        ("SJ", "SJM", r"^[0-9]{4}$"),
        ("SK", "SVK", r"^[0-9]{3}\s?[0-9]{2}$"),
        ("SM", "SMR", r"^4789[0-9]$"),
        ("SN", "SEN", r"^[0-9]{5}$"),
        ("SO", "SOM", r"^[A-Z]{2}\s?[0-9]{5}$"),
        ("SV", "SLV", r"^CP\s?[0-9]{4}$"),
        ("SZ", "SWZ", r"^[A-Z][0-9]{3}$"),
        ("TC", "TCA", r"^TKCA 1ZZ$"),
        ("TH", "THA", r"^[0-9]{5}$"),
        ("TJ", "TJK", r"^[0-9]{6}$"),
        ("TM", "TKM", r"^[0-9]{6}$"),
        ("TN", "TUN", r"^[0-9]{4}$"),
        ("TR", "TUR", r"^[0-9]{5}$"),
        ("TW", "TWN", r"^[0-9]{3}([0-9]{2})?$"),
        ("TZ", "TZA", r"^[0-9]{5}$"),
        ("UA", "UKR", r"^[0-9]{5}$"),
        ("UM", "UMI", r"^96898$"),
        ("US", "USA", r"^[0-9]{5}(-[0-9]{4})?$"),
        ("UY", "URY", r"^[0-9]{5}$"),
        ("UZ", "UZB", r"^[0-9]{6}$"),
        ("VA", "VAT", r"^00120$"),
        ("VC", "VCT", r"^VC[0-9]{4}$"),
        ("VE", "VEN", r"^[0-9]{4}$"),
        ("VG", "VGB", r"^VG[0-9]{4}$"),
        ("VI", "VIR", r"^008[0-9]{2}$"),
        ("VN", "VNM", r"^[0-9]{6}$"),
        ("WF", "WLF", r"^986[0-9]{2}$"),
        ("YT", "MYT", r"^976[0-9]{2}$"),
        ("ZA", "ZAF", r"^[0-9]{4}$"),
        ("ZM", "ZMB", r"^[0-9]{5}$"),
    )

    # Both code forms of a country map to the same pattern
    postal_patterns = {}
    for alpha2, alpha3, pattern in rows:
        postal_patterns[alpha2] = pattern
        postal_patterns[alpha3] = pattern

    # Alias for US
    postal_patterns["ZIP"] = postal_patterns["US"]

    return postal_patterns.get(country.upper(), r"^[0-9A-Z\s-]+$")
498
+
499
+
500
+ # Helper functions for string cleaning
501
+
502
+
503
def remove_hyphens(x: str, replacement: str = "") -> str:
    """
    Replace every hyphen in a string.

    Parameters
    ----------
    x
        The input string.
    replacement
        Text substituted for each ``-``; the default (empty string) removes hyphens.
    """
    pieces = x.split("-")
    return replacement.join(pieces)
506
+
507
+
508
def remove_spaces(x: str, replacement: str = "") -> str:
    """
    Replace every space character in a string.

    Only the plain space (``" "``) is affected; tabs, newlines and other whitespace are kept.

    Parameters
    ----------
    x
        The input string.
    replacement
        Text substituted for each space; the default (empty string) removes spaces.
    """
    pieces = x.split(" ")
    return replacement.join(pieces)
511
+
512
+
513
def remove_letters(x: str, replacement: str = "") -> str:
    """
    Replace every ASCII letter (``a-z``, ``A-Z``) in a string.

    Parameters
    ----------
    x
        The input string.
    replacement
        Regex replacement text substituted for each letter; the default (empty string) removes
        letters.
    """
    ascii_letter = re.compile(r"[a-zA-Z]")
    return ascii_letter.sub(replacement, x)
516
+
517
+
518
def remove_punctuation(x: str, replacement: str = " ") -> str:
    """
    Replace every punctuation character in a string.

    Here "punctuation" means any character that is neither a word character (``\\w``, which
    includes the underscore) nor whitespace.

    Parameters
    ----------
    x
        The input string.
    replacement
        Regex replacement text substituted for each punctuation character. Note that the
        default is a single space, not the empty string.
    """
    not_word_or_space = re.compile(r"[^\w\s]")
    return not_word_or_space.sub(replacement, x)
521
+
522
+
523
+ # Validation functions
524
+
525
+
526
def is_isbn_10(x: str) -> bool:
    """
    Check if a string is a valid ISBN-10.

    Hyphens, other punctuation and spaces are ignored, and the ``X`` check character is
    accepted in either case.

    Parameters
    ----------
    x
        String to validate.

    Returns
    -------
    bool
        True if valid ISBN-10, False otherwise.
    """
    # Strip separators in one pass: anything that is neither a word character nor whitespace
    # (this covers hyphens), plus plain spaces. Other whitespace is kept and fails the format
    # check below.
    candidate = re.sub(r"[^\w\s]| ", "", x).lower()

    # fullmatch rather than match: the value must be exactly ten characters. A prefix match
    # would let longer strings through and validate only their first ten characters.
    if not re.fullmatch(r"\d{9}[0-9x]", candidate):
        return False

    # An "x" can only be the check character (guaranteed by the pattern) and stands for 10
    values = [10 if ch == "x" else int(ch) for ch in candidate]

    # The sum of the digits multiplied by their weights (10 down to 1) must be divisible by 11
    return sum(v * w for v, w in zip(values, range(10, 0, -1))) % 11 == 0
564
+
565
+
566
def is_isbn_13(x: str) -> bool:
    """
    Check if a string is a valid ISBN-13.

    Hyphens are ignored; any other non-digit character makes the value invalid.

    Parameters
    ----------
    x
        String to validate.

    Returns
    -------
    bool
        True if valid ISBN-13, False otherwise.
    """
    candidate = x.replace("-", "")

    # fullmatch rather than match: the value must be exactly thirteen digits. A prefix match
    # would let longer digit strings through and validate only their first thirteen digits.
    if not re.fullmatch(r"\d{13}", candidate):
        return False

    digits = [int(ch) for ch in candidate]

    # The first twelve digits are weighted 1, 3, 1, 3, ...; the check digit is whatever brings
    # the weighted sum up to the next multiple of 10 (0 when the sum already is one).
    weighted_sum = sum(d * w for d, w in zip(digits[:12], [1, 3] * 6))
    expected_check = (10 - weighted_sum % 10) % 10

    return digits[12] == expected_check
594
+
595
+
596
def check_isbn(x: list[str]) -> list[bool]:
    """
    Validate a list of values as ISBNs, accepting both the 10- and 13-character forms.

    Each value is stringified, stripped of hyphens, punctuation and spaces, and lower-cased.
    The cleaned length then decides which check is applied; any other length is invalid.

    Parameters
    ----------
    x
        List of strings to validate. `None` and NaN entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    validators = {10: is_isbn_10, 13: is_isbn_13}

    def _validate(entry) -> bool:
        # Missing values (None or NaN, which is the only float unequal to itself) never pass
        if entry is None or (isinstance(entry, float) and entry != entry):
            return False

        cleaned = remove_punctuation(remove_hyphens(str(entry)))
        cleaned = remove_spaces(cleaned.lower())

        validator = validators.get(len(cleaned))
        if validator is None:
            return False
        return validator(cleaned)

    return [_validate(entry) for entry in x]
631
+
632
+
633
def is_vin(x: str) -> bool:
    """
    Check if a string is a valid VIN (Vehicle Identification Number).

    The value must be exactly 17 characters, each a digit or a letter other than ``I``, ``O``
    and ``Q`` (case-insensitive), and the ninth character must equal the computed check digit.

    Parameters
    ----------
    x
        String to validate.

    Returns
    -------
    bool
        True if valid VIN, False otherwise.
    """
    # Normalize once and use the same string for the format check and the checksum. Case
    # mapping can change a string's length (e.g. "ß".upper() == "SS"), so validating one
    # casing and indexing another is not safe.
    vin = x.lower()

    weights = (8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2)

    # Transliteration values; the keys mirror the character class of `regex_vin()`
    # ("i", "o" and "q" are absent)
    letter_vals = {
        "a": 1,
        "b": 2,
        "c": 3,
        "d": 4,
        "e": 5,
        "f": 6,
        "g": 7,
        "h": 8,
        "j": 1,
        "k": 2,
        "l": 3,
        "m": 4,
        "n": 5,
        "p": 7,
        "r": 9,
        "s": 2,
        "t": 3,
        "u": 4,
        "v": 5,
        "w": 6,
        "x": 7,
        "y": 8,
        "z": 9,
    }
    char_vals = {str(d): d for d in range(10)}
    char_vals.update(letter_vals)

    # Basic format check: exact length and only permitted characters
    if len(vin) != len(weights) or any(ch not in char_vals for ch in vin):
        return False

    total = sum(char_vals[ch] * weight for ch, weight in zip(vin, weights))

    # A remainder of 10 is written as "x"; the check digit sits in the ninth position (whose
    # own weight is 0, so it does not influence the total)
    check = total % 11
    check_str = "x" if check == 10 else str(check)

    return vin[8] == check_str
696
+
697
+
698
def check_vin(x: list[str]) -> list[bool]:
    """
    Validate a list of values as VINs.

    Each value is stringified, stripped of hyphens, punctuation and spaces, and lower-cased
    before being checked.

    Parameters
    ----------
    x
        List of strings to validate. `None` and NaN entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """

    def _is_missing(entry) -> bool:
        # NaN is the only float that is not equal to itself
        return entry is None or (isinstance(entry, float) and entry != entry)

    def _normalize(entry) -> str:
        text = remove_punctuation(remove_hyphens(str(entry)))
        return remove_spaces(text.lower())

    return [False if _is_missing(entry) else is_vin(_normalize(entry)) for entry in x]
727
+
728
+
729
def luhn(x: str) -> bool:
    """
    Check if a string passes the Luhn algorithm (for credit cards).

    Parameters
    ----------
    x
        String to validate. It must consist solely of digits; separators have to be removed by
        the caller.

    Returns
    -------
    bool
        True if passes Luhn check, False otherwise. An empty string or a string containing a
        non-digit character returns False.
    """
    # Without this guard an empty string would sum to 0 and be reported as valid
    if not x:
        return False

    try:
        digits = [int(ch) for ch in reversed(x)]
    except ValueError:
        return False

    total = 0
    for position, digit in enumerate(digits):
        # Counting from the right, every second digit is doubled; a two-digit product is
        # reduced to the sum of its digits (equivalent to subtracting 9)
        if position % 2 == 1:
            digit *= 2
            if digit > 9:
                digit -= 9
        total += digit

    return total % 10 == 0
756
+
757
+
758
def is_credit_card(x: str) -> bool:
    """
    Check if a string is a valid credit card number.

    The check has three stages: the raw value must match the first credit card pattern, the
    value with hyphens, punctuation and spaces removed must match the second one, and that
    cleaned value must pass the Luhn check.

    Parameters
    ----------
    x
        String to validate.

    Returns
    -------
    bool
        True if valid credit card number, False otherwise.
    """
    # Stage 1: reject the raw value early if it fails the first pattern
    if re.match(regex_credit_card_1(), x) is None:
        return False

    # Stage 2: drop separators and check the second pattern
    digits_only = remove_spaces(remove_punctuation(remove_hyphens(x)))
    has_valid_length = re.match(regex_credit_card_2(), digits_only) is not None

    # Stage 3: checksum
    return has_valid_length and luhn(digits_only)
783
+
784
+
785
def check_credit_card(x: list[str]) -> list[bool]:
    """
    Validate a list of values as credit card numbers.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before checking; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    outcomes = []
    for entry in x:
        # NaN is the only float that is not equal to itself
        is_missing = entry is None or (isinstance(entry, float) and entry != entry)
        if is_missing:
            outcomes.append(False)
        else:
            outcomes.append(is_credit_card(str(entry)))
    return outcomes
805
+
806
+
807
def check_iban(x: list[str], country: str | None = None) -> list[bool]:
    """
    Check if strings are valid IBANs.

    Values are stringified and upper-cased before matching. Only the format is checked.

    Parameters
    ----------
    x
        List of strings to validate. `None` and NaN entries are reported as invalid.
    country
        Optional country code for country-specific validation.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_iban(country=country)).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val).upper()) is not None
        for val in x
    ]
830
+
831
+
832
def check_postal_code(x: list[str], country: str) -> list[bool]:
    """
    Check if strings are valid postal codes for a given country.

    Values are stringified and upper-cased before matching.

    Parameters
    ----------
    x
        List of strings to validate. `None` and NaN entries are reported as invalid.
    country
        Country code (2 or 3 letter).

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match so that the entire
    # value has to be a postal code: match only anchors the start, and a trailing "$" in the
    # pattern also accepts a value that ends in a newline.
    is_match = re.compile(regex_postal_code(country=country)).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val).upper()) is not None
        for val in x
    ]
855
+
856
+
857
def check_url(x: list[str]) -> list[bool]:
    """
    Check if strings are valid URLs.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before matching; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_url()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val)) is not None
        for val in x
    ]
878
+
879
+
880
def check_ipv4_address(x: list[str]) -> list[bool]:
    """
    Check if strings are valid IPv4 addresses.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before matching; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_ipv4_address()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val)) is not None
        for val in x
    ]
901
+
902
+
903
def check_ipv6_address(x: list[str]) -> list[bool]:
    """
    Check if strings are valid IPv6 addresses.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before matching; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_ipv6_address()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val)) is not None
        for val in x
    ]
924
+
925
+
926
def check_email(x: list[str]) -> list[bool]:
    """
    Check if strings are valid email addresses.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before matching; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_email()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val)) is not None
        for val in x
    ]
947
+
948
+
949
def check_phone(x: list[str]) -> list[bool]:
    """
    Check if strings are valid phone numbers.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before matching; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_phone()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val)) is not None
        for val in x
    ]
970
+
971
+
972
def check_mac(x: list[str]) -> list[bool]:
    """
    Check if strings are valid MAC addresses.

    Parameters
    ----------
    x
        List of strings to validate. Values are stringified before matching; `None` and NaN
        entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_mac()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val)) is not None
        for val in x
    ]
993
+
994
+
995
def check_swift_bic(x: list[str]) -> list[bool]:
    """
    Check if strings are valid SWIFT/BIC codes.

    Values are stringified and upper-cased before matching.

    Parameters
    ----------
    x
        List of strings to validate. `None` and NaN entries are reported as invalid.

    Returns
    -------
    list[bool]
        List of boolean values indicating validity.
    """
    # Compile once for the whole list. fullmatch is used instead of match because, with match,
    # the pattern's trailing "$" also accepts a value that ends in a newline.
    is_match = re.compile(regex_swift_bic()).fullmatch

    return [
        val is not None
        and not (isinstance(val, float) and val != val)  # NaN is the only float != itself
        and is_match(str(val).upper()) is not None
        for val in x
    ]