pointblank 0.18.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322)
  1. pointblank/__init__.py +44 -1
  2. pointblank/_constants.py +258 -166
  3. pointblank/_constants_translations.py +378 -0
  4. pointblank/_interrogation.py +204 -0
  5. pointblank/_utils_llms_txt.py +20 -0
  6. pointblank/data/api-docs.txt +793 -1
  7. pointblank/field.py +1507 -0
  8. pointblank/generate/__init__.py +17 -0
  9. pointblank/generate/base.py +49 -0
  10. pointblank/generate/generators.py +573 -0
  11. pointblank/generate/regex.py +217 -0
  12. pointblank/locales/__init__.py +1476 -0
  13. pointblank/locales/data/AR/address.json +73 -0
  14. pointblank/locales/data/AR/company.json +60 -0
  15. pointblank/locales/data/AR/internet.json +19 -0
  16. pointblank/locales/data/AR/misc.json +7 -0
  17. pointblank/locales/data/AR/person.json +39 -0
  18. pointblank/locales/data/AR/text.json +38 -0
  19. pointblank/locales/data/AT/address.json +84 -0
  20. pointblank/locales/data/AT/company.json +65 -0
  21. pointblank/locales/data/AT/internet.json +20 -0
  22. pointblank/locales/data/AT/misc.json +8 -0
  23. pointblank/locales/data/AT/person.json +17 -0
  24. pointblank/locales/data/AT/text.json +35 -0
  25. pointblank/locales/data/AU/address.json +83 -0
  26. pointblank/locales/data/AU/company.json +65 -0
  27. pointblank/locales/data/AU/internet.json +20 -0
  28. pointblank/locales/data/AU/misc.json +8 -0
  29. pointblank/locales/data/AU/person.json +17 -0
  30. pointblank/locales/data/AU/text.json +35 -0
  31. pointblank/locales/data/BE/address.json +225 -0
  32. pointblank/locales/data/BE/company.json +129 -0
  33. pointblank/locales/data/BE/internet.json +36 -0
  34. pointblank/locales/data/BE/misc.json +6 -0
  35. pointblank/locales/data/BE/person.json +62 -0
  36. pointblank/locales/data/BE/text.json +38 -0
  37. pointblank/locales/data/BG/address.json +75 -0
  38. pointblank/locales/data/BG/company.json +60 -0
  39. pointblank/locales/data/BG/internet.json +19 -0
  40. pointblank/locales/data/BG/misc.json +7 -0
  41. pointblank/locales/data/BG/person.json +40 -0
  42. pointblank/locales/data/BG/text.json +38 -0
  43. pointblank/locales/data/BR/address.json +98 -0
  44. pointblank/locales/data/BR/company.json +65 -0
  45. pointblank/locales/data/BR/internet.json +20 -0
  46. pointblank/locales/data/BR/misc.json +8 -0
  47. pointblank/locales/data/BR/person.json +17 -0
  48. pointblank/locales/data/BR/text.json +35 -0
  49. pointblank/locales/data/CA/address.json +747 -0
  50. pointblank/locales/data/CA/company.json +120 -0
  51. pointblank/locales/data/CA/internet.json +24 -0
  52. pointblank/locales/data/CA/misc.json +11 -0
  53. pointblank/locales/data/CA/person.json +1033 -0
  54. pointblank/locales/data/CA/text.json +58 -0
  55. pointblank/locales/data/CH/address.json +184 -0
  56. pointblank/locales/data/CH/company.json +112 -0
  57. pointblank/locales/data/CH/internet.json +20 -0
  58. pointblank/locales/data/CH/misc.json +10 -0
  59. pointblank/locales/data/CH/person.json +64 -0
  60. pointblank/locales/data/CH/text.json +45 -0
  61. pointblank/locales/data/CL/address.json +71 -0
  62. pointblank/locales/data/CL/company.json +60 -0
  63. pointblank/locales/data/CL/internet.json +19 -0
  64. pointblank/locales/data/CL/misc.json +7 -0
  65. pointblank/locales/data/CL/person.json +38 -0
  66. pointblank/locales/data/CL/text.json +38 -0
  67. pointblank/locales/data/CN/address.json +124 -0
  68. pointblank/locales/data/CN/company.json +76 -0
  69. pointblank/locales/data/CN/internet.json +20 -0
  70. pointblank/locales/data/CN/misc.json +8 -0
  71. pointblank/locales/data/CN/person.json +50 -0
  72. pointblank/locales/data/CN/text.json +38 -0
  73. pointblank/locales/data/CO/address.json +76 -0
  74. pointblank/locales/data/CO/company.json +60 -0
  75. pointblank/locales/data/CO/internet.json +19 -0
  76. pointblank/locales/data/CO/misc.json +7 -0
  77. pointblank/locales/data/CO/person.json +38 -0
  78. pointblank/locales/data/CO/text.json +38 -0
  79. pointblank/locales/data/CY/address.json +62 -0
  80. pointblank/locales/data/CY/company.json +60 -0
  81. pointblank/locales/data/CY/internet.json +19 -0
  82. pointblank/locales/data/CY/misc.json +7 -0
  83. pointblank/locales/data/CY/person.json +38 -0
  84. pointblank/locales/data/CY/text.json +38 -0
  85. pointblank/locales/data/CZ/address.json +70 -0
  86. pointblank/locales/data/CZ/company.json +61 -0
  87. pointblank/locales/data/CZ/internet.json +19 -0
  88. pointblank/locales/data/CZ/misc.json +7 -0
  89. pointblank/locales/data/CZ/person.json +40 -0
  90. pointblank/locales/data/CZ/text.json +38 -0
  91. pointblank/locales/data/DE/address.json +756 -0
  92. pointblank/locales/data/DE/company.json +101 -0
  93. pointblank/locales/data/DE/internet.json +22 -0
  94. pointblank/locales/data/DE/misc.json +11 -0
  95. pointblank/locales/data/DE/person.json +1026 -0
  96. pointblank/locales/data/DE/text.json +50 -0
  97. pointblank/locales/data/DK/address.json +231 -0
  98. pointblank/locales/data/DK/company.json +65 -0
  99. pointblank/locales/data/DK/internet.json +20 -0
  100. pointblank/locales/data/DK/misc.json +7 -0
  101. pointblank/locales/data/DK/person.json +45 -0
  102. pointblank/locales/data/DK/text.json +43 -0
  103. pointblank/locales/data/EE/address.json +69 -0
  104. pointblank/locales/data/EE/company.json +60 -0
  105. pointblank/locales/data/EE/internet.json +19 -0
  106. pointblank/locales/data/EE/misc.json +7 -0
  107. pointblank/locales/data/EE/person.json +39 -0
  108. pointblank/locales/data/EE/text.json +38 -0
  109. pointblank/locales/data/ES/address.json +3086 -0
  110. pointblank/locales/data/ES/company.json +644 -0
  111. pointblank/locales/data/ES/internet.json +25 -0
  112. pointblank/locales/data/ES/misc.json +11 -0
  113. pointblank/locales/data/ES/person.json +488 -0
  114. pointblank/locales/data/ES/text.json +49 -0
  115. pointblank/locales/data/FI/address.json +93 -0
  116. pointblank/locales/data/FI/company.json +65 -0
  117. pointblank/locales/data/FI/internet.json +20 -0
  118. pointblank/locales/data/FI/misc.json +8 -0
  119. pointblank/locales/data/FI/person.json +17 -0
  120. pointblank/locales/data/FI/text.json +35 -0
  121. pointblank/locales/data/FR/address.json +619 -0
  122. pointblank/locales/data/FR/company.json +111 -0
  123. pointblank/locales/data/FR/internet.json +22 -0
  124. pointblank/locales/data/FR/misc.json +11 -0
  125. pointblank/locales/data/FR/person.json +1066 -0
  126. pointblank/locales/data/FR/text.json +50 -0
  127. pointblank/locales/data/GB/address.json +5759 -0
  128. pointblank/locales/data/GB/company.json +131 -0
  129. pointblank/locales/data/GB/internet.json +24 -0
  130. pointblank/locales/data/GB/misc.json +45 -0
  131. pointblank/locales/data/GB/person.json +578 -0
  132. pointblank/locales/data/GB/text.json +61 -0
  133. pointblank/locales/data/GR/address.json +68 -0
  134. pointblank/locales/data/GR/company.json +61 -0
  135. pointblank/locales/data/GR/internet.json +19 -0
  136. pointblank/locales/data/GR/misc.json +7 -0
  137. pointblank/locales/data/GR/person.json +39 -0
  138. pointblank/locales/data/GR/text.json +38 -0
  139. pointblank/locales/data/HK/address.json +79 -0
  140. pointblank/locales/data/HK/company.json +69 -0
  141. pointblank/locales/data/HK/internet.json +19 -0
  142. pointblank/locales/data/HK/misc.json +7 -0
  143. pointblank/locales/data/HK/person.json +42 -0
  144. pointblank/locales/data/HK/text.json +38 -0
  145. pointblank/locales/data/HR/address.json +73 -0
  146. pointblank/locales/data/HR/company.json +60 -0
  147. pointblank/locales/data/HR/internet.json +19 -0
  148. pointblank/locales/data/HR/misc.json +7 -0
  149. pointblank/locales/data/HR/person.json +38 -0
  150. pointblank/locales/data/HR/text.json +38 -0
  151. pointblank/locales/data/HU/address.json +70 -0
  152. pointblank/locales/data/HU/company.json +61 -0
  153. pointblank/locales/data/HU/internet.json +19 -0
  154. pointblank/locales/data/HU/misc.json +7 -0
  155. pointblank/locales/data/HU/person.json +40 -0
  156. pointblank/locales/data/HU/text.json +38 -0
  157. pointblank/locales/data/ID/address.json +68 -0
  158. pointblank/locales/data/ID/company.json +61 -0
  159. pointblank/locales/data/ID/internet.json +19 -0
  160. pointblank/locales/data/ID/misc.json +7 -0
  161. pointblank/locales/data/ID/person.json +40 -0
  162. pointblank/locales/data/ID/text.json +38 -0
  163. pointblank/locales/data/IE/address.json +643 -0
  164. pointblank/locales/data/IE/company.json +140 -0
  165. pointblank/locales/data/IE/internet.json +24 -0
  166. pointblank/locales/data/IE/misc.json +44 -0
  167. pointblank/locales/data/IE/person.json +55 -0
  168. pointblank/locales/data/IE/text.json +60 -0
  169. pointblank/locales/data/IN/address.json +92 -0
  170. pointblank/locales/data/IN/company.json +65 -0
  171. pointblank/locales/data/IN/internet.json +20 -0
  172. pointblank/locales/data/IN/misc.json +8 -0
  173. pointblank/locales/data/IN/person.json +52 -0
  174. pointblank/locales/data/IN/text.json +39 -0
  175. pointblank/locales/data/IS/address.json +63 -0
  176. pointblank/locales/data/IS/company.json +61 -0
  177. pointblank/locales/data/IS/internet.json +19 -0
  178. pointblank/locales/data/IS/misc.json +7 -0
  179. pointblank/locales/data/IS/person.json +44 -0
  180. pointblank/locales/data/IS/text.json +38 -0
  181. pointblank/locales/data/IT/address.json +192 -0
  182. pointblank/locales/data/IT/company.json +137 -0
  183. pointblank/locales/data/IT/internet.json +20 -0
  184. pointblank/locales/data/IT/misc.json +10 -0
  185. pointblank/locales/data/IT/person.json +70 -0
  186. pointblank/locales/data/IT/text.json +44 -0
  187. pointblank/locales/data/JP/address.json +713 -0
  188. pointblank/locales/data/JP/company.json +113 -0
  189. pointblank/locales/data/JP/internet.json +22 -0
  190. pointblank/locales/data/JP/misc.json +10 -0
  191. pointblank/locales/data/JP/person.json +1057 -0
  192. pointblank/locales/data/JP/text.json +51 -0
  193. pointblank/locales/data/KR/address.json +77 -0
  194. pointblank/locales/data/KR/company.json +68 -0
  195. pointblank/locales/data/KR/internet.json +19 -0
  196. pointblank/locales/data/KR/misc.json +7 -0
  197. pointblank/locales/data/KR/person.json +40 -0
  198. pointblank/locales/data/KR/text.json +38 -0
  199. pointblank/locales/data/LT/address.json +66 -0
  200. pointblank/locales/data/LT/company.json +60 -0
  201. pointblank/locales/data/LT/internet.json +19 -0
  202. pointblank/locales/data/LT/misc.json +7 -0
  203. pointblank/locales/data/LT/person.json +42 -0
  204. pointblank/locales/data/LT/text.json +38 -0
  205. pointblank/locales/data/LU/address.json +66 -0
  206. pointblank/locales/data/LU/company.json +60 -0
  207. pointblank/locales/data/LU/internet.json +19 -0
  208. pointblank/locales/data/LU/misc.json +7 -0
  209. pointblank/locales/data/LU/person.json +38 -0
  210. pointblank/locales/data/LU/text.json +38 -0
  211. pointblank/locales/data/LV/address.json +62 -0
  212. pointblank/locales/data/LV/company.json +60 -0
  213. pointblank/locales/data/LV/internet.json +19 -0
  214. pointblank/locales/data/LV/misc.json +7 -0
  215. pointblank/locales/data/LV/person.json +40 -0
  216. pointblank/locales/data/LV/text.json +38 -0
  217. pointblank/locales/data/MT/address.json +61 -0
  218. pointblank/locales/data/MT/company.json +60 -0
  219. pointblank/locales/data/MT/internet.json +19 -0
  220. pointblank/locales/data/MT/misc.json +7 -0
  221. pointblank/locales/data/MT/person.json +38 -0
  222. pointblank/locales/data/MT/text.json +38 -0
  223. pointblank/locales/data/MX/address.json +100 -0
  224. pointblank/locales/data/MX/company.json +65 -0
  225. pointblank/locales/data/MX/internet.json +20 -0
  226. pointblank/locales/data/MX/misc.json +8 -0
  227. pointblank/locales/data/MX/person.json +18 -0
  228. pointblank/locales/data/MX/text.json +39 -0
  229. pointblank/locales/data/NL/address.json +1517 -0
  230. pointblank/locales/data/NL/company.json +133 -0
  231. pointblank/locales/data/NL/internet.json +44 -0
  232. pointblank/locales/data/NL/misc.json +55 -0
  233. pointblank/locales/data/NL/person.json +365 -0
  234. pointblank/locales/data/NL/text.json +210 -0
  235. pointblank/locales/data/NO/address.json +86 -0
  236. pointblank/locales/data/NO/company.json +66 -0
  237. pointblank/locales/data/NO/internet.json +20 -0
  238. pointblank/locales/data/NO/misc.json +8 -0
  239. pointblank/locales/data/NO/person.json +17 -0
  240. pointblank/locales/data/NO/text.json +35 -0
  241. pointblank/locales/data/NZ/address.json +90 -0
  242. pointblank/locales/data/NZ/company.json +65 -0
  243. pointblank/locales/data/NZ/internet.json +20 -0
  244. pointblank/locales/data/NZ/misc.json +8 -0
  245. pointblank/locales/data/NZ/person.json +17 -0
  246. pointblank/locales/data/NZ/text.json +39 -0
  247. pointblank/locales/data/PH/address.json +67 -0
  248. pointblank/locales/data/PH/company.json +61 -0
  249. pointblank/locales/data/PH/internet.json +19 -0
  250. pointblank/locales/data/PH/misc.json +7 -0
  251. pointblank/locales/data/PH/person.json +40 -0
  252. pointblank/locales/data/PH/text.json +38 -0
  253. pointblank/locales/data/PL/address.json +91 -0
  254. pointblank/locales/data/PL/company.json +65 -0
  255. pointblank/locales/data/PL/internet.json +20 -0
  256. pointblank/locales/data/PL/misc.json +8 -0
  257. pointblank/locales/data/PL/person.json +17 -0
  258. pointblank/locales/data/PL/text.json +35 -0
  259. pointblank/locales/data/PT/address.json +90 -0
  260. pointblank/locales/data/PT/company.json +65 -0
  261. pointblank/locales/data/PT/internet.json +20 -0
  262. pointblank/locales/data/PT/misc.json +8 -0
  263. pointblank/locales/data/PT/person.json +17 -0
  264. pointblank/locales/data/PT/text.json +35 -0
  265. pointblank/locales/data/RO/address.json +73 -0
  266. pointblank/locales/data/RO/company.json +61 -0
  267. pointblank/locales/data/RO/internet.json +19 -0
  268. pointblank/locales/data/RO/misc.json +7 -0
  269. pointblank/locales/data/RO/person.json +40 -0
  270. pointblank/locales/data/RO/text.json +38 -0
  271. pointblank/locales/data/RU/address.json +74 -0
  272. pointblank/locales/data/RU/company.json +60 -0
  273. pointblank/locales/data/RU/internet.json +19 -0
  274. pointblank/locales/data/RU/misc.json +7 -0
  275. pointblank/locales/data/RU/person.json +38 -0
  276. pointblank/locales/data/RU/text.json +38 -0
  277. pointblank/locales/data/SE/address.json +247 -0
  278. pointblank/locales/data/SE/company.json +65 -0
  279. pointblank/locales/data/SE/internet.json +20 -0
  280. pointblank/locales/data/SE/misc.json +7 -0
  281. pointblank/locales/data/SE/person.json +45 -0
  282. pointblank/locales/data/SE/text.json +43 -0
  283. pointblank/locales/data/SI/address.json +67 -0
  284. pointblank/locales/data/SI/company.json +60 -0
  285. pointblank/locales/data/SI/internet.json +19 -0
  286. pointblank/locales/data/SI/misc.json +7 -0
  287. pointblank/locales/data/SI/person.json +38 -0
  288. pointblank/locales/data/SI/text.json +38 -0
  289. pointblank/locales/data/SK/address.json +64 -0
  290. pointblank/locales/data/SK/company.json +60 -0
  291. pointblank/locales/data/SK/internet.json +19 -0
  292. pointblank/locales/data/SK/misc.json +7 -0
  293. pointblank/locales/data/SK/person.json +38 -0
  294. pointblank/locales/data/SK/text.json +38 -0
  295. pointblank/locales/data/TR/address.json +105 -0
  296. pointblank/locales/data/TR/company.json +65 -0
  297. pointblank/locales/data/TR/internet.json +20 -0
  298. pointblank/locales/data/TR/misc.json +8 -0
  299. pointblank/locales/data/TR/person.json +17 -0
  300. pointblank/locales/data/TR/text.json +35 -0
  301. pointblank/locales/data/TW/address.json +86 -0
  302. pointblank/locales/data/TW/company.json +69 -0
  303. pointblank/locales/data/TW/internet.json +19 -0
  304. pointblank/locales/data/TW/misc.json +7 -0
  305. pointblank/locales/data/TW/person.json +42 -0
  306. pointblank/locales/data/TW/text.json +38 -0
  307. pointblank/locales/data/US/address.json +996 -0
  308. pointblank/locales/data/US/company.json +131 -0
  309. pointblank/locales/data/US/internet.json +22 -0
  310. pointblank/locales/data/US/misc.json +11 -0
  311. pointblank/locales/data/US/person.json +1092 -0
  312. pointblank/locales/data/US/text.json +56 -0
  313. pointblank/locales/data/_shared/misc.json +42 -0
  314. pointblank/schema.py +339 -2
  315. pointblank/validate.py +1263 -11
  316. {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/METADATA +45 -1
  317. pointblank-0.20.0.dist-info/RECORD +366 -0
  318. {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/WHEEL +1 -1
  319. pointblank-0.18.0.dist-info/RECORD +0 -59
  320. {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/entry_points.txt +0 -0
  321. {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/licenses/LICENSE +0 -0
  322. {pointblank-0.18.0.dist-info → pointblank-0.20.0.dist-info}/top_level.txt +0 -0
pointblank/validate.py CHANGED
@@ -17,6 +17,7 @@ from importlib.metadata import version
17
17
  from pathlib import Path
18
18
  from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, ParamSpec, TypeVar
19
19
  from zipfile import ZipFile
20
+ from zoneinfo import ZoneInfo
20
21
 
21
22
  import commonmark
22
23
  import narwhals as nw
@@ -4350,6 +4351,18 @@ class Validate:
4350
4351
  locale's rules. Examples include `"en-US"` for English (United States) and `"fr-FR"` for
4351
4352
  French (France). More simply, this can be a language identifier without a designation of
4352
4353
  territory, like `"es"` for Spanish.
4354
+ owner
4355
+ An optional string identifying the owner of the data being validated. This is useful for
4356
+ governance purposes, indicating who is responsible for the quality and maintenance of the
4357
+ data. For example, `"data-platform-team"` or `"analytics-engineering"`.
4358
+ consumers
4359
+ An optional string or list of strings identifying who depends on or consumes this data.
4360
+ This helps document data dependencies and can be useful for impact analysis when data
4361
+ quality issues are detected. For example, `"ml-team"` or `["ml-team", "analytics"]`.
4362
+ version
4363
+ An optional string representing the version of the validation plan or data contract. This
4364
+ supports semantic versioning (e.g., `"1.0.0"`, `"2.1.0"`) and is useful for tracking changes
4365
+ to validation rules over time and for organizational governance.
4353
4366
 
4354
4367
  Returns
4355
4368
  -------
@@ -4836,6 +4849,9 @@ class Validate:
4836
4849
  brief: str | bool | None = None
4837
4850
  lang: str | None = None
4838
4851
  locale: str | None = None
4852
+ owner: str | None = None
4853
+ consumers: str | list[str] | None = None
4854
+ version: str | None = None
4839
4855
 
4840
4856
  def __post_init__(self):
4841
4857
  # Process data through the centralized data processing pipeline
@@ -4880,6 +4896,36 @@ class Validate:
4880
4896
  # Transform any shorthands of `brief` to string representations
4881
4897
  self.brief = _transform_auto_brief(brief=self.brief)
4882
4898
 
4899
+ # Validate and normalize the `owner` parameter
4900
+ if self.owner is not None and not isinstance(self.owner, str):
4901
+ raise TypeError(
4902
+ "The `owner=` parameter must be a string representing the owner of the data. "
4903
+ f"Received type: {type(self.owner).__name__}"
4904
+ )
4905
+
4906
+ # Validate and normalize the `consumers` parameter
4907
+ if self.consumers is not None:
4908
+ if isinstance(self.consumers, str):
4909
+ self.consumers = [self.consumers]
4910
+ elif isinstance(self.consumers, list):
4911
+ if not all(isinstance(c, str) for c in self.consumers):
4912
+ raise TypeError(
4913
+ "The `consumers=` parameter must be a string or a list of strings. "
4914
+ "All elements in the list must be strings."
4915
+ )
4916
+ else:
4917
+ raise TypeError(
4918
+ "The `consumers=` parameter must be a string or a list of strings. "
4919
+ f"Received type: {type(self.consumers).__name__}"
4920
+ )
4921
+
4922
+ # Validate the `version` parameter
4923
+ if self.version is not None and not isinstance(self.version, str):
4924
+ raise TypeError(
4925
+ "The `version=` parameter must be a string representing the version. "
4926
+ f"Received type: {type(self.version).__name__}"
4927
+ )
4928
+
4883
4929
  # TODO: Add functionality to obtain the column names and types from the table
4884
4930
  self.col_names = None
4885
4931
  self.col_types = None
@@ -11530,6 +11576,369 @@ class Validate:
11530
11576
 
11531
11577
  return self
11532
11578
 
11579
+ def data_freshness(
11580
+ self,
11581
+ column: str,
11582
+ max_age: str | datetime.timedelta,
11583
+ reference_time: datetime.datetime | str | None = None,
11584
+ timezone: str | None = None,
11585
+ allow_tz_mismatch: bool = False,
11586
+ pre: Callable | None = None,
11587
+ thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
11588
+ actions: Actions | None = None,
11589
+ brief: str | bool | None = None,
11590
+ active: bool = True,
11591
+ ) -> Validate:
11592
+ """
11593
+ Validate that data in a datetime column is not older than a specified maximum age.
11594
+
11595
+ The `data_freshness()` validation method checks whether the most recent timestamp in the
11596
+ specified datetime column is within the allowed `max_age=` from the `reference_time=` (which
11597
+ defaults to the current time). This is useful for ensuring data pipelines are delivering
11598
+ fresh data and for enforcing data SLAs.
11599
+
11600
+ This method helps detect stale data by comparing the maximum (most recent) value in a
11601
+ datetime column against an expected freshness threshold.
11602
+
11603
+ Parameters
11604
+ ----------
11605
+ column
11606
+ The name of the datetime column to check for freshness. This column should contain
11607
+ date or datetime values.
11608
+ max_age
11609
+ The maximum allowed age of the data. Can be specified as: (1) a string with a
11610
+ human-readable duration like `"24 hours"`, `"1 day"`, `"30 minutes"`, `"2 weeks"`, etc.
11611
+ (supported units: `seconds`, `minutes`, `hours`, `days`, `weeks`), or (2) a
11612
+ `datetime.timedelta` object for precise control.
11613
+ reference_time
11614
+ The reference point in time to compare against. Defaults to `None`, which uses the
11615
+ current time (UTC if `timezone=` is not specified). Can be: (1) a `datetime.datetime`
11616
+ object (timezone-aware recommended), (2) a string in ISO 8601 format (e.g.,
11617
+ `"2024-01-15T10:30:00"` or `"2024-01-15T10:30:00+05:30"`), or (3) `None` to use the
11618
+ current time.
11619
+ timezone
11620
+ The timezone to use for interpreting the data and reference time. Accepts IANA
11621
+ timezone names (e.g., `"America/New_York"`), hour offsets (e.g., `"-7"`), or ISO 8601
11622
+ offsets (e.g., `"-07:00"`). When `None` (default), naive datetimes are treated as UTC.
11623
+ See the *The `timezone=` Parameter* section for details.
11624
+ allow_tz_mismatch
11625
+ Whether to allow timezone mismatches between the column data and reference time.
11626
+ By default (`False`), a warning note is added when comparing timezone-naive with
11627
+ timezone-aware datetimes. Set to `True` to suppress these warnings.
11628
+ pre
11629
+ An optional preprocessing function or lambda to apply to the data table during
11630
+ interrogation. This function should take a table as input and return a modified table.
11631
+ thresholds
11632
+ Set threshold failure levels for reporting and reacting to exceedences of the levels.
11633
+ The thresholds are set at the step level and will override any global thresholds set in
11634
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
11635
+ be set locally and global thresholds (if any) will take effect.
11636
+ actions
11637
+ Optional actions to take when the validation step meets or exceeds any set threshold
11638
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
11639
+ define the actions.
11640
+ brief
11641
+ An optional brief description of the validation step that will be displayed in the
11642
+ reporting table. You can use the templating elements like `"{step}"` to insert
11643
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
11644
+ the entire brief will be automatically generated. If `None` (the default) then there
11645
+ won't be a brief.
11646
+ active
11647
+ A boolean value indicating whether the validation step should be active. Using `False`
11648
+ will make the validation step inactive (still reporting its presence and keeping indexes
11649
+ for the steps unchanged).
11650
+
11651
+ Returns
11652
+ -------
11653
+ Validate
11654
+ The `Validate` object with the added validation step.
11655
+
11656
+ How Timezones Affect Freshness Checks
11657
+ -------------------------------------
11658
+ Freshness validation involves comparing two times: the **data time** (the most recent
11659
+ timestamp in your column) and the **execution time** (when and where the validation runs).
11660
+ Timezone confusion typically arises because these two times may originate from different
11661
+ contexts.
11662
+
11663
+ Consider these common scenarios:
11664
+
11665
+ - your data timestamps are stored in UTC (common for databases), but you're running
11666
+ validation on your laptop in New York (Eastern Time)
11667
+ - you develop and test validation locally, then deploy it to a cloud workflow that runs
11668
+ in UTC—suddenly your 'same' validation behaves differently
11669
+ - your data comes from servers in multiple regions, each recording timestamps in their
11670
+ local timezone
11671
+
11672
+ The `timezone=` parameter exists to solve this problem by establishing a single, explicit
11673
+ timezone context for the freshness comparison. When you specify a timezone, Pointblank
11674
+ interprets both the data timestamps (if naive) and the execution time in that timezone,
11675
+ ensuring consistent behavior whether you run validation on your laptop or in a cloud
11676
+ workflow.
11677
+
11678
+ **Scenario 1: Data has timezone-aware datetimes**
11679
+
11680
+ ```python
11681
+ # Your data column has values like: 2024-01-15 10:30:00+00:00 (UTC)
11682
+ # Comparison is straightforward as both sides have explicit timezones
11683
+ .data_freshness(column="updated_at", max_age="24 hours")
11684
+ ```
11685
+
11686
+ **Scenario 2: Data has naive datetimes (no timezone)**
11687
+
11688
+ ```python
11689
+ # Your data column has values like: 2024-01-15 10:30:00 (no timezone)
11690
+ # Specify the timezone the data was recorded in:
11691
+ .data_freshness(column="updated_at", max_age="24 hours", timezone="America/New_York")
11692
+ ```
11693
+
11694
+ **Scenario 3: Ensuring consistent behavior across environments**
11695
+
11696
+ ```python
11697
+ # Pin the timezone to ensure identical results whether running locally or in the cloud
11698
+ .data_freshness(
11699
+ column="updated_at",
11700
+ max_age="24 hours",
11701
+ timezone="UTC", # Explicit timezone removes environment dependence
11702
+ )
11703
+ ```
11704
+
11705
+ The `timezone=` Parameter
11706
+ -------------------------
11707
+ The `timezone=` parameter accepts several convenient formats, making it easy to specify
11708
+ timezones in whatever way is most natural for your use case. The following examples
11709
+ illustrate the three supported input styles.
11710
+
11711
+ **IANA Timezone Names** (recommended for regions with daylight saving time):
11712
+
11713
+ ```python
11714
+ timezone="America/New_York" # Eastern Time (handles DST automatically)
11715
+ timezone="Europe/London" # UK time
11716
+ timezone="Asia/Tokyo" # Japan Standard Time
11717
+ timezone="Australia/Sydney" # Australian Eastern Time
11718
+ timezone="UTC" # Coordinated Universal Time
11719
+ ```
11720
+
11721
+ **Simple Hour Offsets** (quick and easy):
11722
+
11723
+ ```python
11724
+ timezone="-7" # UTC-7 (e.g., Mountain Standard Time)
11725
+ timezone="+5" # UTC+5 (e.g., Pakistan Standard Time)
11726
+ timezone="0" # UTC
11727
+ timezone="-12" # UTC-12
11728
+ ```
11729
+
11730
+ **ISO 8601 Offset Format** (precise, including fractional hours):
11731
+
11732
+ ```python
11733
+ timezone="-07:00" # UTC-7
11734
+ timezone="+05:30" # UTC+5:30 (e.g., India Standard Time)
11735
+ timezone="+00:00" # UTC
11736
+ timezone="-09:30" # UTC-9:30
11737
+ ```
11738
+
11739
+ When a timezone is specified:
11740
+
11741
+ - naive datetime values in the column are assumed to be in this timezone.
11742
+ - the reference time (if naive) is assumed to be in this timezone.
11743
+ - the validation report will show times in this timezone.
11744
+
11745
+ When `None` (default):
11746
+
11747
+ - if your column has timezone-aware datetimes, those timezones are used
11748
+ - if your column has naive datetimes, they're treated as UTC
11749
+ - the current time reference uses UTC
11750
+
11751
+ Note that IANA timezone names are preferred when daylight saving time transitions matter, as
11752
+ they automatically handle the offset changes. Fixed offsets like `"-7"` or `"-07:00"` do not
11753
+ account for DST.
11754
+
11755
+ Recommendations for Working with Timestamps
11756
+ -------------------------------------------
11757
+ When working with datetime data, storing timestamps in UTC in your databases is strongly
11758
+ recommended since it provides a consistent reference point regardless of where your data
11759
+ originates or where it's consumed. Using timezone-aware datetimes whenever possible helps
11760
+ avoid ambiguity—when a datetime has an explicit timezone, there's no guessing about what
11761
+ time it actually represents.
11762
+
11763
+ If you're working with naive datetimes (which lack timezone information), always specify the
11764
+ `timezone=` parameter so Pointblank knows how to interpret those values. When providing
11765
+ `reference_time=` as a string, use ISO 8601 format with the timezone offset included (e.g.,
11766
+ `"2024-01-15T10:30:00+00:00"`) to ensure unambiguous parsing. Finally, prefer IANA timezone
11767
+ names (like `"America/New_York"`) over fixed offsets (like `"-05:00"`) when daylight saving
11768
+ time transitions matter, since IANA names automatically handle the twice-yearly offset
11769
+ changes. To see all available IANA timezone names in Python, use
11770
+ `zoneinfo.available_timezones()` from the standard library's `zoneinfo` module.
11771
+
11772
+ Examples
11773
+ --------
11774
+ ```{python}
11775
+ #| echo: false
11776
+ #| output: false
11777
+ import pointblank as pb
11778
+ pb.config(report_incl_header=False, report_incl_footer=False)
11779
+ ```
11780
+
11781
+ The simplest use of `data_freshness()` requires just two arguments: the `column=` containing
11782
+ your timestamps and `max_age=` specifying how old the data can be. In this first example,
11783
+ we create sample data with an `"updated_at"` column containing timestamps from 1, 12, and
11784
+ 20 hours ago. By setting `max_age="24 hours"`, we're asserting that the most recent
11785
+ timestamp should be within 24 hours of the current time. Since the newest record is only
11786
+ 1 hour old, this validation passes.
11787
+
11788
+ ```{python}
11789
+ import pointblank as pb
11790
+ import polars as pl
11791
+ from datetime import datetime, timedelta
11792
+
11793
+ # Create sample data with recent timestamps
11794
+ recent_data = pl.DataFrame({
11795
+ "id": [1, 2, 3],
11796
+ "updated_at": [
11797
+ datetime.now() - timedelta(hours=1),
11798
+ datetime.now() - timedelta(hours=12),
11799
+ datetime.now() - timedelta(hours=20),
11800
+ ]
11801
+ })
11802
+
11803
+ validation = (
11804
+ pb.Validate(data=recent_data)
11805
+ .data_freshness(column="updated_at", max_age="24 hours")
11806
+ .interrogate()
11807
+ )
11808
+
11809
+ validation
11810
+ ```
11811
+
11812
+ The `max_age=` parameter accepts human-readable strings with various time units. You can
11813
+ chain multiple `data_freshness()` calls to check different freshness thresholds
11814
+ simultaneously—useful for tiered SLAs where you might want warnings at 30 minutes but
11815
+ errors at 2 days.
11816
+
11817
+ ```{python}
11818
+ # Check data is fresh within different time windows
11819
+ validation = (
11820
+ pb.Validate(data=recent_data)
11821
+ .data_freshness(column="updated_at", max_age="30 minutes") # Very fresh
11822
+ .data_freshness(column="updated_at", max_age="2 days") # Reasonably fresh
11823
+ .data_freshness(column="updated_at", max_age="1 week") # Within a week
11824
+ .interrogate()
11825
+ )
11826
+
11827
+ validation
11828
+ ```
11829
+
11830
+ When your data contains naive datetimes (timestamps without timezone information), use the
11831
+ `timezone=` parameter to specify what timezone those values represent. Here we have event
11832
+ data recorded in Eastern Time, so we set `timezone="America/New_York"` to ensure the
11833
+ freshness comparison is done correctly.
11834
+
11835
+ ```{python}
11836
+ # Data with naive datetimes (assume they're in Eastern Time)
11837
+ eastern_data = pl.DataFrame({
11838
+ "event_time": [
11839
+ datetime.now() - timedelta(hours=2),
11840
+ datetime.now() - timedelta(hours=5),
11841
+ ]
11842
+ })
11843
+
11844
+ validation = (
11845
+ pb.Validate(data=eastern_data)
11846
+ .data_freshness(
11847
+ column="event_time",
11848
+ max_age="12 hours",
11849
+ timezone="America/New_York" # Interpret times as Eastern
11850
+ )
11851
+ .interrogate()
11852
+ )
11853
+
11854
+ validation
11855
+ ```
11856
+
11857
+ For reproducible validations or historical checks, you can use `reference_time=` to compare
11858
+ against a specific point in time instead of the current time. This is particularly useful
11859
+ for testing or when validating data snapshots. The reference time should include a timezone
11860
+ offset (like `+00:00` for UTC) to avoid ambiguity.
11861
+
11862
+ ```{python}
11863
+ validation = (
11864
+ pb.Validate(data=recent_data)
11865
+ .data_freshness(
11866
+ column="updated_at",
11867
+ max_age="24 hours",
11868
+ reference_time="2024-01-15T12:00:00+00:00"
11869
+ )
11870
+ .interrogate()
11871
+ )
11872
+
11873
+ validation
11874
+ ```
11875
+ """
11876
+
11877
+ assertion_type = _get_fn_name()
11878
+
11879
+ _check_pre(pre=pre)
11880
+ _check_thresholds(thresholds=thresholds)
11881
+ _check_boolean_input(param=active, param_name="active")
11882
+ _check_boolean_input(param=allow_tz_mismatch, param_name="allow_tz_mismatch")
11883
+
11884
+ # Validate and parse the max_age parameter
11885
+ max_age_td = _parse_max_age(max_age)
11886
+
11887
+ # Validate the column parameter
11888
+ if not isinstance(column, str):
11889
+ raise TypeError(
11890
+ f"The `column` parameter must be a string, got {type(column).__name__}."
11891
+ )
11892
+
11893
+ # Validate the timezone parameter if provided
11894
+ if timezone is not None:
11895
+ _validate_timezone(timezone)
11896
+
11897
+ # Parse reference_time if it's a string
11898
+ parsed_reference_time = None
11899
+ if reference_time is not None:
11900
+ if isinstance(reference_time, str):
11901
+ parsed_reference_time = _parse_reference_time(reference_time)
11902
+ elif isinstance(reference_time, datetime.datetime):
11903
+ parsed_reference_time = reference_time
11904
+ else:
11905
+ raise TypeError(
11906
+ f"The `reference_time` parameter must be a string or datetime object, "
11907
+ f"got {type(reference_time).__name__}."
11908
+ )
11909
+
11910
+ # Determine threshold to use (global or local) and normalize a local `thresholds=` value
11911
+ thresholds = (
11912
+ self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
11913
+ )
11914
+
11915
+ # Package up the parameters for later interrogation
11916
+ values = {
11917
+ "max_age": max_age_td,
11918
+ "max_age_str": max_age if isinstance(max_age, str) else str(max_age),
11919
+ "reference_time": parsed_reference_time,
11920
+ "timezone": timezone,
11921
+ "allow_tz_mismatch": allow_tz_mismatch,
11922
+ }
11923
+
11924
+ # Determine brief to use (global or local) and transform any shorthands of `brief=`
11925
+ brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
11926
+
11927
+ val_info = _ValidationInfo(
11928
+ assertion_type=assertion_type,
11929
+ column=column,
11930
+ values=values,
11931
+ pre=pre,
11932
+ thresholds=thresholds,
11933
+ actions=actions,
11934
+ brief=brief,
11935
+ active=active,
11936
+ )
11937
+
11938
+ self._add_validation(validation_info=val_info)
11939
+
11940
+ return self
11941
+
11533
11942
  def col_count_match(
11534
11943
  self,
11535
11944
  count: int | Any,
@@ -12941,6 +13350,8 @@ class Validate:
12941
13350
  "col_schema_match",
12942
13351
  "row_count_match",
12943
13352
  "col_count_match",
13353
+ "data_freshness",
13354
+ "tbl_match",
12944
13355
  ]
12945
13356
 
12946
13357
  if validation.n == 0 and assertion_type not in table_level_assertions:
@@ -13201,6 +13612,105 @@ class Validate:
13201
13612
 
13202
13613
  results_tbl = None
13203
13614
 
13615
+ elif assertion_type == "data_freshness":
13616
+ from pointblank._interrogation import data_freshness as data_freshness_check
13617
+
13618
+ freshness_result = data_freshness_check(
13619
+ data_tbl=data_tbl_step,
13620
+ column=column,
13621
+ max_age=value["max_age"],
13622
+ reference_time=value["reference_time"],
13623
+ timezone=value["timezone"],
13624
+ allow_tz_mismatch=value["allow_tz_mismatch"],
13625
+ )
13626
+
13627
+ result_bool = freshness_result["passed"]
13628
+ validation.all_passed = result_bool
13629
+ validation.n = 1
13630
+ validation.n_passed = int(result_bool)
13631
+ validation.n_failed = 1 - int(result_bool)
13632
+
13633
+ # Store the freshness check details for reporting
13634
+ validation.val_info = freshness_result
13635
+
13636
+ # Update the values dict with actual computed values for failure text
13637
+ if freshness_result.get("age") is not None:
13638
+ value["age"] = freshness_result["age"]
13639
+
13640
+ # Add timezone warning note if applicable
13641
+ if freshness_result.get("tz_warning_key"):
13642
+ tz_key = freshness_result["tz_warning_key"]
13643
+ tz_warning_text = NOTES_TEXT.get(tz_key, {}).get(
13644
+ self.locale, NOTES_TEXT.get(tz_key, {}).get("en", "")
13645
+ )
13646
+ validation._add_note(
13647
+ key="tz_warning",
13648
+ markdown=f"⚠️ {tz_warning_text}",
13649
+ text=tz_warning_text,
13650
+ )
13651
+
13652
+ # Add note about column being empty if applicable
13653
+ if freshness_result.get("column_empty"):
13654
+ column_empty_text = NOTES_TEXT.get(
13655
+ "data_freshness_column_empty", {}
13656
+ ).get(
13657
+ self.locale,
13658
+ NOTES_TEXT.get("data_freshness_column_empty", {}).get(
13659
+ "en", "The datetime column is empty (no values to check)."
13660
+ ),
13661
+ )
13662
+ validation._add_note(
13663
+ key="column_empty",
13664
+ markdown=f"⚠️ {column_empty_text}",
13665
+ text=column_empty_text,
13666
+ )
13667
+
13668
+ # Add informational note about the freshness check
13669
+ if freshness_result.get("max_datetime") and freshness_result.get("age"):
13670
+ max_dt = freshness_result["max_datetime"]
13671
+ # Format datetime without microseconds for cleaner display
13672
+ if hasattr(max_dt, "replace"):
13673
+ max_dt_display = max_dt.replace(microsecond=0)
13674
+ else:
13675
+ max_dt_display = max_dt
13676
+ age = freshness_result["age"]
13677
+ age_str = _format_timedelta(age)
13678
+ max_age_str = _format_timedelta(value["max_age"])
13679
+
13680
+ # Get translated template for pass/fail
13681
+ if result_bool:
13682
+ details_key = "data_freshness_details_pass"
13683
+ prefix = "✓"
13684
+ else:
13685
+ details_key = "data_freshness_details_fail"
13686
+ prefix = "✗"
13687
+
13688
+ details_template = NOTES_TEXT.get(details_key, {}).get(
13689
+ self.locale,
13690
+ NOTES_TEXT.get(details_key, {}).get(
13691
+ "en",
13692
+ "Most recent data: `{max_dt}` (age: {age}, max allowed: {max_age})",
13693
+ ),
13694
+ )
13695
+
13696
+ # Format the template with values
13697
+ note_text = details_template.format(
13698
+ max_dt=max_dt_display, age=age_str, max_age=max_age_str
13699
+ )
13700
+ # For markdown, make the age bold
13701
+ note_md_template = details_template.replace(
13702
+ "(age: {age}", "(age: **{age}**"
13703
+ )
13704
+ note_md = f"{prefix} {note_md_template.format(max_dt=max_dt_display, age=age_str, max_age=max_age_str)}"
13705
+
13706
+ validation._add_note(
13707
+ key="freshness_details",
13708
+ markdown=note_md,
13709
+ text=note_text,
13710
+ )
13711
+
13712
+ results_tbl = None
13713
+
13204
13714
  elif assertion_type == "tbl_match":
13205
13715
  from pointblank._interrogation import tbl_match
13206
13716
 
@@ -13265,6 +13775,15 @@ class Validate:
13265
13775
  validation.n_passed = int(result_bool)
13266
13776
  validation.n_failed = 1 - result_bool
13267
13777
 
13778
+ # Store computed values for step reports
13779
+ validation.val_info = {
13780
+ "actual": real,
13781
+ "target": target,
13782
+ "tol": tol,
13783
+ "lower_bound": lower_bound,
13784
+ "upper_bound": upper_bound,
13785
+ }
13786
+
13268
13787
  results_tbl = None
13269
13788
  else:
13270
13789
  raise ValueError(
@@ -16045,6 +16564,69 @@ class Validate:
16045
16564
  tol_value = bound_finder.keywords.get("tol", 0) if bound_finder else 0
16046
16565
  values_upd.append(f"p = {p_value}<br/>tol = {tol_value}")
16047
16566
 
16567
+ elif assertion_type[i] in ["data_freshness"]:
16568
+ # Format max_age nicely for display
16569
+ max_age = value.get("max_age")
16570
+ max_age_str = _format_timedelta(max_age) if max_age else "&mdash;"
16571
+
16572
+ # Build additional lines with non-default parameters
16573
+ extra_lines = []
16574
+
16575
+ if value.get("reference_time") is not None:
16576
+ ref_time = value["reference_time"]
16577
+
16578
+ # Format datetime across two lines: date and time+tz
16579
+ if hasattr(ref_time, "strftime"):
16580
+ date_str = ref_time.strftime("@%Y-%m-%d")
16581
+ time_str = " " + ref_time.strftime("%H:%M:%S")
16582
+
16583
+ # Add timezone offset if present
16584
+ if hasattr(ref_time, "tzinfo") and ref_time.tzinfo is not None:
16585
+ tz_offset = ref_time.strftime("%z")
16586
+ if tz_offset:
16587
+ time_str += tz_offset
16588
+ extra_lines.append(date_str)
16589
+ extra_lines.append(time_str)
16590
+ else:
16591
+ extra_lines.append(f"@{ref_time}")
16592
+
16593
+ # Timezone and allow_tz_mismatch on same line
16594
+ tz_line_parts = []
16595
+ if value.get("timezone") is not None:
16596
+ # Convert timezone name to ISO 8601 offset format
16597
+ tz_name = value["timezone"]
16598
+
16599
+ try:
16600
+ tz_obj = ZoneInfo(tz_name)
16601
+
16602
+ # Get the current offset for this timezone
16603
+ now = datetime.datetime.now(tz_obj)
16604
+ offset = now.strftime("%z")
16605
+
16606
+ # Format as ISO 8601 extended: -07:00 (insert colon)
16607
+ if len(offset) == 5:
16608
+ tz_display = f"{offset[:3]}:{offset[3:]}"
16609
+ else:
16610
+ tz_display = offset
16611
+
16612
+ except Exception:
16613
+ tz_display = tz_name
16614
+ tz_line_parts.append(tz_display)
16615
+
16616
+ if value.get("allow_tz_mismatch"):
16617
+ tz_line_parts.append("~tz")
16618
+
16619
+ if tz_line_parts:
16620
+ extra_lines.append(" ".join(tz_line_parts))
16621
+
16622
+ if extra_lines:
16623
+ extra_html = "<br/>".join(extra_lines)
16624
+ values_upd.append(
16625
+ f'{max_age_str}<br/><span style="font-size: 9px;">{extra_html}</span>'
16626
+ )
16627
+ else:
16628
+ values_upd.append(max_age_str)
16629
+
16048
16630
  elif assertion_type[i] in ["col_schema_match"]:
16049
16631
  values_upd.append("SCHEMA")
16050
16632
 
@@ -16550,6 +17132,15 @@ class Validate:
16550
17132
  if incl_footer_timings:
16551
17133
  gt_tbl = gt_tbl.tab_source_note(source_note=html(table_time))
16552
17134
 
17135
+ # Add governance metadata as source note if any metadata is present
17136
+ governance_html = _create_governance_metadata_html(
17137
+ owner=self.owner,
17138
+ consumers=self.consumers,
17139
+ version=self.version,
17140
+ )
17141
+ if governance_html:
17142
+ gt_tbl = gt_tbl.tab_source_note(source_note=html(governance_html))
17143
+
16553
17144
  # Create notes markdown from validation steps and add as separate source note if enabled
16554
17145
  if incl_footer_notes:
16555
17146
  notes_markdown = _create_notes_html(self.validation_info)
@@ -16898,6 +17489,18 @@ class Validate:
16898
17489
  debug_return_df=debug_return_df,
16899
17490
  )
16900
17491
 
17492
+ elif is_valid_agg(assertion_type):
17493
+ step_report = _step_report_aggregate(
17494
+ assertion_type=assertion_type,
17495
+ i=i,
17496
+ column=column,
17497
+ values=values,
17498
+ all_passed=all_passed,
17499
+ val_info=val_info,
17500
+ header=header,
17501
+ lang=lang,
17502
+ )
17503
+
16901
17504
  else:
16902
17505
  step_report = None # pragma: no cover
16903
17506
 
@@ -17494,19 +18097,278 @@ def _process_brief(
17494
18097
  return brief
17495
18098
 
17496
18099
 
17497
- def _transform_auto_brief(brief: str | bool | None) -> str | None:
17498
- if isinstance(brief, bool):
17499
- if brief:
17500
- return "{auto}"
17501
- else:
17502
- return None
17503
- else:
17504
- return brief
18100
+ def _parse_max_age(max_age: str | datetime.timedelta) -> datetime.timedelta:
18101
+ """
18102
+ Parse a max_age specification into a timedelta.
17505
18103
 
18104
+ Parameters
18105
+ ----------
18106
+ max_age
18107
+ Either a timedelta object or a string like "24 hours", "1 day", "30 minutes",
18108
+ or compound expressions like "2 hours 15 minutes", "1 day 6 hours", etc.
17506
18109
 
17507
- def _process_action_str(
17508
- action_str: str,
17509
- step: int,
18110
+ Returns
18111
+ -------
18112
+ datetime.timedelta
18113
+ The parsed timedelta.
18114
+
18115
+ Raises
18116
+ ------
18117
+ ValueError
18118
+ If the string format is invalid or the unit is not recognized.
18119
+ """
18120
+ if isinstance(max_age, datetime.timedelta):
18121
+ return max_age
18122
+
18123
+ if not isinstance(max_age, str):
18124
+ raise TypeError(
18125
+ f"The `max_age` parameter must be a string or timedelta, got {type(max_age).__name__}."
18126
+ )
18127
+
18128
+ # Parse string format like "24 hours", "1 day", "30 minutes", etc.
18129
+ max_age_str = max_age.strip().lower()
18130
+
18131
+ # Define unit mappings (singular and plural forms)
18132
+ unit_mappings = {
18133
+ "second": "seconds",
18134
+ "seconds": "seconds",
18135
+ "sec": "seconds",
18136
+ "secs": "seconds",
18137
+ "s": "seconds",
18138
+ "minute": "minutes",
18139
+ "minutes": "minutes",
18140
+ "min": "minutes",
18141
+ "mins": "minutes",
18142
+ "m": "minutes",
18143
+ "hour": "hours",
18144
+ "hours": "hours",
18145
+ "hr": "hours",
18146
+ "hrs": "hours",
18147
+ "h": "hours",
18148
+ "day": "days",
18149
+ "days": "days",
18150
+ "d": "days",
18151
+ "week": "weeks",
18152
+ "weeks": "weeks",
18153
+ "wk": "weeks",
18154
+ "wks": "weeks",
18155
+ "w": "weeks",
18156
+ }
18157
+
18158
+ import re
18159
+
18160
+ # Pattern to find all number+unit pairs (supports compound expressions)
18161
+ # Matches: "2 hours 15 minutes", "1day6h", "30 min", etc.
18162
+ compound_pattern = r"(\d+(?:\.\d+)?)\s*([a-zA-Z]+)"
18163
+ matches = re.findall(compound_pattern, max_age_str)
18164
+
18165
+ if not matches:
18166
+ raise ValueError(
18167
+ f"Invalid max_age format: '{max_age}'. Expected format like '24 hours', "
18168
+ f"'1 day', '30 minutes', '2 hours 15 minutes', etc."
18169
+ )
18170
+
18171
+ # Accumulate timedelta from all matched components
18172
+ total_td = datetime.timedelta()
18173
+ valid_units = ["seconds", "minutes", "hours", "days", "weeks"]
18174
+
18175
+ for value_str, unit in matches:
18176
+ value = float(value_str)
18177
+
18178
+ # Normalize the unit
18179
+ unit_lower = unit.lower()
18180
+ if unit_lower not in unit_mappings:
18181
+ raise ValueError(
18182
+ f"Unknown time unit '{unit}' in max_age '{max_age}'. "
18183
+ f"Valid units are: {', '.join(valid_units)} (or their abbreviations)."
18184
+ )
18185
+
18186
+ normalized_unit = unit_mappings[unit_lower]
18187
+
18188
+ # Add to total timedelta
18189
+ if normalized_unit == "seconds":
18190
+ total_td += datetime.timedelta(seconds=value)
18191
+ elif normalized_unit == "minutes":
18192
+ total_td += datetime.timedelta(minutes=value)
18193
+ elif normalized_unit == "hours":
18194
+ total_td += datetime.timedelta(hours=value)
18195
+ elif normalized_unit == "days":
18196
+ total_td += datetime.timedelta(days=value)
18197
+ elif normalized_unit == "weeks":
18198
+ total_td += datetime.timedelta(weeks=value)
18199
+
18200
+ return total_td
18201
+
18202
+
18203
+ def _parse_timezone(timezone: str) -> datetime.tzinfo:
18204
+ """
18205
+ Parse a timezone string into a tzinfo object.
18206
+
18207
+ Supports:
18208
+ - IANA timezone names: "America/New_York", "Europe/London", "UTC"
18209
+ - Offset strings: "-7", "+5", "-07:00", "+05:30"
18210
+
18211
+ Parameters
18212
+ ----------
18213
+ timezone
18214
+ The timezone string to parse.
18215
+
18216
+ Returns
18217
+ -------
18218
+ datetime.tzinfo
18219
+ The parsed timezone object.
18220
+
18221
+ Raises
18222
+ ------
18223
+ ValueError
18224
+ If the timezone is not valid.
18225
+ """
18226
+ import re
18227
+
18228
+ # Check for offset formats: "-7", "+5", "-07:00", "+05:30", etc.
18229
+ # Match: optional sign, 1-2 digits, optional colon and 2 more digits
18230
+ offset_pattern = r"^([+-]?)(\d{1,2})(?::(\d{2}))?$"
18231
+ match = re.match(offset_pattern, timezone.strip())
18232
+
18233
+ if match:
18234
+ sign_str, hours_str, minutes_str = match.groups()
18235
+ hours = int(hours_str)
18236
+ minutes = int(minutes_str) if minutes_str else 0
18237
+
18238
+ # Apply sign (default positive if not specified)
18239
+ total_minutes = hours * 60 + minutes
18240
+ if sign_str == "-":
18241
+ total_minutes = -total_minutes
18242
+
18243
+ return datetime.timezone(datetime.timedelta(minutes=total_minutes))
18244
+
18245
+ # Try IANA timezone names (zoneinfo is standard in Python 3.9+)
18246
+ try:
18247
+ return ZoneInfo(timezone)
18248
+ except KeyError:
18249
+ pass
18250
+
18251
+ raise ValueError(
18252
+ f"Invalid timezone: '{timezone}'. Use an IANA timezone name "
18253
+ f"(e.g., 'America/New_York', 'UTC') or an offset (e.g., '-7', '+05:30')."
18254
+ )
18255
+
18256
+
18257
+ def _validate_timezone(timezone: str) -> None:
18258
+ """
18259
+ Validate that a timezone string is valid.
18260
+
18261
+ Parameters
18262
+ ----------
18263
+ timezone
18264
+ The timezone string to validate.
18265
+
18266
+ Raises
18267
+ ------
18268
+ ValueError
18269
+ If the timezone is not valid.
18270
+ """
18271
+ # Use _parse_timezone to validate - it will raise ValueError if invalid
18272
+ _parse_timezone(timezone)
18273
+
18274
+
18275
+ def _parse_reference_time(reference_time: str) -> datetime.datetime:
18276
+ """
18277
+ Parse a reference time string into a datetime object.
18278
+
18279
+ Parameters
18280
+ ----------
18281
+ reference_time
18282
+ An ISO 8601 formatted datetime string.
18283
+
18284
+ Returns
18285
+ -------
18286
+ datetime.datetime
18287
+ The parsed datetime object.
18288
+
18289
+ Raises
18290
+ ------
18291
+ ValueError
18292
+ If the string cannot be parsed.
18293
+ """
18294
+ # Try parsing with fromisoformat (handles most ISO 8601 formats)
18295
+ try:
18296
+ return datetime.datetime.fromisoformat(reference_time)
18297
+ except ValueError:
18298
+ pass
18299
+
18300
+ # Try parsing common formats
18301
+ formats = [
18302
+ "%Y-%m-%d %H:%M:%S",
18303
+ "%Y-%m-%d %H:%M:%S%z",
18304
+ "%Y-%m-%dT%H:%M:%S",
18305
+ "%Y-%m-%dT%H:%M:%S%z",
18306
+ "%Y-%m-%d",
18307
+ ]
18308
+
18309
+ for fmt in formats:
18310
+ try:
18311
+ return datetime.datetime.strptime(reference_time, fmt)
18312
+ except ValueError:
18313
+ continue
18314
+
18315
+ raise ValueError(
18316
+ f"Could not parse reference_time '{reference_time}'. "
18317
+ f"Please use ISO 8601 format like '2024-01-15T10:30:00' or '2024-01-15T10:30:00+00:00'."
18318
+ )
18319
+
18320
+
18321
+ def _format_timedelta(td: datetime.timedelta) -> str:
18322
+ """
18323
+ Format a timedelta into a human-readable string.
18324
+
18325
+ Parameters
18326
+ ----------
18327
+ td
18328
+ The timedelta to format.
18329
+
18330
+ Returns
18331
+ -------
18332
+ str
18333
+ A human-readable string like "24 hours", "2 days 5 hours", etc.
18334
+ """
18335
+ total_seconds = td.total_seconds()
18336
+
18337
+ if total_seconds < 60:
18338
+ val = round(total_seconds, 1)
18339
+ return f"{val}s"
18340
+ elif total_seconds < 3600:
18341
+ val = round(total_seconds / 60, 1)
18342
+ return f"{val}m"
18343
+ elif total_seconds < 86400:
18344
+ val = round(total_seconds / 3600, 1)
18345
+ return f"{val}h"
18346
+ elif total_seconds < 604800:
18347
+ # For days, show "xd yh" format for better readability
18348
+ days = int(total_seconds // 86400)
18349
+ remaining_hours = round((total_seconds % 86400) / 3600, 1)
18350
+ if remaining_hours == 0:
18351
+ return f"{days}d"
18352
+ else:
18353
+ return f"{days}d {remaining_hours}h"
18354
+ else:
18355
+ val = round(total_seconds / 604800)
18356
+ return f"{val}w"
18357
+
18358
+
18359
+ def _transform_auto_brief(brief: str | bool | None) -> str | None:
18360
+ if isinstance(brief, bool):
18361
+ if brief:
18362
+ return "{auto}"
18363
+ else:
18364
+ return None
18365
+ else:
18366
+ return brief
18367
+
18368
+
18369
+ def _process_action_str(
18370
+ action_str: str,
18371
+ step: int,
17510
18372
  col: str | None,
17511
18373
  value: Any,
17512
18374
  type: str,
@@ -17688,6 +18550,14 @@ def _create_autobrief_or_failure_text(
17688
18550
  for_failure=for_failure,
17689
18551
  )
17690
18552
 
18553
+ if assertion_type == "data_freshness":
18554
+ return _create_text_data_freshness(
18555
+ lang=lang,
18556
+ column=column,
18557
+ value=values,
18558
+ for_failure=for_failure,
18559
+ )
18560
+
17691
18561
  if assertion_type == "col_pct_null":
17692
18562
  return _create_text_col_pct_null(
17693
18563
  lang=lang,
@@ -17916,6 +18786,33 @@ def _create_text_col_count_match(lang: str, value: dict, for_failure: bool = Fal
17916
18786
  return EXPECT_FAIL_TEXT[f"col_count_match_n_{type_}_text"][lang].format(values_text=values_text)
17917
18787
 
17918
18788
 
18789
+ def _create_text_data_freshness(
18790
+ lang: str,
18791
+ column: str | None,
18792
+ value: dict,
18793
+ for_failure: bool = False,
18794
+ ) -> str:
18795
+ """Create text for data_freshness validation."""
18796
+ type_ = _expect_failure_type(for_failure=for_failure)
18797
+
18798
+ column_text = _prep_column_text(column=column)
18799
+ max_age_text = _format_timedelta(value.get("max_age"))
18800
+
18801
+ if for_failure:
18802
+ age = value.get("age")
18803
+ age_text = _format_timedelta(age) if age else "unknown"
18804
+ return EXPECT_FAIL_TEXT[f"data_freshness_{type_}_text"][lang].format(
18805
+ column_text=column_text,
18806
+ max_age_text=max_age_text,
18807
+ age_text=age_text,
18808
+ )
18809
+ else:
18810
+ return EXPECT_FAIL_TEXT[f"data_freshness_{type_}_text"][lang].format(
18811
+ column_text=column_text,
18812
+ max_age_text=max_age_text,
18813
+ )
18814
+
18815
+
17919
18816
  def _create_text_col_pct_null(
17920
18817
  lang: str,
17921
18818
  column: str | None,
@@ -18850,6 +19747,71 @@ def _extract_pre_argument(source: str) -> str:
18850
19747
  return pre_arg
18851
19748
 
18852
19749
 
19750
+ def _create_governance_metadata_html(
19751
+ owner: str | None,
19752
+ consumers: list[str] | None,
19753
+ version: str | None,
19754
+ ) -> str:
19755
+ """
19756
+ Create HTML for governance metadata display in the report footer.
19757
+
19758
+ Parameters
19759
+ ----------
19760
+ owner
19761
+ The owner of the data being validated.
19762
+ consumers
19763
+ List of consumers who depend on the data.
19764
+ version
19765
+ The version of the validation plan.
19766
+
19767
+ Returns
19768
+ -------
19769
+ str
19770
+ HTML string containing formatted governance metadata, or empty string if no metadata.
19771
+ """
19772
+ if owner is None and consumers is None and version is None:
19773
+ return ""
19774
+
19775
+ metadata_parts = []
19776
+
19777
+ # Common style for the metadata badges (similar to timing style but slightly smaller font)
19778
+ badge_style = (
19779
+ "background-color: #FFF; color: #444; padding: 0.5em 0.5em; position: inherit; "
19780
+ "margin-right: 5px; border: solid 1px #999999; font-variant-numeric: tabular-nums; "
19781
+ "border-radius: 0; padding: 2px 10px 2px 10px; font-size: 11px;"
19782
+ )
19783
+ label_style = (
19784
+ "color: #777; font-weight: bold; font-size: 9px; text-transform: uppercase; "
19785
+ "margin-right: 3px;"
19786
+ )
19787
+
19788
+ if owner is not None:
19789
+ metadata_parts.append(
19790
+ f"<span style='{badge_style}'><span style='{label_style}'>Owner:</span> {owner}</span>"
19791
+ )
19792
+
19793
+ if consumers is not None and len(consumers) > 0:
19794
+ consumers_str = ", ".join(consumers)
19795
+ metadata_parts.append(
19796
+ f"<span style='{badge_style}'>"
19797
+ f"<span style='{label_style}'>Consumers:</span> {consumers_str}"
19798
+ f"</span>"
19799
+ )
19800
+
19801
+ if version is not None:
19802
+ metadata_parts.append(
19803
+ f"<span style='{badge_style}'>"
19804
+ f"<span style='{label_style}'>Version:</span> {version}"
19805
+ f"</span>"
19806
+ )
19807
+
19808
+ return (
19809
+ f"<div style='margin-top: 5px; margin-bottom: 5px; margin-left: 10px;'>"
19810
+ f"{''.join(metadata_parts)}"
19811
+ f"</div>"
19812
+ )
19813
+
19814
+
18853
19815
  def _create_table_time_html(
18854
19816
  time_start: datetime.datetime | None, time_end: datetime.datetime | None
18855
19817
  ) -> str:
@@ -20356,6 +21318,296 @@ def _step_report_rows_distinct(
20356
21318
  return step_report
20357
21319
 
20358
21320
 
21321
+ def _step_report_aggregate(
21322
+ assertion_type: str,
21323
+ i: int,
21324
+ column: str,
21325
+ values: dict,
21326
+ all_passed: bool,
21327
+ val_info: dict | None,
21328
+ header: str,
21329
+ lang: str,
21330
+ ) -> GT:
21331
+ """
21332
+ Generate a step report for aggregate validation methods (col_sum_*, col_avg_*, col_sd_*).
21333
+
21334
+ This creates a 1-row table showing the computed aggregate value vs. the target value,
21335
+ along with tolerance and pass/fail status.
21336
+ """
21337
+
21338
+ # Determine whether the `lang` value represents a right-to-left language
21339
+ is_rtl_lang = lang in RTL_LANGUAGES
21340
+ direction_rtl = " direction: rtl;" if is_rtl_lang else ""
21341
+
21342
+ # Parse assertion type to get aggregate function and comparison operator
21343
+ # Format: col_{agg}_{comp} (e.g., col_sum_eq, col_avg_gt, col_sd_le)
21344
+ parts = assertion_type.split("_")
21345
+ agg_type = parts[1] # sum, avg, sd
21346
+ comp_type = parts[2] # eq, gt, ge, lt, le
21347
+
21348
+ # Map aggregate type to display name
21349
+ agg_display = {"sum": "SUM", "avg": "AVG", "sd": "SD"}.get(agg_type, agg_type.upper())
21350
+
21351
+ # Map comparison type to symbol
21352
+ comp_symbols = {
21353
+ "eq": "=",
21354
+ "gt": "&gt;",
21355
+ "ge": "&ge;",
21356
+ "lt": "&lt;",
21357
+ "le": "&le;",
21358
+ }
21359
+ comp_symbol = comp_symbols.get(comp_type, comp_type)
21360
+
21361
+ # Get computed values from val_info (stored during interrogation)
21362
+ if val_info is not None:
21363
+ actual = val_info.get("actual", None)
21364
+ target = val_info.get("target", None)
21365
+ tol = val_info.get("tol", 0)
21366
+ lower_bound = val_info.get("lower_bound", target)
21367
+ upper_bound = val_info.get("upper_bound", target)
21368
+ else:
21369
+ # Fallback if val_info is not available
21370
+ actual = None
21371
+ target = values.get("value", None)
21372
+ tol = values.get("tol", 0)
21373
+ lower_bound = target
21374
+ upper_bound = target
21375
+
21376
+ # Format column name for display (handle list vs string)
21377
+ if isinstance(column, list):
21378
+ column_display = column[0] if len(column) == 1 else ", ".join(column)
21379
+ else:
21380
+ column_display = str(column)
21381
+
21382
+ # Generate assertion text for header
21383
+ if target is not None:
21384
+ target_display = f"{target:,.6g}" if isinstance(target, float) else f"{target:,}"
21385
+ assertion_text = f"{agg_display}({column_display}) {comp_symbol} {target_display}"
21386
+ else:
21387
+ assertion_text = f"{agg_display}({column_display}) {comp_symbol} ?"
21388
+
21389
+ # Calculate difference from boundary
21390
+ if actual is not None and target is not None:
21391
+ if comp_type == "eq":
21392
+ # For equality, show distance from target (considering tolerance)
21393
+ if lower_bound == upper_bound:
21394
+ difference = actual - target
21395
+ else:
21396
+ # With tolerance, show distance from nearest bound
21397
+ if actual < lower_bound:
21398
+ difference = actual - lower_bound
21399
+ elif actual > upper_bound:
21400
+ difference = actual - upper_bound
21401
+ else:
21402
+ difference = 0 # Within bounds
21403
+ elif comp_type in ["gt", "ge"]:
21404
+ # Distance from lower bound (positive if passing)
21405
+ difference = actual - lower_bound
21406
+ elif comp_type in ["lt", "le"]:
21407
+ # Distance from upper bound (negative if passing)
21408
+ difference = actual - upper_bound
21409
+ else:
21410
+ difference = actual - target
21411
+ else:
21412
+ difference = None
21413
+
21414
+ # Format values for display
21415
+ def format_value(v):
21416
+ if v is None:
21417
+ return "&mdash;"
21418
+ if isinstance(v, float):
21419
+ return f"{v:,.6g}"
21420
+ return f"{v:,}"
21421
+
21422
+ # Format tolerance for display
21423
+ if tol == 0:
21424
+ tol_display = "&mdash;"
21425
+ elif isinstance(tol, tuple):
21426
+ tol_display = f"(-{tol[0]}, +{tol[1]})"
21427
+ else:
21428
+ tol_display = f"&plusmn;{tol}"
21429
+
21430
+ # Format difference with sign
21431
+ if difference is not None:
21432
+ if difference == 0:
21433
+ diff_display = "0"
21434
+ elif difference > 0:
21435
+ diff_display = (
21436
+ f"+{difference:,.6g}" if isinstance(difference, float) else f"+{difference:,}"
21437
+ )
21438
+ else:
21439
+ diff_display = (
21440
+ f"{difference:,.6g}" if isinstance(difference, float) else f"{difference:,}"
21441
+ )
21442
+ else:
21443
+ diff_display = "&mdash;"
21444
+
21445
+ # Create pass/fail indicator
21446
+ if all_passed:
21447
+ status_html = CHECK_MARK_SPAN
21448
+ status_color = "#4CA64C"
21449
+ else:
21450
+ status_html = CROSS_MARK_SPAN
21451
+ status_color = "#CF142B"
21452
+
21453
+ # Select DataFrame library (prefer Polars, fall back to Pandas)
21454
+ if _is_lib_present("polars"):
21455
+ import polars as pl
21456
+
21457
+ df_lib = pl
21458
+ elif _is_lib_present("pandas"): # pragma: no cover
21459
+ import pandas as pd # pragma: no cover
21460
+
21461
+ df_lib = pd # pragma: no cover
21462
+ else: # pragma: no cover
21463
+ raise ImportError(
21464
+ "Neither Polars nor Pandas is available for step report generation"
21465
+ ) # pragma: no cover
21466
+
21467
+ # Create the data for the 1-row table
21468
+ report_data = df_lib.DataFrame(
21469
+ {
21470
+ "actual": [format_value(actual)],
21471
+ "target": [format_value(target)],
21472
+ "tolerance": [tol_display],
21473
+ "difference": [diff_display],
21474
+ "status": [status_html],
21475
+ }
21476
+ )
21477
+
21478
+ # Create GT table with styling matching preview() and other step reports
21479
+ step_report = (
21480
+ GT(report_data, id="pb_step_tbl")
21481
+ .opt_table_font(font=google_font(name="IBM Plex Sans"))
21482
+ .opt_align_table_header(align="left")
21483
+ .cols_label(
21484
+ actual="ACTUAL",
21485
+ target="EXPECTED",
21486
+ tolerance="TOL",
21487
+ difference="DIFFERENCE",
21488
+ status="",
21489
+ )
21490
+ .cols_align(align="center")
21491
+ .fmt_markdown(columns=["actual", "target", "tolerance", "difference", "status"])
21492
+ .tab_style(
21493
+ style=style.text(color="black", font=google_font(name="IBM Plex Mono"), size="13px"),
21494
+ locations=loc.body(columns=["actual", "target", "tolerance", "difference"]),
21495
+ )
21496
+ .tab_style(
21497
+ style=style.text(size="13px"),
21498
+ locations=loc.body(columns="status"),
21499
+ )
21500
+ .tab_style(
21501
+ style=style.text(color="gray20", font=google_font(name="IBM Plex Mono"), size="12px"),
21502
+ locations=loc.column_labels(),
21503
+ )
21504
+ .tab_style(
21505
+ style=style.borders(
21506
+ sides=["top", "bottom"], color="#E9E9E9", style="solid", weight="1px"
21507
+ ),
21508
+ locations=loc.body(),
21509
+ )
21510
+ .tab_options(
21511
+ table_body_vlines_style="solid",
21512
+ table_body_vlines_width="1px",
21513
+ table_body_vlines_color="#E9E9E9",
21514
+ column_labels_vlines_style="solid",
21515
+ column_labels_vlines_width="1px",
21516
+ column_labels_vlines_color="#F2F2F2",
21517
+ )
21518
+ .cols_width(
21519
+ cases={
21520
+ "actual": "200px",
21521
+ "target": "200px",
21522
+ "tolerance": "150px",
21523
+ "difference": "200px",
21524
+ "status": "50px",
21525
+ }
21526
+ )
21527
+ )
21528
+
21529
+ # Apply styling based on pass/fail
21530
+ if all_passed:
21531
+ step_report = step_report.tab_style(
21532
+ style=[
21533
+ style.text(color="#006400"),
21534
+ style.fill(color="#4CA64C33"),
21535
+ ],
21536
+ locations=loc.body(columns="status"),
21537
+ )
21538
+ else:
21539
+ step_report = step_report.tab_style(
21540
+ style=[
21541
+ style.text(color="#B22222"),
21542
+ style.fill(color="#FFC1C159"),
21543
+ ],
21544
+ locations=loc.body(columns="status"),
21545
+ )
21546
+
21547
+ # If the version of `great_tables` is `>=0.17.0` then disable Quarto table processing
21548
+ if version("great_tables") >= "0.17.0":
21549
+ step_report = step_report.tab_options(quarto_disable_processing=True)
21550
+
21551
+ # If no header requested, return the table as-is
21552
+ if header is None:
21553
+ return step_report
21554
+
21555
+ # Create header content
21556
+ assertion_header_text = STEP_REPORT_TEXT["assertion_header_text"][lang]
21557
+
21558
+ # Wrap assertion text in styled code tag
21559
+ assertion_code = (
21560
+ f"<code style='color: #303030; font-family: monospace; font-size: smaller;'>"
21561
+ f"{assertion_text}</code>"
21562
+ )
21563
+
21564
+ if all_passed:
21565
+ title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CHECK_MARK_SPAN
21566
+ result_stmt = STEP_REPORT_TEXT.get("agg_success_statement", {}).get(
21567
+ lang,
21568
+ f"The aggregate value for column <code>{column_display}</code> satisfies the condition.",
21569
+ )
21570
+ if isinstance(result_stmt, str) and "{column}" in result_stmt:
21571
+ result_stmt = result_stmt.format(column=column_display)
21572
+ else:
21573
+ title = STEP_REPORT_TEXT["report_for_step_i"][lang].format(i=i) + " " + CROSS_MARK_SPAN
21574
+ result_stmt = STEP_REPORT_TEXT.get("agg_failure_statement", {}).get(
21575
+ lang,
21576
+ f"The aggregate value for column <code>{column_display}</code> does not satisfy the condition.",
21577
+ )
21578
+ if isinstance(result_stmt, str) and "{column}" in result_stmt:
21579
+ result_stmt = result_stmt.format(column=column_display)
21580
+
21581
+ details = (
21582
+ f"<div style='font-size: 13.6px; {direction_rtl}'>"
21583
+ "<div style='padding-top: 7px;'>"
21584
+ f"{assertion_header_text} <span style='border-style: solid; border-width: thin; "
21585
+ "border-color: lightblue; padding-left: 2px; padding-right: 2px;'>"
21586
+ "<code style='color: #303030; background-color: transparent; "
21587
+ f"position: relative; bottom: 1px;'>{assertion_code}</code></span>"
21588
+ "</div>"
21589
+ "<div style='padding-top: 7px;'>"
21590
+ f"{result_stmt}"
21591
+ "</div>"
21592
+ "</div>"
21593
+ )
21594
+
21595
+ # Generate the default template text for the header when `":default:"` is used
21596
+ if header == ":default:":
21597
+ header = "{title}{details}"
21598
+
21599
+ # Use commonmark to convert the header text to HTML
21600
+ header = commonmark.commonmark(header)
21601
+
21602
+ # Place any templated text in the header
21603
+ header = header.format(title=title, details=details)
21604
+
21605
+ # Create the header with `header` string
21606
+ step_report = step_report.tab_header(title=md(header))
21607
+
21608
+ return step_report
21609
+
21610
+
20359
21611
  def _step_report_schema_in_order(
20360
21612
  step: int, schema_info: dict, header: str | None, lang: str, debug_return_df: bool = False
20361
21613
  ) -> GT | Any: