pointblank 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (318)
  1. pointblank/__init__.py +44 -1
  2. pointblank/_utils_llms_txt.py +20 -0
  3. pointblank/data/api-docs.txt +793 -1
  4. pointblank/field.py +1507 -0
  5. pointblank/generate/__init__.py +17 -0
  6. pointblank/generate/base.py +49 -0
  7. pointblank/generate/generators.py +573 -0
  8. pointblank/generate/regex.py +217 -0
  9. pointblank/locales/__init__.py +1476 -0
  10. pointblank/locales/data/AR/address.json +73 -0
  11. pointblank/locales/data/AR/company.json +60 -0
  12. pointblank/locales/data/AR/internet.json +19 -0
  13. pointblank/locales/data/AR/misc.json +7 -0
  14. pointblank/locales/data/AR/person.json +39 -0
  15. pointblank/locales/data/AR/text.json +38 -0
  16. pointblank/locales/data/AT/address.json +84 -0
  17. pointblank/locales/data/AT/company.json +65 -0
  18. pointblank/locales/data/AT/internet.json +20 -0
  19. pointblank/locales/data/AT/misc.json +8 -0
  20. pointblank/locales/data/AT/person.json +17 -0
  21. pointblank/locales/data/AT/text.json +35 -0
  22. pointblank/locales/data/AU/address.json +83 -0
  23. pointblank/locales/data/AU/company.json +65 -0
  24. pointblank/locales/data/AU/internet.json +20 -0
  25. pointblank/locales/data/AU/misc.json +8 -0
  26. pointblank/locales/data/AU/person.json +17 -0
  27. pointblank/locales/data/AU/text.json +35 -0
  28. pointblank/locales/data/BE/address.json +225 -0
  29. pointblank/locales/data/BE/company.json +129 -0
  30. pointblank/locales/data/BE/internet.json +36 -0
  31. pointblank/locales/data/BE/misc.json +6 -0
  32. pointblank/locales/data/BE/person.json +62 -0
  33. pointblank/locales/data/BE/text.json +38 -0
  34. pointblank/locales/data/BG/address.json +75 -0
  35. pointblank/locales/data/BG/company.json +60 -0
  36. pointblank/locales/data/BG/internet.json +19 -0
  37. pointblank/locales/data/BG/misc.json +7 -0
  38. pointblank/locales/data/BG/person.json +40 -0
  39. pointblank/locales/data/BG/text.json +38 -0
  40. pointblank/locales/data/BR/address.json +98 -0
  41. pointblank/locales/data/BR/company.json +65 -0
  42. pointblank/locales/data/BR/internet.json +20 -0
  43. pointblank/locales/data/BR/misc.json +8 -0
  44. pointblank/locales/data/BR/person.json +17 -0
  45. pointblank/locales/data/BR/text.json +35 -0
  46. pointblank/locales/data/CA/address.json +747 -0
  47. pointblank/locales/data/CA/company.json +120 -0
  48. pointblank/locales/data/CA/internet.json +24 -0
  49. pointblank/locales/data/CA/misc.json +11 -0
  50. pointblank/locales/data/CA/person.json +1033 -0
  51. pointblank/locales/data/CA/text.json +58 -0
  52. pointblank/locales/data/CH/address.json +184 -0
  53. pointblank/locales/data/CH/company.json +112 -0
  54. pointblank/locales/data/CH/internet.json +20 -0
  55. pointblank/locales/data/CH/misc.json +10 -0
  56. pointblank/locales/data/CH/person.json +64 -0
  57. pointblank/locales/data/CH/text.json +45 -0
  58. pointblank/locales/data/CL/address.json +71 -0
  59. pointblank/locales/data/CL/company.json +60 -0
  60. pointblank/locales/data/CL/internet.json +19 -0
  61. pointblank/locales/data/CL/misc.json +7 -0
  62. pointblank/locales/data/CL/person.json +38 -0
  63. pointblank/locales/data/CL/text.json +38 -0
  64. pointblank/locales/data/CN/address.json +124 -0
  65. pointblank/locales/data/CN/company.json +76 -0
  66. pointblank/locales/data/CN/internet.json +20 -0
  67. pointblank/locales/data/CN/misc.json +8 -0
  68. pointblank/locales/data/CN/person.json +50 -0
  69. pointblank/locales/data/CN/text.json +38 -0
  70. pointblank/locales/data/CO/address.json +76 -0
  71. pointblank/locales/data/CO/company.json +60 -0
  72. pointblank/locales/data/CO/internet.json +19 -0
  73. pointblank/locales/data/CO/misc.json +7 -0
  74. pointblank/locales/data/CO/person.json +38 -0
  75. pointblank/locales/data/CO/text.json +38 -0
  76. pointblank/locales/data/CY/address.json +62 -0
  77. pointblank/locales/data/CY/company.json +60 -0
  78. pointblank/locales/data/CY/internet.json +19 -0
  79. pointblank/locales/data/CY/misc.json +7 -0
  80. pointblank/locales/data/CY/person.json +38 -0
  81. pointblank/locales/data/CY/text.json +38 -0
  82. pointblank/locales/data/CZ/address.json +70 -0
  83. pointblank/locales/data/CZ/company.json +61 -0
  84. pointblank/locales/data/CZ/internet.json +19 -0
  85. pointblank/locales/data/CZ/misc.json +7 -0
  86. pointblank/locales/data/CZ/person.json +40 -0
  87. pointblank/locales/data/CZ/text.json +38 -0
  88. pointblank/locales/data/DE/address.json +756 -0
  89. pointblank/locales/data/DE/company.json +101 -0
  90. pointblank/locales/data/DE/internet.json +22 -0
  91. pointblank/locales/data/DE/misc.json +11 -0
  92. pointblank/locales/data/DE/person.json +1026 -0
  93. pointblank/locales/data/DE/text.json +50 -0
  94. pointblank/locales/data/DK/address.json +231 -0
  95. pointblank/locales/data/DK/company.json +65 -0
  96. pointblank/locales/data/DK/internet.json +20 -0
  97. pointblank/locales/data/DK/misc.json +7 -0
  98. pointblank/locales/data/DK/person.json +45 -0
  99. pointblank/locales/data/DK/text.json +43 -0
  100. pointblank/locales/data/EE/address.json +69 -0
  101. pointblank/locales/data/EE/company.json +60 -0
  102. pointblank/locales/data/EE/internet.json +19 -0
  103. pointblank/locales/data/EE/misc.json +7 -0
  104. pointblank/locales/data/EE/person.json +39 -0
  105. pointblank/locales/data/EE/text.json +38 -0
  106. pointblank/locales/data/ES/address.json +3086 -0
  107. pointblank/locales/data/ES/company.json +644 -0
  108. pointblank/locales/data/ES/internet.json +25 -0
  109. pointblank/locales/data/ES/misc.json +11 -0
  110. pointblank/locales/data/ES/person.json +488 -0
  111. pointblank/locales/data/ES/text.json +49 -0
  112. pointblank/locales/data/FI/address.json +93 -0
  113. pointblank/locales/data/FI/company.json +65 -0
  114. pointblank/locales/data/FI/internet.json +20 -0
  115. pointblank/locales/data/FI/misc.json +8 -0
  116. pointblank/locales/data/FI/person.json +17 -0
  117. pointblank/locales/data/FI/text.json +35 -0
  118. pointblank/locales/data/FR/address.json +619 -0
  119. pointblank/locales/data/FR/company.json +111 -0
  120. pointblank/locales/data/FR/internet.json +22 -0
  121. pointblank/locales/data/FR/misc.json +11 -0
  122. pointblank/locales/data/FR/person.json +1066 -0
  123. pointblank/locales/data/FR/text.json +50 -0
  124. pointblank/locales/data/GB/address.json +5759 -0
  125. pointblank/locales/data/GB/company.json +131 -0
  126. pointblank/locales/data/GB/internet.json +24 -0
  127. pointblank/locales/data/GB/misc.json +45 -0
  128. pointblank/locales/data/GB/person.json +578 -0
  129. pointblank/locales/data/GB/text.json +61 -0
  130. pointblank/locales/data/GR/address.json +68 -0
  131. pointblank/locales/data/GR/company.json +61 -0
  132. pointblank/locales/data/GR/internet.json +19 -0
  133. pointblank/locales/data/GR/misc.json +7 -0
  134. pointblank/locales/data/GR/person.json +39 -0
  135. pointblank/locales/data/GR/text.json +38 -0
  136. pointblank/locales/data/HK/address.json +79 -0
  137. pointblank/locales/data/HK/company.json +69 -0
  138. pointblank/locales/data/HK/internet.json +19 -0
  139. pointblank/locales/data/HK/misc.json +7 -0
  140. pointblank/locales/data/HK/person.json +42 -0
  141. pointblank/locales/data/HK/text.json +38 -0
  142. pointblank/locales/data/HR/address.json +73 -0
  143. pointblank/locales/data/HR/company.json +60 -0
  144. pointblank/locales/data/HR/internet.json +19 -0
  145. pointblank/locales/data/HR/misc.json +7 -0
  146. pointblank/locales/data/HR/person.json +38 -0
  147. pointblank/locales/data/HR/text.json +38 -0
  148. pointblank/locales/data/HU/address.json +70 -0
  149. pointblank/locales/data/HU/company.json +61 -0
  150. pointblank/locales/data/HU/internet.json +19 -0
  151. pointblank/locales/data/HU/misc.json +7 -0
  152. pointblank/locales/data/HU/person.json +40 -0
  153. pointblank/locales/data/HU/text.json +38 -0
  154. pointblank/locales/data/ID/address.json +68 -0
  155. pointblank/locales/data/ID/company.json +61 -0
  156. pointblank/locales/data/ID/internet.json +19 -0
  157. pointblank/locales/data/ID/misc.json +7 -0
  158. pointblank/locales/data/ID/person.json +40 -0
  159. pointblank/locales/data/ID/text.json +38 -0
  160. pointblank/locales/data/IE/address.json +643 -0
  161. pointblank/locales/data/IE/company.json +140 -0
  162. pointblank/locales/data/IE/internet.json +24 -0
  163. pointblank/locales/data/IE/misc.json +44 -0
  164. pointblank/locales/data/IE/person.json +55 -0
  165. pointblank/locales/data/IE/text.json +60 -0
  166. pointblank/locales/data/IN/address.json +92 -0
  167. pointblank/locales/data/IN/company.json +65 -0
  168. pointblank/locales/data/IN/internet.json +20 -0
  169. pointblank/locales/data/IN/misc.json +8 -0
  170. pointblank/locales/data/IN/person.json +52 -0
  171. pointblank/locales/data/IN/text.json +39 -0
  172. pointblank/locales/data/IS/address.json +63 -0
  173. pointblank/locales/data/IS/company.json +61 -0
  174. pointblank/locales/data/IS/internet.json +19 -0
  175. pointblank/locales/data/IS/misc.json +7 -0
  176. pointblank/locales/data/IS/person.json +44 -0
  177. pointblank/locales/data/IS/text.json +38 -0
  178. pointblank/locales/data/IT/address.json +192 -0
  179. pointblank/locales/data/IT/company.json +137 -0
  180. pointblank/locales/data/IT/internet.json +20 -0
  181. pointblank/locales/data/IT/misc.json +10 -0
  182. pointblank/locales/data/IT/person.json +70 -0
  183. pointblank/locales/data/IT/text.json +44 -0
  184. pointblank/locales/data/JP/address.json +713 -0
  185. pointblank/locales/data/JP/company.json +113 -0
  186. pointblank/locales/data/JP/internet.json +22 -0
  187. pointblank/locales/data/JP/misc.json +10 -0
  188. pointblank/locales/data/JP/person.json +1057 -0
  189. pointblank/locales/data/JP/text.json +51 -0
  190. pointblank/locales/data/KR/address.json +77 -0
  191. pointblank/locales/data/KR/company.json +68 -0
  192. pointblank/locales/data/KR/internet.json +19 -0
  193. pointblank/locales/data/KR/misc.json +7 -0
  194. pointblank/locales/data/KR/person.json +40 -0
  195. pointblank/locales/data/KR/text.json +38 -0
  196. pointblank/locales/data/LT/address.json +66 -0
  197. pointblank/locales/data/LT/company.json +60 -0
  198. pointblank/locales/data/LT/internet.json +19 -0
  199. pointblank/locales/data/LT/misc.json +7 -0
  200. pointblank/locales/data/LT/person.json +42 -0
  201. pointblank/locales/data/LT/text.json +38 -0
  202. pointblank/locales/data/LU/address.json +66 -0
  203. pointblank/locales/data/LU/company.json +60 -0
  204. pointblank/locales/data/LU/internet.json +19 -0
  205. pointblank/locales/data/LU/misc.json +7 -0
  206. pointblank/locales/data/LU/person.json +38 -0
  207. pointblank/locales/data/LU/text.json +38 -0
  208. pointblank/locales/data/LV/address.json +62 -0
  209. pointblank/locales/data/LV/company.json +60 -0
  210. pointblank/locales/data/LV/internet.json +19 -0
  211. pointblank/locales/data/LV/misc.json +7 -0
  212. pointblank/locales/data/LV/person.json +40 -0
  213. pointblank/locales/data/LV/text.json +38 -0
  214. pointblank/locales/data/MT/address.json +61 -0
  215. pointblank/locales/data/MT/company.json +60 -0
  216. pointblank/locales/data/MT/internet.json +19 -0
  217. pointblank/locales/data/MT/misc.json +7 -0
  218. pointblank/locales/data/MT/person.json +38 -0
  219. pointblank/locales/data/MT/text.json +38 -0
  220. pointblank/locales/data/MX/address.json +100 -0
  221. pointblank/locales/data/MX/company.json +65 -0
  222. pointblank/locales/data/MX/internet.json +20 -0
  223. pointblank/locales/data/MX/misc.json +8 -0
  224. pointblank/locales/data/MX/person.json +18 -0
  225. pointblank/locales/data/MX/text.json +39 -0
  226. pointblank/locales/data/NL/address.json +1517 -0
  227. pointblank/locales/data/NL/company.json +133 -0
  228. pointblank/locales/data/NL/internet.json +44 -0
  229. pointblank/locales/data/NL/misc.json +55 -0
  230. pointblank/locales/data/NL/person.json +365 -0
  231. pointblank/locales/data/NL/text.json +210 -0
  232. pointblank/locales/data/NO/address.json +86 -0
  233. pointblank/locales/data/NO/company.json +66 -0
  234. pointblank/locales/data/NO/internet.json +20 -0
  235. pointblank/locales/data/NO/misc.json +8 -0
  236. pointblank/locales/data/NO/person.json +17 -0
  237. pointblank/locales/data/NO/text.json +35 -0
  238. pointblank/locales/data/NZ/address.json +90 -0
  239. pointblank/locales/data/NZ/company.json +65 -0
  240. pointblank/locales/data/NZ/internet.json +20 -0
  241. pointblank/locales/data/NZ/misc.json +8 -0
  242. pointblank/locales/data/NZ/person.json +17 -0
  243. pointblank/locales/data/NZ/text.json +39 -0
  244. pointblank/locales/data/PH/address.json +67 -0
  245. pointblank/locales/data/PH/company.json +61 -0
  246. pointblank/locales/data/PH/internet.json +19 -0
  247. pointblank/locales/data/PH/misc.json +7 -0
  248. pointblank/locales/data/PH/person.json +40 -0
  249. pointblank/locales/data/PH/text.json +38 -0
  250. pointblank/locales/data/PL/address.json +91 -0
  251. pointblank/locales/data/PL/company.json +65 -0
  252. pointblank/locales/data/PL/internet.json +20 -0
  253. pointblank/locales/data/PL/misc.json +8 -0
  254. pointblank/locales/data/PL/person.json +17 -0
  255. pointblank/locales/data/PL/text.json +35 -0
  256. pointblank/locales/data/PT/address.json +90 -0
  257. pointblank/locales/data/PT/company.json +65 -0
  258. pointblank/locales/data/PT/internet.json +20 -0
  259. pointblank/locales/data/PT/misc.json +8 -0
  260. pointblank/locales/data/PT/person.json +17 -0
  261. pointblank/locales/data/PT/text.json +35 -0
  262. pointblank/locales/data/RO/address.json +73 -0
  263. pointblank/locales/data/RO/company.json +61 -0
  264. pointblank/locales/data/RO/internet.json +19 -0
  265. pointblank/locales/data/RO/misc.json +7 -0
  266. pointblank/locales/data/RO/person.json +40 -0
  267. pointblank/locales/data/RO/text.json +38 -0
  268. pointblank/locales/data/RU/address.json +74 -0
  269. pointblank/locales/data/RU/company.json +60 -0
  270. pointblank/locales/data/RU/internet.json +19 -0
  271. pointblank/locales/data/RU/misc.json +7 -0
  272. pointblank/locales/data/RU/person.json +38 -0
  273. pointblank/locales/data/RU/text.json +38 -0
  274. pointblank/locales/data/SE/address.json +247 -0
  275. pointblank/locales/data/SE/company.json +65 -0
  276. pointblank/locales/data/SE/internet.json +20 -0
  277. pointblank/locales/data/SE/misc.json +7 -0
  278. pointblank/locales/data/SE/person.json +45 -0
  279. pointblank/locales/data/SE/text.json +43 -0
  280. pointblank/locales/data/SI/address.json +67 -0
  281. pointblank/locales/data/SI/company.json +60 -0
  282. pointblank/locales/data/SI/internet.json +19 -0
  283. pointblank/locales/data/SI/misc.json +7 -0
  284. pointblank/locales/data/SI/person.json +38 -0
  285. pointblank/locales/data/SI/text.json +38 -0
  286. pointblank/locales/data/SK/address.json +64 -0
  287. pointblank/locales/data/SK/company.json +60 -0
  288. pointblank/locales/data/SK/internet.json +19 -0
  289. pointblank/locales/data/SK/misc.json +7 -0
  290. pointblank/locales/data/SK/person.json +38 -0
  291. pointblank/locales/data/SK/text.json +38 -0
  292. pointblank/locales/data/TR/address.json +105 -0
  293. pointblank/locales/data/TR/company.json +65 -0
  294. pointblank/locales/data/TR/internet.json +20 -0
  295. pointblank/locales/data/TR/misc.json +8 -0
  296. pointblank/locales/data/TR/person.json +17 -0
  297. pointblank/locales/data/TR/text.json +35 -0
  298. pointblank/locales/data/TW/address.json +86 -0
  299. pointblank/locales/data/TW/company.json +69 -0
  300. pointblank/locales/data/TW/internet.json +19 -0
  301. pointblank/locales/data/TW/misc.json +7 -0
  302. pointblank/locales/data/TW/person.json +42 -0
  303. pointblank/locales/data/TW/text.json +38 -0
  304. pointblank/locales/data/US/address.json +996 -0
  305. pointblank/locales/data/US/company.json +131 -0
  306. pointblank/locales/data/US/internet.json +22 -0
  307. pointblank/locales/data/US/misc.json +11 -0
  308. pointblank/locales/data/US/person.json +1092 -0
  309. pointblank/locales/data/US/text.json +56 -0
  310. pointblank/locales/data/_shared/misc.json +42 -0
  311. pointblank/schema.py +339 -2
  312. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/METADATA +45 -1
  313. pointblank-0.20.0.dist-info/RECORD +366 -0
  314. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/WHEEL +1 -1
  315. pointblank-0.19.0.dist-info/RECORD +0 -59
  316. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/entry_points.txt +0 -0
  317. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/licenses/LICENSE +0 -0
  318. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,7 @@ failure thresholds (using the `Thresholds` class or through shorthands for this
11
11
  `Validate` class has numerous methods for defining validation steps and for obtaining
12
12
  post-interrogation metrics and data.
13
13
 
14
- Validate(data: 'IntoDataFrame', reference: 'IntoFrame | None' = None, tbl_name: 'str | None' = None, label: 'str | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds | None' = None, actions: 'Actions | None' = None, final_actions: 'FinalActions | None' = None, brief: 'str | bool | None' = None, lang: 'str | None' = None, locale: 'str | None' = None) -> None
14
+ Validate(data: 'IntoDataFrame', reference: 'IntoFrame | None' = None, tbl_name: 'str | None' = None, label: 'str | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds | None' = None, actions: 'Actions | None' = None, final_actions: 'FinalActions | None' = None, brief: 'str | bool | None' = None, lang: 'str | None' = None, locale: 'str | None' = None, owner: 'str | None' = None, consumers: 'str | list[str] | None' = None, version: 'str | None' = None) -> None
15
15
 
16
16
  Workflow for defining a set of validations on a table and interrogating for results.
17
17
 
@@ -99,6 +99,18 @@ Validate(data: 'IntoDataFrame', reference: 'IntoFrame | None' = None, tbl_name:
99
99
  locale's rules. Examples include `"en-US"` for English (United States) and `"fr-FR"` for
100
100
  French (France). More simply, this can be a language identifier without a designation of
101
101
  territory, like `"es"` for Spanish.
102
+ owner
103
+ An optional string identifying the owner of the data being validated. This is useful for
104
+ governance purposes, indicating who is responsible for the quality and maintenance of the
105
+ data. For example, `"data-platform-team"` or `"analytics-engineering"`.
106
+ consumers
107
+ An optional string or list of strings identifying who depends on or consumes this data.
108
+ This helps document data dependencies and can be useful for impact analysis when data
109
+ quality issues are detected. For example, `"ml-team"` or `["ml-team", "analytics"]`.
110
+ version
111
+ An optional string representing the version of the validation plan or data contract. This
112
+ supports semantic versioning (e.g., `"1.0.0"`, `"2.1.0"`) and is useful for tracking changes
113
+ to validation rules over time and for organizational governance.
102
114
 
103
115
  Returns
104
116
  -------
@@ -8289,6 +8301,271 @@ col_pct_null(self, columns: 'str | list[str] | Column | ColumnSelector | ColumnS
8289
8301
  calculates to 2.7 to 3.9, which rounds down to 2 to 3 rows).
8290
8302
 
8291
8303
 
8304
+ data_freshness(self, column: 'str', max_age: 'str | datetime.timedelta', reference_time: 'datetime.datetime | str | None' = None, timezone: 'str | None' = None, allow_tz_mismatch: 'bool' = False, pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds | None' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
8305
+
8306
+ Validate that data in a datetime column is not older than a specified maximum age.
8307
+
8308
+ The `data_freshness()` validation method checks whether the most recent timestamp in the
8309
+ specified datetime column is within the allowed `max_age=` from the `reference_time=` (which
8310
+ defaults to the current time). This is useful for ensuring data pipelines are delivering
8311
+ fresh data and for enforcing data SLAs.
8312
+
8313
+ This method helps detect stale data by comparing the maximum (most recent) value in a
8314
+ datetime column against an expected freshness threshold.
8315
+
8316
+ Parameters
8317
+ ----------
8318
+ column
8319
+ The name of the datetime column to check for freshness. This column should contain
8320
+ date or datetime values.
8321
+ max_age
8322
+ The maximum allowed age of the data. Can be specified as: (1) a string with a
8323
+ human-readable duration like `"24 hours"`, `"1 day"`, `"30 minutes"`, `"2 weeks"`, etc.
8324
+ (supported units: `seconds`, `minutes`, `hours`, `days`, `weeks`), or (2) a
8325
+ `datetime.timedelta` object for precise control.
8326
+ reference_time
8327
+ The reference point in time to compare against. Defaults to `None`, which uses the
8328
+ current time (UTC if `timezone=` is not specified). Can be: (1) a `datetime.datetime`
8329
+ object (timezone-aware recommended), (2) a string in ISO 8601 format (e.g.,
8330
+ `"2024-01-15T10:30:00"` or `"2024-01-15T10:30:00+05:30"`), or (3) `None` to use the
8331
+ current time.
8332
+ timezone
8333
+ The timezone to use for interpreting the data and reference time. Accepts IANA
8334
+ timezone names (e.g., `"America/New_York"`), hour offsets (e.g., `"-7"`), or ISO 8601
8335
+ offsets (e.g., `"-07:00"`). When `None` (default), naive datetimes are treated as UTC.
8336
+ See the section *The `timezone=` Parameter* for details.
8337
+ allow_tz_mismatch
8338
+ Whether to allow timezone mismatches between the column data and reference time.
8339
+ By default (`False`), a warning note is added when comparing timezone-naive with
8340
+ timezone-aware datetimes. Set to `True` to suppress these warnings.
8341
+ pre
8342
+ An optional preprocessing function or lambda to apply to the data table during
8343
+ interrogation. This function should take a table as input and return a modified table.
8344
+ thresholds
8345
+ Set threshold failure levels for reporting and reacting to exceedances of the levels.
8346
+ The thresholds are set at the step level and will override any global thresholds set in
8347
+ `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
8348
+ be set locally and global thresholds (if any) will take effect.
8349
+ actions
8350
+ Optional actions to take when the validation step meets or exceeds any set threshold
8351
+ levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
8352
+ define the actions.
8353
+ brief
8354
+ An optional brief description of the validation step that will be displayed in the
8355
+ reporting table. You can use the templating elements like `"{step}"` to insert
8356
+ the step number, or `"{auto}"` to include an automatically generated brief. If `True`
8357
+ the entire brief will be automatically generated. If `None` (the default) then there
8358
+ won't be a brief.
8359
+ active
8360
+ A boolean value indicating whether the validation step should be active. Using `False`
8361
+ will make the validation step inactive (still reporting its presence and keeping indexes
8362
+ for the steps unchanged).
8363
+
8364
+ Returns
8365
+ -------
8366
+ Validate
8367
+ The `Validate` object with the added validation step.
8368
+
8369
+ How Timezones Affect Freshness Checks
8370
+ -------------------------------------
8371
+ Freshness validation involves comparing two times: the **data time** (the most recent
8372
+ timestamp in your column) and the **execution time** (when and where the validation runs).
8373
+ Timezone confusion typically arises because these two times may originate from different
8374
+ contexts.
8375
+
8376
+ Consider these common scenarios:
8377
+
8378
+ - your data timestamps are stored in UTC (common for databases), but you're running
8379
+ validation on your laptop in New York (Eastern Time)
8380
+ - you develop and test validation locally, then deploy it to a cloud workflow that runs
8381
+ in UTC—suddenly your 'same' validation behaves differently
8382
+ - your data comes from servers in multiple regions, each recording timestamps in their
8383
+ local timezone
8384
+
8385
+ The `timezone=` parameter exists to solve this problem by establishing a single, explicit
8386
+ timezone context for the freshness comparison. When you specify a timezone, Pointblank
8387
+ interprets both the data timestamps (if naive) and the execution time in that timezone,
8388
+ ensuring consistent behavior whether you run validation on your laptop or in a cloud
8389
+ workflow.
8390
+
8391
+ **Scenario 1: Data has timezone-aware datetimes**
8392
+
8393
+ ```python
8394
+ # Your data column has values like: 2024-01-15 10:30:00+00:00 (UTC)
8395
+ # Comparison is straightforward as both sides have explicit timezones
8396
+ .data_freshness(column="updated_at", max_age="24 hours")
8397
+ ```
8398
+
8399
+ **Scenario 2: Data has naive datetimes (no timezone)**
8400
+
8401
+ ```python
8402
+ # Your data column has values like: 2024-01-15 10:30:00 (no timezone)
8403
+ # Specify the timezone the data was recorded in:
8404
+ .data_freshness(column="updated_at", max_age="24 hours", timezone="America/New_York")
8405
+ ```
8406
+
8407
+ **Scenario 3: Ensuring consistent behavior across environments**
8408
+
8409
+ ```python
8410
+ # Pin the timezone to ensure identical results whether running locally or in the cloud
8411
+ .data_freshness(
8412
+ column="updated_at",
8413
+ max_age="24 hours",
8414
+ timezone="UTC", # Explicit timezone removes environment dependence
8415
+ )
8416
+ ```
8417
+
8418
+ The `timezone=` Parameter
8419
+ ---------------------------
8420
+ The `timezone=` parameter accepts several convenient formats, making it easy to specify
8421
+ timezones in whatever way is most natural for your use case. The following examples
8422
+ illustrate the three supported input styles.
8423
+
8424
+ **IANA Timezone Names** (recommended for regions with daylight saving time):
8425
+
8426
+ ```python
8427
+ timezone="America/New_York" # Eastern Time (handles DST automatically)
8428
+ timezone="Europe/London" # UK time
8429
+ timezone="Asia/Tokyo" # Japan Standard Time
8430
+ timezone="Australia/Sydney" # Australian Eastern Time
8431
+ timezone="UTC" # Coordinated Universal Time
8432
+ ```
8433
+
8434
+ **Simple Hour Offsets** (quick and easy), for example `timezone="-7"`:
8435
+
8436
+ **ISO 8601 Offset Format** (precise, including fractional hours), for example `timezone="-07:00"` or `timezone="+05:30"`:
8437
+
8438
+ When a timezone is specified:
8439
+
8440
+ - naive datetime values in the column are assumed to be in this timezone
8441
+ - the reference time (if naive) is assumed to be in this timezone
8442
+ - the validation report will show times in this timezone
8443
+
8444
+ When `None` (default):
8445
+
8446
+ - if your column has timezone-aware datetimes, those timezones are used
8447
+ - if your column has naive datetimes, they're treated as UTC
8448
+ - the current time reference uses UTC
8449
+
8450
+ Note that IANA timezone names are preferred when daylight saving time transitions matter, as
8451
+ they automatically handle the offset changes. Fixed offsets like `"-7"` or `"-07:00"` do not
8452
+ account for DST.
8453
+
8454
+ Recommendations for Working with Timestamps
8455
+ -------------------------------------------
8456
+ When working with datetime data, storing timestamps in UTC in your databases is strongly
8457
+ recommended since it provides a consistent reference point regardless of where your data
8458
+ originates or where it's consumed. Using timezone-aware datetimes whenever possible helps
8459
+ avoid ambiguity—when a datetime has an explicit timezone, there's no guessing about what
8460
+ time it actually represents.
8461
+
8462
+ If you're working with naive datetimes (which lack timezone information), always specify the
8463
+ `timezone=` parameter so Pointblank knows how to interpret those values. When providing
8464
+ `reference_time=` as a string, use ISO 8601 format with the timezone offset included (e.g.,
8465
+ `"2024-01-15T10:30:00+00:00"`) to ensure unambiguous parsing. Finally, prefer IANA timezone
8466
+ names (like `"America/New_York"`) over fixed offsets (like `"-05:00"`) when daylight saving
8467
+ time transitions matter, since IANA names automatically handle the twice-yearly offset
8468
+ changes. To see all available IANA timezone names in Python, use
8469
+ `zoneinfo.available_timezones()` from the standard library's `zoneinfo` module.
8470
+
8471
+ Examples
8472
+ --------
8473
+ The simplest use of `data_freshness()` requires just two arguments: the `column=` containing
8474
+ your timestamps and `max_age=` specifying how old the data can be. In this first example,
8475
+ we create sample data with an `"updated_at"` column containing timestamps from 1, 12, and
8476
+ 20 hours ago. By setting `max_age="24 hours"`, we're asserting that the most recent
8477
+ timestamp should be within 24 hours of the current time. Since the newest record is only
8478
+ 1 hour old, this validation passes.
8479
+
8480
+ ```python
8481
+ import pointblank as pb
8482
+ import polars as pl
8483
+ from datetime import datetime, timedelta
8484
+
8485
+ # Create sample data with recent timestamps
8486
+ recent_data = pl.DataFrame({
8487
+ "id": [1, 2, 3],
8488
+ "updated_at": [
8489
+ datetime.now() - timedelta(hours=1),
8490
+ datetime.now() - timedelta(hours=12),
8491
+ datetime.now() - timedelta(hours=20),
8492
+ ]
8493
+ })
8494
+
8495
+ validation = (
8496
+ pb.Validate(data=recent_data)
8497
+ .data_freshness(column="updated_at", max_age="24 hours")
8498
+ .interrogate()
8499
+ )
8500
+
8501
+ validation
8502
+ ```
8503
+
8504
+ The `max_age=` parameter accepts human-readable strings with various time units. You can
8505
+ chain multiple `data_freshness()` calls to check different freshness thresholds
8506
+ simultaneously—useful for tiered SLAs where you might want warnings at 30 minutes but
8507
+ errors at 2 days.
8508
+
8509
+ ```python
8510
+ # Check data is fresh within different time windows
8511
+ validation = (
8512
+ pb.Validate(data=recent_data)
8513
+ .data_freshness(column="updated_at", max_age="30 minutes") # Very fresh
8514
+ .data_freshness(column="updated_at", max_age="2 days") # Reasonably fresh
8515
+ .data_freshness(column="updated_at", max_age="1 week") # Within a week
8516
+ .interrogate()
8517
+ )
8518
+
8519
+ validation
8520
+ ```
8521
+
8522
+ When your data contains naive datetimes (timestamps without timezone information), use the
8523
+ `timezone=` parameter to specify what timezone those values represent. Here we have event
8524
+ data recorded in Eastern Time, so we set `timezone="America/New_York"` to ensure the
8525
+ freshness comparison is done correctly.
8526
+
8527
+ ```python
8528
+ # Data with naive datetimes (assume they're in Eastern Time)
8529
+ eastern_data = pl.DataFrame({
8530
+ "event_time": [
8531
+ datetime.now() - timedelta(hours=2),
8532
+ datetime.now() - timedelta(hours=5),
8533
+ ]
8534
+ })
8535
+
8536
+ validation = (
8537
+ pb.Validate(data=eastern_data)
8538
+ .data_freshness(
8539
+ column="event_time",
8540
+ max_age="12 hours",
8541
+ timezone="America/New_York" # Interpret times as Eastern
8542
+ )
8543
+ .interrogate()
8544
+ )
8545
+
8546
+ validation
8547
+ ```
8548
+
8549
+ For reproducible validations or historical checks, you can use `reference_time=` to compare
8550
+ against a specific point in time instead of the current time. This is particularly useful
8551
+ for testing or when validating data snapshots. The reference time should include a timezone
8552
+ offset (like `+00:00` for UTC) to avoid ambiguity.
8553
+
8554
+ ```python
8555
+ validation = (
8556
+ pb.Validate(data=recent_data)
8557
+ .data_freshness(
8558
+ column="updated_at",
8559
+ max_age="24 hours",
8560
+ reference_time="2024-01-15T12:00:00+00:00"
8561
+ )
8562
+ .interrogate()
8563
+ )
8564
+
8565
+ validation
8566
+ ```
8567
+
8568
+
8292
8569
  col_schema_match(self, schema: 'Schema', complete: 'bool' = True, in_order: 'bool' = True, case_sensitive_colnames: 'bool' = True, case_sensitive_dtypes: 'bool' = True, full_match_dtypes: 'bool' = True, pre: 'Callable | None' = None, thresholds: 'int | float | bool | tuple | dict | Thresholds | None' = None, actions: 'Actions | None' = None, brief: 'str | bool | None' = None, active: 'bool' = True) -> 'Validate'
8293
8570
 
8294
8571
  Do columns in the table (and their types) match a predefined schema?
@@ -15241,6 +15518,521 @@ config(report_incl_header: 'bool' = True, report_incl_footer: 'bool' = True, rep
15241
15518
 
15242
15519
 
15243
15520
 
15521
+ ## The Test Data Generation family
15522
+
15523
+ Generate synthetic test data based on schema definitions. Use
15524
+ `generate_dataset()` to create data from a `Schema` object. The `*_field()` helper functions
15525
+ (such as `int_field()`) define typed fields with constraints for realistic test data generation.
15526
+
15527
+ generate_dataset(schema: 'Schema', n: 'int' = 100, seed: 'int | None' = None, output: "Literal['polars', 'pandas', 'dict']" = 'polars', country: 'str' = 'US') -> 'Any'
15528
+
15529
+ Generate synthetic test data from a schema.
15530
+
15531
+ This function generates random data that conforms to a schema's column definitions. When the
15532
+ schema is defined using `Field` objects with constraints (e.g., `min_val`, `max_val`,
15533
+ `pattern`, `preset`), the generated data will respect those constraints.
15534
+
15535
+ This is a convenience function that wraps `Schema.generate()` for a more functional style
15536
+ of usage, similar to how `load_dataset()` loads built-in datasets.
15537
+
15538
+ Parameters
15539
+ ----------
15540
+ schema
15541
+ The schema object defining the structure and constraints of the data to generate.
15542
+ n
15543
+ Number of rows to generate. Default is `100`.
15544
+ seed
15545
+ Random seed for reproducibility. If provided, the same seed will produce
15546
+ the same data. Default is `None` (non-deterministic).
15547
+ output
15548
+ Output format for the generated data. Options are: (1) `"polars"` (default) returns a
15549
+ Polars DataFrame, (2) `"pandas"` returns a Pandas DataFrame, and (3) `"dict"` returns
15550
+ a dictionary of lists.
15551
+ country
15552
+ Country code for realistic data generation when using presets (e.g., `preset="email"`,
15553
+ `preset="address"`). Accepts ISO 3166-1 alpha-2 codes (e.g., `"US"`, `"DE"`, `"FR"`)
15554
+ or alpha-3 codes (e.g., `"USA"`, `"DEU"`, `"FRA"`). Default is `"US"`.
15555
+
15556
+ Returns
15557
+ -------
15558
+ DataFrame or dict
15559
+ Generated data in the requested format.
15560
+
15561
+ Raises
15562
+ ------
15563
+ ValueError
15564
+ If the schema has no columns or if constraints cannot be satisfied.
15565
+ ImportError
15566
+ If required optional dependencies are not installed.
15567
+
15568
+ Supported Countries
15569
+ -------------------
15570
+ The `country=` parameter controls the country used for generating realistic data with
15571
+ presets (e.g., `preset="email"`, `preset="address"`). This affects location-specific
15572
+ formats like addresses, phone numbers, and postal codes. Currently, **50 countries** are
15573
+ supported with full locale data:
15574
+
15575
+ **Europe (32 countries):** Austria (`"AT"`), Belgium (`"BE"`), Bulgaria (`"BG"`),
15576
+ Croatia (`"HR"`), Cyprus (`"CY"`), Czech Republic (`"CZ"`), Denmark (`"DK"`),
15577
+ Estonia (`"EE"`), Finland (`"FI"`), France (`"FR"`), Germany (`"DE"`), Greece (`"GR"`),
15578
+ Hungary (`"HU"`), Iceland (`"IS"`), Ireland (`"IE"`), Italy (`"IT"`), Latvia (`"LV"`),
15579
+ Lithuania (`"LT"`), Luxembourg (`"LU"`), Malta (`"MT"`), Netherlands (`"NL"`),
15580
+ Norway (`"NO"`), Poland (`"PL"`), Portugal (`"PT"`), Romania (`"RO"`), Russia (`"RU"`),
15581
+ Slovakia (`"SK"`), Slovenia (`"SI"`), Spain (`"ES"`), Sweden (`"SE"`),
15582
+ Switzerland (`"CH"`), United Kingdom (`"GB"`)
15583
+
15584
+ **Americas (7 countries):** Argentina (`"AR"`), Brazil (`"BR"`), Canada (`"CA"`),
15585
+ Chile (`"CL"`), Colombia (`"CO"`), Mexico (`"MX"`), United States (`"US"`)
15586
+
15587
+ **Asia-Pacific (10 countries):** Australia (`"AU"`), China (`"CN"`), Hong Kong (`"HK"`),
15588
+ India (`"IN"`), Indonesia (`"ID"`), Japan (`"JP"`), New Zealand (`"NZ"`),
15589
+ Philippines (`"PH"`), South Korea (`"KR"`), Taiwan (`"TW"`)
15590
+
15591
+ **Middle East (1 country):** Turkey (`"TR"`)
15592
+
15593
+ Examples
15594
+ --------
15595
+ Generate test data from a schema with field constraints:
15596
+
15597
+ ```python
15598
+ import pointblank as pb
15599
+
15600
+ schema = pb.Schema(
15601
+ user_id=pb.int_field(min_val=1, unique=True),
15602
+ email=pb.string_field(preset="email"),
15603
+ age=pb.int_field(min_val=18, max_val=100),
15604
+ status=pb.string_field(allowed=["active", "pending", "inactive"]),
15605
+ )
15606
+
15607
+ # Generate 100 rows of test data
15608
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15609
+ ```
15610
+
15611
+ Generate data from a simple dtype-only schema as a Pandas DataFrame:
15612
+
15613
+ ```python
15614
+ schema = pb.Schema(name="String", age="Int64", active="Boolean")
15615
+ pb.preview(pb.generate_dataset(schema, n=50, seed=23, output="pandas"))
15616
+ ```
15617
+
15618
+ Generate data with German addresses by using `country="DE"`:
15619
+
15620
+ ```python
15621
+ schema = pb.Schema(
15622
+ name=pb.string_field(preset="name"),
15623
+ address=pb.string_field(preset="address"),
15624
+ city=pb.string_field(preset="city"),
15625
+ )
15626
+ pb.preview(pb.generate_dataset(schema, n=20, seed=23, country="DE"))
15627
+ ```
15628
+
15629
+
15630
+ int_field(min_val: 'int | None' = None, max_val: 'int | None' = None, allowed: 'list[int] | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None, dtype: 'str' = 'Int64') -> 'IntField'
15631
+
15632
+ Create an integer column specification.
15633
+
15634
+ Parameters
15635
+ ----------
15636
+ min_val
15637
+ Minimum value (inclusive). Default is `None` (no minimum).
15638
+ max_val
15639
+ Maximum value (inclusive). Default is `None` (no maximum).
15640
+ allowed
15641
+ List of allowed values (categorical constraint). When provided,
15642
+ values are sampled from this list.
15643
+ nullable
15644
+ Whether the column can contain null values. Default is `False`.
15645
+ null_probability
15646
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15647
+ unique
15648
+ Whether all values must be unique. Default is `False`.
15649
+ generator
15650
+ Custom callable that generates values. Overrides other settings.
15651
+ dtype
15652
+ Integer dtype. Default is `"Int64"`. Options: `"Int8"`, `"Int16"`,
15653
+ `"Int32"`, `"Int64"`, `"UInt8"`, `"UInt16"`, `"UInt32"`, `"UInt64"`.
15654
+
15655
+ Returns
15656
+ -------
15657
+ IntField
15658
+ An integer field specification.
15659
+
15660
+ Examples
15661
+ --------
15662
+ Define a schema with integer fields and generate test data:
15663
+
15664
+ ```python
15665
+ import pointblank as pb
15666
+
15667
+ # Define a schema with integer field specifications
15668
+ schema = pb.Schema(
15669
+ user_id=pb.int_field(min_val=1, unique=True),
15670
+ age=pb.int_field(min_val=0, max_val=120),
15671
+ rating=pb.int_field(allowed=[1, 2, 3, 4, 5]),
15672
+ )
15673
+
15674
+ # Generate 100 rows of test data
15675
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15676
+ ```
15677
+
15678
+ The generated data will have unique user IDs of `1` or greater, ages between `0` and `120`,
15679
+ and ratings sampled from the allowed values.
15680
+
15681
+
15682
+ float_field(min_val: 'float | None' = None, max_val: 'float | None' = None, allowed: 'list[float] | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None, dtype: 'str' = 'Float64') -> 'FloatField'
15683
+
15684
+ Create a floating-point column specification.
15685
+
15686
+ Parameters
15687
+ ----------
15688
+ min_val
15689
+ Minimum value (inclusive). Default is `None` (no minimum).
15690
+ max_val
15691
+ Maximum value (inclusive). Default is `None` (no maximum).
15692
+ allowed
15693
+ List of allowed values (categorical constraint). When provided,
15694
+ values are sampled from this list.
15695
+ nullable
15696
+ Whether the column can contain null values. Default is `False`.
15697
+ null_probability
15698
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15699
+ unique
15700
+ Whether all values must be unique. Default is `False`.
15701
+ generator
15702
+ Custom callable that generates values. Overrides other settings.
15703
+ dtype
15704
+ Float dtype. Default is `"Float64"`. Options: `"Float32"`, `"Float64"`.
15705
+
15706
+ Returns
15707
+ -------
15708
+ FloatField
15709
+ A float field specification.
15710
+
15711
+ Examples
15712
+ --------
15713
+ Define a schema with float fields and generate test data:
15714
+
15715
+ ```python
15716
+ import pointblank as pb
15717
+
15718
+ # Define a schema with float field specifications
15719
+ schema = pb.Schema(
15720
+ price=pb.float_field(min_val=0.01, max_val=9999.99),
15721
+ probability=pb.float_field(min_val=0.0, max_val=1.0),
15722
+ temperature=pb.float_field(min_val=-40.0, max_val=50.0),
15723
+ )
15724
+
15725
+ # Generate 100 rows of test data
15726
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15727
+ ```
15728
+
15729
+ Values are uniformly distributed across the specified ranges.
15730
+
15731
+
15732
+ string_field(min_length: 'int | None' = None, max_length: 'int | None' = None, pattern: 'str | None' = None, preset: 'str | None' = None, allowed: 'list[str] | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None) -> 'StringField'
15733
+
15734
+ Create a string column specification.
15735
+
15736
+ Parameters
15737
+ ----------
15738
+ min_length
15739
+ Minimum string length. Default is `None` (no minimum).
15740
+ max_length
15741
+ Maximum string length. Default is `None` (no maximum).
15742
+ pattern
15743
+ Regular expression pattern for generated strings.
15744
+ preset
15745
+ Preset for realistic data (e.g., `"email"`, `"name"`, `"phone_number"`).
15746
+ allowed
15747
+ List of allowed values (categorical constraint).
15748
+ nullable
15749
+ Whether the column can contain null values. Default is `False`.
15750
+ null_probability
15751
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15752
+ unique
15753
+ Whether all values must be unique. Default is `False`.
15754
+ generator
15755
+ Custom callable that generates values. Overrides other settings.
15756
+
15757
+ Returns
15758
+ -------
15759
+ StringField
15760
+ A string field specification.
15761
+
15762
+ Examples
15763
+ --------
15764
+ Define a schema with string fields and generate test data:
15765
+
15766
+ ```python
15767
+ import pointblank as pb
15768
+
15769
+ # Define a schema with string field specifications
15770
+ schema = pb.Schema(
15771
+ name=pb.string_field(preset="name"),
15772
+ email=pb.string_field(preset="email", unique=True),
15773
+ status=pb.string_field(allowed=["active", "pending", "inactive"]),
15774
+ code=pb.string_field(pattern=r"[A-Z]{3}-[0-9]{4}"),
15775
+ )
15776
+
15777
+ # Generate 100 rows of test data
15778
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15779
+ ```
15780
+
15781
+ The generated data will have coherent names and emails (derived from the name),
15782
+ statuses sampled from the allowed values, and codes matching the regex pattern.
15783
+
15784
+
15785
+ bool_field(p_true: 'float' = 0.5, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None) -> 'BoolField'
15786
+
15787
+ Create a boolean column specification.
15788
+
15789
+ Parameters
15790
+ ----------
15791
+ p_true
15792
+ Probability of generating `True`. Default is `0.5` (equal probability).
15793
+ Must be between 0.0 and 1.0.
15794
+ nullable
15795
+ Whether the column can contain null values. Default is `False`.
15796
+ null_probability
15797
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15798
+ unique
15799
+ Whether all values must be unique. Default is `False`.
15800
+ Note: a boolean column can only have 2 unique non-null values.
15801
+ generator
15802
+ Custom callable that generates values. Overrides other settings.
15803
+
15804
+ Returns
15805
+ -------
15806
+ BoolField
15807
+ A boolean field specification.
15808
+
15809
+ Examples
15810
+ --------
15811
+ Define a schema with boolean fields and generate test data:
15812
+
15813
+ ```python
15814
+ import pointblank as pb
15815
+
15816
+ # Define a schema with boolean field specifications
15817
+ schema = pb.Schema(
15818
+ is_active=pb.bool_field(p_true=0.8), # 80% True
15819
+ is_premium=pb.bool_field(p_true=0.2), # 20% True
15820
+ is_verified=pb.bool_field(), # 50% True (default)
15821
+ )
15822
+
15823
+ # Generate 100 rows of test data
15824
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15825
+ ```
15826
+
15827
+ The `p_true=` parameter controls the probability of generating `True` values,
15828
+ which is helpful for simulating real-world distributions.
15829
+
15830
+
15831
+ date_field(min_date: 'str | date | None' = None, max_date: 'str | date | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None) -> 'DateField'
15832
+
15833
+ Create a date column specification.
15834
+
15835
+ Parameters
15836
+ ----------
15837
+ min_date
15838
+ Minimum date (inclusive). Can be an ISO string or a `date` object.
15839
+ max_date
15840
+ Maximum date (inclusive). Can be an ISO string or a `date` object.
15841
+ nullable
15842
+ Whether the column can contain null values. Default is `False`.
15843
+ null_probability
15844
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15845
+ unique
15846
+ Whether all values must be unique. Default is `False`.
15847
+ generator
15848
+ Custom callable that generates values. Overrides other settings.
15849
+
15850
+ Returns
15851
+ -------
15852
+ DateField
15853
+ A date field specification.
15854
+
15855
+ Examples
15856
+ --------
15857
+ Define a schema with date fields and generate test data:
15858
+
15859
+ ```python
15860
+ import pointblank as pb
15861
+ from datetime import date
15862
+
15863
+ # Define a schema with date field specifications
15864
+ schema = pb.Schema(
15865
+ birth_date=pb.date_field(
15866
+ min_date=date(1960, 1, 1),
15867
+ max_date=date(2005, 12, 31)
15868
+ ),
15869
+ hire_date=pb.date_field(
15870
+ min_date=date(2020, 1, 1),
15871
+ max_date=date(2024, 12, 31)
15872
+ ),
15873
+ )
15874
+
15875
+ # Generate 100 rows of test data
15876
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15877
+ ```
15878
+
15879
+ Date values are uniformly distributed within the specified range.
15880
+
15881
+
15882
+ datetime_field(min_date: 'str | datetime | None' = None, max_date: 'str | datetime | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None) -> 'DatetimeField'
15883
+
15884
+ Create a datetime column specification.
15885
+
15886
+ Parameters
15887
+ ----------
15888
+ min_date
15889
+ Minimum datetime (inclusive). Can be an ISO string or a `datetime` object.
15890
+ max_date
15891
+ Maximum datetime (inclusive). Can be an ISO string or a `datetime` object.
15892
+ nullable
15893
+ Whether the column can contain null values. Default is `False`.
15894
+ null_probability
15895
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15896
+ unique
15897
+ Whether all values must be unique. Default is `False`.
15898
+ generator
15899
+ Custom callable that generates values. Overrides other settings.
15900
+
15901
+ Returns
15902
+ -------
15903
+ DatetimeField
15904
+ A datetime field specification.
15905
+
15906
+ Examples
15907
+ --------
15908
+ Define a schema with datetime fields and generate test data:
15909
+
15910
+ ```python
15911
+ import pointblank as pb
15912
+ from datetime import datetime
15913
+
15914
+ # Define a schema with datetime field specifications
15915
+ schema = pb.Schema(
15916
+ created_at=pb.datetime_field(
15917
+ min_date=datetime(2024, 1, 1),
15918
+ max_date=datetime(2024, 12, 31)
15919
+ ),
15920
+ updated_at=pb.datetime_field(
15921
+ min_date=datetime(2024, 6, 1),
15922
+ max_date=datetime(2024, 12, 31)
15923
+ ),
15924
+ )
15925
+
15926
+ # Generate 100 rows of test data
15927
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15928
+ ```
15929
+
15930
+ Datetime values are uniformly distributed within the specified range.
15931
+
15932
+
15933
+ time_field(min_time: 'str | time | None' = None, max_time: 'str | time | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None) -> 'TimeField'
15934
+
15935
+ Create a time column specification.
15936
+
15937
+ Parameters
15938
+ ----------
15939
+ min_time
15940
+ Minimum time (inclusive). Can be an ISO string or a `time` object.
15941
+ max_time
15942
+ Maximum time (inclusive). Can be an ISO string or a `time` object.
15943
+ nullable
15944
+ Whether the column can contain null values. Default is `False`.
15945
+ null_probability
15946
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15947
+ unique
15948
+ Whether all values must be unique. Default is `False`.
15949
+ generator
15950
+ Custom callable that generates values. Overrides other settings.
15951
+
15952
+ Returns
15953
+ -------
15954
+ TimeField
15955
+ A time field specification.
15956
+
15957
+ Examples
15958
+ --------
15959
+ Define a schema with time fields and generate test data:
15960
+
15961
+ ```python
15962
+ import pointblank as pb
15963
+ from datetime import time
15964
+
15965
+ # Define a schema with time field specifications
15966
+ schema = pb.Schema(
15967
+ start_time=pb.time_field(
15968
+ min_time=time(9, 0, 0),
15969
+ max_time=time(12, 0, 0)
15970
+ ),
15971
+ end_time=pb.time_field(
15972
+ min_time=time(13, 0, 0),
15973
+ max_time=time(17, 0, 0)
15974
+ ),
15975
+ )
15976
+
15977
+ # Generate 100 rows of test data
15978
+ pb.preview(pb.generate_dataset(schema, n=100, seed=23))
15979
+ ```
15980
+
15981
+ Time values are uniformly distributed within the specified range.
15982
+
15983
+
15984
+ duration_field(min_duration: 'str | timedelta | None' = None, max_duration: 'str | timedelta | None' = None, nullable: 'bool' = False, null_probability: 'float' = 0.0, unique: 'bool' = False, generator: 'Callable[[], Any] | None' = None) -> 'DurationField'
15985
+
15986
+ Create a duration column specification.
15987
+
15988
+ Parameters
15989
+ ----------
15990
+ min_duration
15991
+ Minimum duration (inclusive). Can be a string or a `timedelta` object.
15992
+ max_duration
15993
+ Maximum duration (inclusive). Can be a string or a `timedelta` object.
15994
+ nullable
15995
+ Whether the column can contain null values. Default is `False`.
15996
+ null_probability
15997
+ Probability of generating null when `nullable=True`. Default is `0.0`.
15998
+ unique
15999
+ Whether all values must be unique. Default is `False`.
16000
+ generator
16001
+ Custom callable that generates values. Overrides other settings.
16002
+
16003
+ Returns
16004
+ -------
16005
+ DurationField
16006
+ A duration field specification.
16007
+
16008
+ Examples
16009
+ --------
16010
+ Define a schema with duration fields and generate test data:
16011
+
16012
+ ```python
16013
+ import pointblank as pb
16014
+ from datetime import timedelta
16015
+
16016
+ # Define a schema with duration field specifications
16017
+ schema = pb.Schema(
16018
+ session_length=pb.duration_field(
16019
+ min_duration=timedelta(minutes=5),
16020
+ max_duration=timedelta(hours=2)
16021
+ ),
16022
+ wait_time=pb.duration_field(
16023
+ min_duration=timedelta(seconds=30),
16024
+ max_duration=timedelta(minutes=15)
16025
+ ),
16026
+ )
16027
+
16028
+ # Generate 100 rows of test data
16029
+ pb.generate_dataset(schema, n=100, seed=23)
16030
+ ```
16031
+
16032
+ Duration values are uniformly distributed within the specified range.
16033
+
16034
+
16035
+
15244
16036
  ## The Prebuilt Actions family
15245
16037
 
15246
16038
  The Prebuilt Actions group contains a function that can be used to