pointblank 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (318) hide show
  1. pointblank/__init__.py +44 -1
  2. pointblank/_utils_llms_txt.py +20 -0
  3. pointblank/data/api-docs.txt +793 -1
  4. pointblank/field.py +1507 -0
  5. pointblank/generate/__init__.py +17 -0
  6. pointblank/generate/base.py +49 -0
  7. pointblank/generate/generators.py +573 -0
  8. pointblank/generate/regex.py +217 -0
  9. pointblank/locales/__init__.py +1476 -0
  10. pointblank/locales/data/AR/address.json +73 -0
  11. pointblank/locales/data/AR/company.json +60 -0
  12. pointblank/locales/data/AR/internet.json +19 -0
  13. pointblank/locales/data/AR/misc.json +7 -0
  14. pointblank/locales/data/AR/person.json +39 -0
  15. pointblank/locales/data/AR/text.json +38 -0
  16. pointblank/locales/data/AT/address.json +84 -0
  17. pointblank/locales/data/AT/company.json +65 -0
  18. pointblank/locales/data/AT/internet.json +20 -0
  19. pointblank/locales/data/AT/misc.json +8 -0
  20. pointblank/locales/data/AT/person.json +17 -0
  21. pointblank/locales/data/AT/text.json +35 -0
  22. pointblank/locales/data/AU/address.json +83 -0
  23. pointblank/locales/data/AU/company.json +65 -0
  24. pointblank/locales/data/AU/internet.json +20 -0
  25. pointblank/locales/data/AU/misc.json +8 -0
  26. pointblank/locales/data/AU/person.json +17 -0
  27. pointblank/locales/data/AU/text.json +35 -0
  28. pointblank/locales/data/BE/address.json +225 -0
  29. pointblank/locales/data/BE/company.json +129 -0
  30. pointblank/locales/data/BE/internet.json +36 -0
  31. pointblank/locales/data/BE/misc.json +6 -0
  32. pointblank/locales/data/BE/person.json +62 -0
  33. pointblank/locales/data/BE/text.json +38 -0
  34. pointblank/locales/data/BG/address.json +75 -0
  35. pointblank/locales/data/BG/company.json +60 -0
  36. pointblank/locales/data/BG/internet.json +19 -0
  37. pointblank/locales/data/BG/misc.json +7 -0
  38. pointblank/locales/data/BG/person.json +40 -0
  39. pointblank/locales/data/BG/text.json +38 -0
  40. pointblank/locales/data/BR/address.json +98 -0
  41. pointblank/locales/data/BR/company.json +65 -0
  42. pointblank/locales/data/BR/internet.json +20 -0
  43. pointblank/locales/data/BR/misc.json +8 -0
  44. pointblank/locales/data/BR/person.json +17 -0
  45. pointblank/locales/data/BR/text.json +35 -0
  46. pointblank/locales/data/CA/address.json +747 -0
  47. pointblank/locales/data/CA/company.json +120 -0
  48. pointblank/locales/data/CA/internet.json +24 -0
  49. pointblank/locales/data/CA/misc.json +11 -0
  50. pointblank/locales/data/CA/person.json +1033 -0
  51. pointblank/locales/data/CA/text.json +58 -0
  52. pointblank/locales/data/CH/address.json +184 -0
  53. pointblank/locales/data/CH/company.json +112 -0
  54. pointblank/locales/data/CH/internet.json +20 -0
  55. pointblank/locales/data/CH/misc.json +10 -0
  56. pointblank/locales/data/CH/person.json +64 -0
  57. pointblank/locales/data/CH/text.json +45 -0
  58. pointblank/locales/data/CL/address.json +71 -0
  59. pointblank/locales/data/CL/company.json +60 -0
  60. pointblank/locales/data/CL/internet.json +19 -0
  61. pointblank/locales/data/CL/misc.json +7 -0
  62. pointblank/locales/data/CL/person.json +38 -0
  63. pointblank/locales/data/CL/text.json +38 -0
  64. pointblank/locales/data/CN/address.json +124 -0
  65. pointblank/locales/data/CN/company.json +76 -0
  66. pointblank/locales/data/CN/internet.json +20 -0
  67. pointblank/locales/data/CN/misc.json +8 -0
  68. pointblank/locales/data/CN/person.json +50 -0
  69. pointblank/locales/data/CN/text.json +38 -0
  70. pointblank/locales/data/CO/address.json +76 -0
  71. pointblank/locales/data/CO/company.json +60 -0
  72. pointblank/locales/data/CO/internet.json +19 -0
  73. pointblank/locales/data/CO/misc.json +7 -0
  74. pointblank/locales/data/CO/person.json +38 -0
  75. pointblank/locales/data/CO/text.json +38 -0
  76. pointblank/locales/data/CY/address.json +62 -0
  77. pointblank/locales/data/CY/company.json +60 -0
  78. pointblank/locales/data/CY/internet.json +19 -0
  79. pointblank/locales/data/CY/misc.json +7 -0
  80. pointblank/locales/data/CY/person.json +38 -0
  81. pointblank/locales/data/CY/text.json +38 -0
  82. pointblank/locales/data/CZ/address.json +70 -0
  83. pointblank/locales/data/CZ/company.json +61 -0
  84. pointblank/locales/data/CZ/internet.json +19 -0
  85. pointblank/locales/data/CZ/misc.json +7 -0
  86. pointblank/locales/data/CZ/person.json +40 -0
  87. pointblank/locales/data/CZ/text.json +38 -0
  88. pointblank/locales/data/DE/address.json +756 -0
  89. pointblank/locales/data/DE/company.json +101 -0
  90. pointblank/locales/data/DE/internet.json +22 -0
  91. pointblank/locales/data/DE/misc.json +11 -0
  92. pointblank/locales/data/DE/person.json +1026 -0
  93. pointblank/locales/data/DE/text.json +50 -0
  94. pointblank/locales/data/DK/address.json +231 -0
  95. pointblank/locales/data/DK/company.json +65 -0
  96. pointblank/locales/data/DK/internet.json +20 -0
  97. pointblank/locales/data/DK/misc.json +7 -0
  98. pointblank/locales/data/DK/person.json +45 -0
  99. pointblank/locales/data/DK/text.json +43 -0
  100. pointblank/locales/data/EE/address.json +69 -0
  101. pointblank/locales/data/EE/company.json +60 -0
  102. pointblank/locales/data/EE/internet.json +19 -0
  103. pointblank/locales/data/EE/misc.json +7 -0
  104. pointblank/locales/data/EE/person.json +39 -0
  105. pointblank/locales/data/EE/text.json +38 -0
  106. pointblank/locales/data/ES/address.json +3086 -0
  107. pointblank/locales/data/ES/company.json +644 -0
  108. pointblank/locales/data/ES/internet.json +25 -0
  109. pointblank/locales/data/ES/misc.json +11 -0
  110. pointblank/locales/data/ES/person.json +488 -0
  111. pointblank/locales/data/ES/text.json +49 -0
  112. pointblank/locales/data/FI/address.json +93 -0
  113. pointblank/locales/data/FI/company.json +65 -0
  114. pointblank/locales/data/FI/internet.json +20 -0
  115. pointblank/locales/data/FI/misc.json +8 -0
  116. pointblank/locales/data/FI/person.json +17 -0
  117. pointblank/locales/data/FI/text.json +35 -0
  118. pointblank/locales/data/FR/address.json +619 -0
  119. pointblank/locales/data/FR/company.json +111 -0
  120. pointblank/locales/data/FR/internet.json +22 -0
  121. pointblank/locales/data/FR/misc.json +11 -0
  122. pointblank/locales/data/FR/person.json +1066 -0
  123. pointblank/locales/data/FR/text.json +50 -0
  124. pointblank/locales/data/GB/address.json +5759 -0
  125. pointblank/locales/data/GB/company.json +131 -0
  126. pointblank/locales/data/GB/internet.json +24 -0
  127. pointblank/locales/data/GB/misc.json +45 -0
  128. pointblank/locales/data/GB/person.json +578 -0
  129. pointblank/locales/data/GB/text.json +61 -0
  130. pointblank/locales/data/GR/address.json +68 -0
  131. pointblank/locales/data/GR/company.json +61 -0
  132. pointblank/locales/data/GR/internet.json +19 -0
  133. pointblank/locales/data/GR/misc.json +7 -0
  134. pointblank/locales/data/GR/person.json +39 -0
  135. pointblank/locales/data/GR/text.json +38 -0
  136. pointblank/locales/data/HK/address.json +79 -0
  137. pointblank/locales/data/HK/company.json +69 -0
  138. pointblank/locales/data/HK/internet.json +19 -0
  139. pointblank/locales/data/HK/misc.json +7 -0
  140. pointblank/locales/data/HK/person.json +42 -0
  141. pointblank/locales/data/HK/text.json +38 -0
  142. pointblank/locales/data/HR/address.json +73 -0
  143. pointblank/locales/data/HR/company.json +60 -0
  144. pointblank/locales/data/HR/internet.json +19 -0
  145. pointblank/locales/data/HR/misc.json +7 -0
  146. pointblank/locales/data/HR/person.json +38 -0
  147. pointblank/locales/data/HR/text.json +38 -0
  148. pointblank/locales/data/HU/address.json +70 -0
  149. pointblank/locales/data/HU/company.json +61 -0
  150. pointblank/locales/data/HU/internet.json +19 -0
  151. pointblank/locales/data/HU/misc.json +7 -0
  152. pointblank/locales/data/HU/person.json +40 -0
  153. pointblank/locales/data/HU/text.json +38 -0
  154. pointblank/locales/data/ID/address.json +68 -0
  155. pointblank/locales/data/ID/company.json +61 -0
  156. pointblank/locales/data/ID/internet.json +19 -0
  157. pointblank/locales/data/ID/misc.json +7 -0
  158. pointblank/locales/data/ID/person.json +40 -0
  159. pointblank/locales/data/ID/text.json +38 -0
  160. pointblank/locales/data/IE/address.json +643 -0
  161. pointblank/locales/data/IE/company.json +140 -0
  162. pointblank/locales/data/IE/internet.json +24 -0
  163. pointblank/locales/data/IE/misc.json +44 -0
  164. pointblank/locales/data/IE/person.json +55 -0
  165. pointblank/locales/data/IE/text.json +60 -0
  166. pointblank/locales/data/IN/address.json +92 -0
  167. pointblank/locales/data/IN/company.json +65 -0
  168. pointblank/locales/data/IN/internet.json +20 -0
  169. pointblank/locales/data/IN/misc.json +8 -0
  170. pointblank/locales/data/IN/person.json +52 -0
  171. pointblank/locales/data/IN/text.json +39 -0
  172. pointblank/locales/data/IS/address.json +63 -0
  173. pointblank/locales/data/IS/company.json +61 -0
  174. pointblank/locales/data/IS/internet.json +19 -0
  175. pointblank/locales/data/IS/misc.json +7 -0
  176. pointblank/locales/data/IS/person.json +44 -0
  177. pointblank/locales/data/IS/text.json +38 -0
  178. pointblank/locales/data/IT/address.json +192 -0
  179. pointblank/locales/data/IT/company.json +137 -0
  180. pointblank/locales/data/IT/internet.json +20 -0
  181. pointblank/locales/data/IT/misc.json +10 -0
  182. pointblank/locales/data/IT/person.json +70 -0
  183. pointblank/locales/data/IT/text.json +44 -0
  184. pointblank/locales/data/JP/address.json +713 -0
  185. pointblank/locales/data/JP/company.json +113 -0
  186. pointblank/locales/data/JP/internet.json +22 -0
  187. pointblank/locales/data/JP/misc.json +10 -0
  188. pointblank/locales/data/JP/person.json +1057 -0
  189. pointblank/locales/data/JP/text.json +51 -0
  190. pointblank/locales/data/KR/address.json +77 -0
  191. pointblank/locales/data/KR/company.json +68 -0
  192. pointblank/locales/data/KR/internet.json +19 -0
  193. pointblank/locales/data/KR/misc.json +7 -0
  194. pointblank/locales/data/KR/person.json +40 -0
  195. pointblank/locales/data/KR/text.json +38 -0
  196. pointblank/locales/data/LT/address.json +66 -0
  197. pointblank/locales/data/LT/company.json +60 -0
  198. pointblank/locales/data/LT/internet.json +19 -0
  199. pointblank/locales/data/LT/misc.json +7 -0
  200. pointblank/locales/data/LT/person.json +42 -0
  201. pointblank/locales/data/LT/text.json +38 -0
  202. pointblank/locales/data/LU/address.json +66 -0
  203. pointblank/locales/data/LU/company.json +60 -0
  204. pointblank/locales/data/LU/internet.json +19 -0
  205. pointblank/locales/data/LU/misc.json +7 -0
  206. pointblank/locales/data/LU/person.json +38 -0
  207. pointblank/locales/data/LU/text.json +38 -0
  208. pointblank/locales/data/LV/address.json +62 -0
  209. pointblank/locales/data/LV/company.json +60 -0
  210. pointblank/locales/data/LV/internet.json +19 -0
  211. pointblank/locales/data/LV/misc.json +7 -0
  212. pointblank/locales/data/LV/person.json +40 -0
  213. pointblank/locales/data/LV/text.json +38 -0
  214. pointblank/locales/data/MT/address.json +61 -0
  215. pointblank/locales/data/MT/company.json +60 -0
  216. pointblank/locales/data/MT/internet.json +19 -0
  217. pointblank/locales/data/MT/misc.json +7 -0
  218. pointblank/locales/data/MT/person.json +38 -0
  219. pointblank/locales/data/MT/text.json +38 -0
  220. pointblank/locales/data/MX/address.json +100 -0
  221. pointblank/locales/data/MX/company.json +65 -0
  222. pointblank/locales/data/MX/internet.json +20 -0
  223. pointblank/locales/data/MX/misc.json +8 -0
  224. pointblank/locales/data/MX/person.json +18 -0
  225. pointblank/locales/data/MX/text.json +39 -0
  226. pointblank/locales/data/NL/address.json +1517 -0
  227. pointblank/locales/data/NL/company.json +133 -0
  228. pointblank/locales/data/NL/internet.json +44 -0
  229. pointblank/locales/data/NL/misc.json +55 -0
  230. pointblank/locales/data/NL/person.json +365 -0
  231. pointblank/locales/data/NL/text.json +210 -0
  232. pointblank/locales/data/NO/address.json +86 -0
  233. pointblank/locales/data/NO/company.json +66 -0
  234. pointblank/locales/data/NO/internet.json +20 -0
  235. pointblank/locales/data/NO/misc.json +8 -0
  236. pointblank/locales/data/NO/person.json +17 -0
  237. pointblank/locales/data/NO/text.json +35 -0
  238. pointblank/locales/data/NZ/address.json +90 -0
  239. pointblank/locales/data/NZ/company.json +65 -0
  240. pointblank/locales/data/NZ/internet.json +20 -0
  241. pointblank/locales/data/NZ/misc.json +8 -0
  242. pointblank/locales/data/NZ/person.json +17 -0
  243. pointblank/locales/data/NZ/text.json +39 -0
  244. pointblank/locales/data/PH/address.json +67 -0
  245. pointblank/locales/data/PH/company.json +61 -0
  246. pointblank/locales/data/PH/internet.json +19 -0
  247. pointblank/locales/data/PH/misc.json +7 -0
  248. pointblank/locales/data/PH/person.json +40 -0
  249. pointblank/locales/data/PH/text.json +38 -0
  250. pointblank/locales/data/PL/address.json +91 -0
  251. pointblank/locales/data/PL/company.json +65 -0
  252. pointblank/locales/data/PL/internet.json +20 -0
  253. pointblank/locales/data/PL/misc.json +8 -0
  254. pointblank/locales/data/PL/person.json +17 -0
  255. pointblank/locales/data/PL/text.json +35 -0
  256. pointblank/locales/data/PT/address.json +90 -0
  257. pointblank/locales/data/PT/company.json +65 -0
  258. pointblank/locales/data/PT/internet.json +20 -0
  259. pointblank/locales/data/PT/misc.json +8 -0
  260. pointblank/locales/data/PT/person.json +17 -0
  261. pointblank/locales/data/PT/text.json +35 -0
  262. pointblank/locales/data/RO/address.json +73 -0
  263. pointblank/locales/data/RO/company.json +61 -0
  264. pointblank/locales/data/RO/internet.json +19 -0
  265. pointblank/locales/data/RO/misc.json +7 -0
  266. pointblank/locales/data/RO/person.json +40 -0
  267. pointblank/locales/data/RO/text.json +38 -0
  268. pointblank/locales/data/RU/address.json +74 -0
  269. pointblank/locales/data/RU/company.json +60 -0
  270. pointblank/locales/data/RU/internet.json +19 -0
  271. pointblank/locales/data/RU/misc.json +7 -0
  272. pointblank/locales/data/RU/person.json +38 -0
  273. pointblank/locales/data/RU/text.json +38 -0
  274. pointblank/locales/data/SE/address.json +247 -0
  275. pointblank/locales/data/SE/company.json +65 -0
  276. pointblank/locales/data/SE/internet.json +20 -0
  277. pointblank/locales/data/SE/misc.json +7 -0
  278. pointblank/locales/data/SE/person.json +45 -0
  279. pointblank/locales/data/SE/text.json +43 -0
  280. pointblank/locales/data/SI/address.json +67 -0
  281. pointblank/locales/data/SI/company.json +60 -0
  282. pointblank/locales/data/SI/internet.json +19 -0
  283. pointblank/locales/data/SI/misc.json +7 -0
  284. pointblank/locales/data/SI/person.json +38 -0
  285. pointblank/locales/data/SI/text.json +38 -0
  286. pointblank/locales/data/SK/address.json +64 -0
  287. pointblank/locales/data/SK/company.json +60 -0
  288. pointblank/locales/data/SK/internet.json +19 -0
  289. pointblank/locales/data/SK/misc.json +7 -0
  290. pointblank/locales/data/SK/person.json +38 -0
  291. pointblank/locales/data/SK/text.json +38 -0
  292. pointblank/locales/data/TR/address.json +105 -0
  293. pointblank/locales/data/TR/company.json +65 -0
  294. pointblank/locales/data/TR/internet.json +20 -0
  295. pointblank/locales/data/TR/misc.json +8 -0
  296. pointblank/locales/data/TR/person.json +17 -0
  297. pointblank/locales/data/TR/text.json +35 -0
  298. pointblank/locales/data/TW/address.json +86 -0
  299. pointblank/locales/data/TW/company.json +69 -0
  300. pointblank/locales/data/TW/internet.json +19 -0
  301. pointblank/locales/data/TW/misc.json +7 -0
  302. pointblank/locales/data/TW/person.json +42 -0
  303. pointblank/locales/data/TW/text.json +38 -0
  304. pointblank/locales/data/US/address.json +996 -0
  305. pointblank/locales/data/US/company.json +131 -0
  306. pointblank/locales/data/US/internet.json +22 -0
  307. pointblank/locales/data/US/misc.json +11 -0
  308. pointblank/locales/data/US/person.json +1092 -0
  309. pointblank/locales/data/US/text.json +56 -0
  310. pointblank/locales/data/_shared/misc.json +42 -0
  311. pointblank/schema.py +339 -2
  312. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/METADATA +45 -1
  313. pointblank-0.20.0.dist-info/RECORD +366 -0
  314. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/WHEEL +1 -1
  315. pointblank-0.19.0.dist-info/RECORD +0 -59
  316. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/entry_points.txt +0 -0
  317. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/licenses/LICENSE +0 -0
  318. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/top_level.txt +0 -0
pointblank/field.py ADDED
@@ -0,0 +1,1507 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import date, datetime, time, timedelta
5
+ from typing import TYPE_CHECKING, Any, Callable
6
+
7
+ if TYPE_CHECKING:
8
+ pass
9
+
10
# Public API of this module. Order: factory helpers first, then the classes.
__all__ = [
    # Factory helpers -- the recommended way to create field specifications
    "int_field",
    "float_field",
    "string_field",
    "bool_field",
    "date_field",
    "datetime_field",
    "time_field",
    "duration_field",
    # Field classes -- exported for type hints and advanced usage
    "Field",
    "IntField",
    "FloatField",
    "StringField",
    "BoolField",
    "DateField",
    "DatetimeField",
    "TimeField",
    "DurationField",
]
31
+
32
+
33
# Names of the presets that can be requested for realistic data generation,
# grouped by theme. Stored as a frozenset so membership tests are cheap and the
# collection cannot be mutated at runtime.
AVAILABLE_PRESETS = frozenset(
    [
        # Personal
        "name", "name_full", "first_name", "last_name",
        "email", "phone_number",
        "address", "city", "state", "country", "postcode",
        "latitude", "longitude",
        # Business
        "company", "job", "catch_phrase",
        # Internet
        "url", "domain_name", "ipv4", "ipv6", "user_name", "password",
        # Text
        "text", "sentence", "paragraph", "word",
        # Financial
        "credit_card_number", "iban", "currency_code",
        # Identifiers
        "uuid4", "ssn", "license_plate",
        # Date/Time (for string representations)
        "date_this_year", "date_this_decade", "time",
        # Misc
        "color_name", "file_name", "file_extension", "mime_type",
    ]
)
85
+
86
+
87
+ # =============================================================================
88
+ # Base Field Class
89
+ # =============================================================================
90
+
91
+
92
+ @dataclass
93
+ class Field:
94
+ """
95
+ Base class for column specifications in schema definition.
96
+
97
+ This is the base class used internally. For creating fields, use the
98
+ purpose-built field classes or helper functions:
99
+
100
+ - `int_field()` / `IntField` for integer columns
101
+ - `float_field()` / `FloatField` for floating-point columns
102
+ - `string_field()` / `StringField` for string columns
103
+ - `bool_field()` / `BoolField` for boolean columns
104
+ - `date_field()` / `DateField` for date columns
105
+ - `datetime_field()` / `DatetimeField` for datetime columns
106
+ - `time_field()` / `TimeField` for time columns
107
+ - `duration_field()` / `DurationField` for duration columns
108
+ """
109
+
110
+ dtype: str
111
+
112
+ # Nullability
113
+ nullable: bool = False
114
+ null_probability: float = 0.0
115
+
116
+ # Uniqueness
117
+ unique: bool = False
118
+
119
+ # Custom generator
120
+ generator: Callable[[], Any] | None = field(default=None, repr=False)
121
+
122
+ def __post_init__(self):
123
+ """Validate field constraints after initialization."""
124
+ self._validate()
125
+
126
+ def _validate(self) -> None:
127
+ """Validate that all field constraints are consistent and valid."""
128
+ # Validate null_probability
129
+ if not 0.0 <= self.null_probability <= 1.0:
130
+ raise ValueError(
131
+ f"null_probability must be between 0.0 and 1.0, got {self.null_probability}"
132
+ )
133
+
134
+ if self.null_probability > 0.0 and not self.nullable:
135
+ raise ValueError("null_probability > 0 requires nullable=True")
136
+
137
+ def is_numeric(self) -> bool:
138
+ """Check if this field has a numeric dtype."""
139
+ return self.dtype in {
140
+ "Int8",
141
+ "Int16",
142
+ "Int32",
143
+ "Int64",
144
+ "UInt8",
145
+ "UInt16",
146
+ "UInt32",
147
+ "UInt64",
148
+ "Float32",
149
+ "Float64",
150
+ }
151
+
152
+ def is_integer(self) -> bool:
153
+ """Check if this field has an integer dtype."""
154
+ return self.dtype in {
155
+ "Int8",
156
+ "Int16",
157
+ "Int32",
158
+ "Int64",
159
+ "UInt8",
160
+ "UInt16",
161
+ "UInt32",
162
+ "UInt64",
163
+ }
164
+
165
+ def is_float(self) -> bool:
166
+ """Check if this field has a float dtype."""
167
+ return self.dtype in {"Float32", "Float64"}
168
+
169
+ def is_string(self) -> bool:
170
+ """Check if this field has a string dtype."""
171
+ return self.dtype == "String"
172
+
173
+ def is_boolean(self) -> bool:
174
+ """Check if this field has a boolean dtype."""
175
+ return self.dtype == "Boolean"
176
+
177
+ def is_temporal(self) -> bool:
178
+ """Check if this field has a temporal dtype."""
179
+ return self.dtype in {"Date", "Datetime", "Time", "Duration"}
180
+
181
+ def has_custom_generator(self) -> bool:
182
+ """Check if this field uses a custom generator."""
183
+ return self.generator is not None
184
+
185
+
186
+ # =============================================================================
187
+ # Integer Field
188
+ # =============================================================================
189
+
190
# dtype names accepted by IntField: signed first, then unsigned
INT_DTYPES = frozenset(
    (
        "Int8", "Int16", "Int32", "Int64",
        "UInt8", "UInt16", "UInt32", "UInt64",
    )
)
192
+
193
+
194
@dataclass
class IntField(Field):
    """
    Specification of an integer column within a schema definition.

    Parameters
    ----------
    min_val
        Smallest permitted value (inclusive). Default is `None` (no minimum).
    max_val
        Largest permitted value (inclusive). Default is `None` (no maximum).
    allowed
        List of allowed values (categorical constraint). When provided,
        values are sampled from this list.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Integer dtype. Default is `"Int64"`. Options: `"Int8"`, `"Int16"`,
        `"Int32"`, `"Int64"`, `"UInt8"`, `"UInt16"`, `"UInt32"`, `"UInt64"`.

    Raises
    ------
    ValueError
        If constraints are invalid: a non-integer `dtype`, `min_val > max_val`,
        an empty `allowed` list, or inconsistent nullability settings.

    Examples
    --------
    Define a schema with integer fields and generate test data:

    ```python
    import pointblank as pb

    # Define a schema with integer field specifications
    schema = pb.Schema(
        user_id=pb.int_field(min_val=1, unique=True),
        age=pb.int_field(min_val=0, max_val=120),
        rating=pb.int_field(allowed=[1, 2, 3, 4, 5]),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    The generated data will have unique user IDs starting from `1`, ages between `0`-`120`,
    and ratings sampled from the allowed values.
    """

    # Constraints that only apply to integer columns
    min_val: int | None = None
    max_val: int | None = None
    allowed: list[int] | None = None

    # Re-declared so that the base class's required `dtype` gains a default
    dtype: str = "Int64"

    def _validate(self) -> None:
        """Check the shared constraints first, then the integer-specific ones."""
        super()._validate()

        # The dtype must be one of the recognized integer dtype names.
        if self.dtype not in INT_DTYPES:
            raise ValueError(
                f"Invalid dtype '{self.dtype}' for IntField. Valid options: {sorted(INT_DTYPES)}"
            )

        # The bounds are only compared when both ends of the range are given.
        lower, upper = self.min_val, self.max_val
        if lower is not None and upper is not None and lower > upper:
            raise ValueError(f"min_val ({lower}) cannot be greater than max_val ({upper})")

        # An explicitly supplied `allowed` list must offer at least one value.
        if self.allowed is not None and len(self.allowed) == 0:
            raise ValueError("allowed list cannot be empty")

    def has_allowed_values(self) -> bool:
        """Return `True` when an `allowed` list has been supplied."""
        return self.allowed is not None
280
+
281
+
282
def int_field(
    min_val: int | None = None,
    max_val: int | None = None,
    allowed: list[int] | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
    dtype: str = "Int64",
) -> IntField:
    """
    Build the specification for an integer column.

    Every argument is forwarded unchanged to `IntField`, which validates it.

    Parameters
    ----------
    min_val
        Smallest permitted value (inclusive). Default is `None` (no minimum).
    max_val
        Largest permitted value (inclusive). Default is `None` (no maximum).
    allowed
        List of allowed values (categorical constraint). When provided,
        values are sampled from this list.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Integer dtype. Default is `"Int64"`. Options: `"Int8"`, `"Int16"`,
        `"Int32"`, `"Int64"`, `"UInt8"`, `"UInt16"`, `"UInt32"`, `"UInt64"`.

    Returns
    -------
    IntField
        An integer field specification.

    Examples
    --------
    Define a schema with integer fields and generate test data:

    ```{python}
    import pointblank as pb

    # Define a schema with integer field specifications
    schema = pb.Schema(
        user_id=pb.int_field(min_val=1, unique=True),
        age=pb.int_field(min_val=0, max_val=120),
        rating=pb.int_field(allowed=[1, 2, 3, 4, 5]),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    The generated data will have unique user IDs starting from `1`, ages between `0`-`120`,
    and ratings sampled from the allowed values.
    """
    spec = IntField(
        dtype=dtype,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
        min_val=min_val,
        max_val=max_val,
        allowed=allowed,
    )
    return spec
352
+
353
+
354
+ # =============================================================================
355
+ # Float Field
356
+ # =============================================================================
357
+
358
# dtype names accepted by FloatField
FLOAT_DTYPES = frozenset(("Float32", "Float64"))
359
+
360
+
361
@dataclass
class FloatField(Field):
    """
    Floating-point column specification for schema definition.

    Parameters
    ----------
    min_val
        Minimum value (inclusive). Default is `None` (no minimum). Must not be NaN.
    max_val
        Maximum value (inclusive). Default is `None` (no maximum). Must not be NaN.
    allowed
        List of allowed values (categorical constraint). When provided,
        values are sampled from this list.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Float dtype. Default is `"Float64"`. Options: `"Float32"`, `"Float64"`.

    Raises
    ------
    ValueError
        If constraints are invalid: a non-float `dtype`, a NaN bound,
        `min_val > max_val`, an empty `allowed` list, or inconsistent
        nullability settings.

    Examples
    --------
    Define a schema with float fields and generate test data:

    ```python
    import pointblank as pb

    # Define a schema with float field specifications
    schema = pb.Schema(
        price=pb.float_field(min_val=0.01, max_val=9999.99),
        probability=pb.float_field(min_val=0.0, max_val=1.0),
        temperature=pb.float_field(min_val=-40.0, max_val=50.0),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Values are uniformly distributed across the specified ranges.
    """

    # Float-specific constraints
    min_val: float | None = None
    max_val: float | None = None
    allowed: list[float] | None = field(default=None)

    # Override dtype with default
    dtype: str = "Float64"

    def _validate(self) -> None:
        """Validate the shared constraints, then the float-specific ones."""
        # Imported locally so this method does not depend on a module-level import.
        import math

        super()._validate()

        # Validate dtype
        if self.dtype not in FLOAT_DTYPES:
            raise ValueError(
                f"Invalid dtype '{self.dtype}' for FloatField. "
                f"Valid options: {sorted(FLOAT_DTYPES)}"
            )

        # Reject NaN bounds explicitly: every ordering comparison involving NaN is
        # False, so a NaN bound would otherwise slip past the min/max check below
        # and leave the field with a range that cannot be satisfied.
        for bound_name, bound in (("min_val", self.min_val), ("max_val", self.max_val)):
            if isinstance(bound, float) and math.isnan(bound):
                raise ValueError(f"{bound_name} cannot be NaN")

        # Validate min/max (only comparable when both bounds are given)
        if self.min_val is not None and self.max_val is not None:
            if self.min_val > self.max_val:
                raise ValueError(
                    f"min_val ({self.min_val}) cannot be greater than max_val ({self.max_val})"
                )

        # Validate allowed list: when supplied it must offer at least one value
        if self.allowed is not None:
            if len(self.allowed) == 0:
                raise ValueError("allowed list cannot be empty")

    def has_allowed_values(self) -> bool:
        """Check if this field has a set of allowed values."""
        return self.allowed is not None
446
+
447
+
448
def float_field(
    min_val: float | None = None,
    max_val: float | None = None,
    allowed: list[float] | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
    dtype: str = "Float64",
) -> FloatField:
    """
    Build the specification for a floating-point column.

    Every argument is forwarded unchanged to `FloatField`, which validates it.

    Parameters
    ----------
    min_val
        Smallest permitted value (inclusive). Default is `None` (no minimum).
    max_val
        Largest permitted value (inclusive). Default is `None` (no maximum).
    allowed
        List of allowed values (categorical constraint). When provided,
        values are sampled from this list.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Float dtype. Default is `"Float64"`. Options: `"Float32"`, `"Float64"`.

    Returns
    -------
    FloatField
        A float field specification.

    Examples
    --------
    Define a schema with float fields and generate test data:

    ```{python}
    import pointblank as pb

    # Define a schema with float field specifications
    schema = pb.Schema(
        price=pb.float_field(min_val=0.01, max_val=9999.99),
        probability=pb.float_field(min_val=0.0, max_val=1.0),
        temperature=pb.float_field(min_val=-40.0, max_val=50.0),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Values are uniformly distributed across the specified ranges.
    """
    spec = FloatField(
        dtype=dtype,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
        min_val=min_val,
        max_val=max_val,
        allowed=allowed,
    )
    return spec
516
+
517
+
518
+ # =============================================================================
519
+ # String Field
520
+ # =============================================================================
521
+
522
+
523
@dataclass
class StringField(Field):
    """
    Schema specification for a string column.

    Parameters
    ----------
    min_length
        Smallest permitted string length. The default, `None`, means no minimum.
    max_length
        Largest permitted string length. The default, `None`, means no maximum.
    pattern
        Regular expression pattern for generated strings.
    preset
        Preset for realistic data (e.g., `"email"`, `"name"`, `"phone_number"`).
    allowed
        Categorical constraint: a list of permitted values.
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
    generator
        Custom callable producing the values. Overrides other settings.
    dtype
        Always `"String"` for StringField.

    Raises
    ------
    ValueError
        If constraints are invalid or incompatible.

    Examples
    --------
    Define a schema with string fields and generate test data:

    ```python
    import pointblank as pb

    # Define a schema with string field specifications
    schema = pb.Schema(
        name=pb.string_field(preset="name"),
        email=pb.string_field(preset="email", unique=True),
        status=pb.string_field(allowed=["active", "pending", "inactive"]),
        code=pb.string_field(pattern=r"[A-Z]{3}-[0-9]{4}"),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    The generated data will have coherent names and emails (derived from the name),
    statuses sampled from the allowed values, and codes matching the regex pattern.
    """

    # Constraints that only make sense for strings
    min_length: int | None = None
    max_length: int | None = None
    pattern: str | None = None
    preset: str | None = None
    allowed: list[str] | None = field(default=None)

    # dtype is pinned to a single value for this field type
    dtype: str = "String"

    def _validate(self) -> None:
        """Run the base-class checks, then the string-specific constraint checks."""
        super()._validate()

        # Only the "String" dtype is accepted
        if self.dtype != "String":
            raise ValueError(f"StringField dtype must be 'String', got '{self.dtype}'")

        shortest, longest = self.min_length, self.max_length

        # A length bound, when given, may not be negative
        if shortest is not None and shortest < 0:
            raise ValueError(f"min_length must be non-negative, got {shortest}")
        if longest is not None and longest < 0:
            raise ValueError(f"max_length must be non-negative, got {longest}")

        # With both bounds given, the lower one may not exceed the upper one
        if shortest is not None and longest is not None and shortest > longest:
            raise ValueError(
                f"min_length ({shortest}) cannot be greater than max_length ({longest})"
            )

        # The preset name has to be one of the known presets
        if self.preset is not None and self.preset not in AVAILABLE_PRESETS:
            raise ValueError(
                f"Unknown preset '{self.preset}'. Available presets: {sorted(AVAILABLE_PRESETS)}"
            )

        # An explicitly supplied list of allowed values needs at least one entry
        if self.allowed is not None and len(self.allowed) == 0:
            raise ValueError("allowed list cannot be empty")

        # preset, pattern, and allowed are mutually exclusive value sources
        specified = [
            option
            for option in ("preset", "pattern", "allowed")
            if getattr(self, option) is not None
        ]
        if len(specified) > 1:
            raise ValueError(
                "Only one of preset, pattern, or allowed can be specified. "
                f"Got: {', '.join(specified)}"
            )

    def has_preset(self) -> bool:
        """Report whether a preset drives generation for this field."""
        return self.preset is not None

    def has_allowed_values(self) -> bool:
        """Report whether this field is restricted to a set of allowed values."""
        return self.allowed is not None

    def has_pattern(self) -> bool:
        """Report whether a regex pattern constrains this field."""
        return self.pattern is not None
648
+
649
+
650
def string_field(
    min_length: int | None = None,
    max_length: int | None = None,
    pattern: str | None = None,
    preset: str | None = None,
    allowed: list[str] | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
) -> StringField:
    """
    Build the specification for a string column.

    All arguments are forwarded, unchanged and by keyword, to the `StringField` constructor.

    Parameters
    ----------
    min_length
        Smallest permitted string length. The default, `None`, means no minimum.
    max_length
        Largest permitted string length. The default, `None`, means no maximum.
    pattern
        Regular expression pattern for generated strings.
    preset
        Preset for realistic data (e.g., `"email"`, `"name"`, `"phone_number"`).
    allowed
        Categorical constraint: a list of permitted values.
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
    generator
        Custom callable producing the values. Overrides other settings.

    Returns
    -------
    StringField
        A string field specification.

    Examples
    --------
    Define a schema with string fields and generate test data:

    ```{python}
    import pointblank as pb

    # Define a schema with string field specifications
    schema = pb.Schema(
        name=pb.string_field(preset="name"),
        email=pb.string_field(preset="email", unique=True),
        status=pb.string_field(allowed=["active", "pending", "inactive"]),
        code=pb.string_field(pattern=r"[A-Z]{3}-[0-9]{4}"),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    The generated data will have coherent names and emails (derived from the name),
    statuses sampled from the allowed values, and codes matching the regex pattern.
    """
    spec = StringField(
        min_length=min_length,
        max_length=max_length,
        pattern=pattern,
        preset=preset,
        allowed=allowed,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
    )
    return spec
723
+
724
+
725
+ # =============================================================================
726
+ # Boolean Field
727
+ # =============================================================================
728
+
729
+
730
@dataclass
class BoolField(Field):
    """
    Schema specification for a boolean column.

    Parameters
    ----------
    p_true
        Probability of producing `True`; has to lie between 0.0 and 1.0. Default is `0.5`
        (equal probability).
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
        Note: Boolean can only have 2 unique non-null values.
    generator
        Custom callable producing the values. Overrides other settings.
    dtype
        Always `"Boolean"` for BoolField.

    Examples
    --------
    Define a schema with boolean fields and generate test data:

    ```python
    import pointblank as pb

    # Define a schema with boolean field specifications
    schema = pb.Schema(
        is_active=pb.bool_field(p_true=0.8),      # 80% True
        is_premium=pb.bool_field(p_true=0.2),     # 20% True
        is_verified=pb.bool_field(),              # 50% True (default)
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    The `p_true` parameter controls the probability of generating `True` values,
    which is helpful for simulating real-world distributions.
    """

    # Chance of producing True
    p_true: float = 0.5

    # dtype is pinned to a single value for this field type
    dtype: str = "Boolean"

    def _validate(self) -> None:
        """Run the base-class checks, then verify `dtype` and the `p_true` range."""
        super()._validate()

        # Only the "Boolean" dtype is accepted
        if self.dtype != "Boolean":
            raise ValueError(f"BoolField dtype must be 'Boolean', got '{self.dtype}'")

        # Written as a negated chained comparison so that NaN is rejected as well
        within_unit_interval = 0.0 <= self.p_true <= 1.0
        if not within_unit_interval:
            raise ValueError(f"p_true must be between 0.0 and 1.0, got {self.p_true}")
791
+
792
+
793
def bool_field(
    p_true: float = 0.5,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
) -> BoolField:
    """
    Build the specification for a boolean column.

    All arguments are forwarded, unchanged and by keyword, to the `BoolField` constructor.

    Parameters
    ----------
    p_true
        Probability of producing `True`; has to lie between 0.0 and 1.0. Default is `0.5`
        (equal probability).
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
        Note: Boolean can only have 2 unique non-null values.
    generator
        Custom callable producing the values. Overrides other settings.

    Returns
    -------
    BoolField
        A boolean field specification.

    Examples
    --------
    Define a schema with boolean fields and generate test data:

    ```{python}
    import pointblank as pb

    # Define a schema with boolean field specifications
    schema = pb.Schema(
        is_active=pb.bool_field(p_true=0.8),      # 80% True
        is_premium=pb.bool_field(p_true=0.2),     # 20% True
        is_verified=pb.bool_field(),              # 50% True (default)
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    The `p_true=` parameter controls the probability of generating `True` values,
    which is helpful for simulating real-world distributions.
    """
    spec = BoolField(
        p_true=p_true,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
    )
    return spec
851
+
852
+
853
+ # =============================================================================
854
+ # Date Field
855
+ # =============================================================================
856
+
857
+
858
@dataclass
class DateField(Field):
    """
    Date column specification for schema definition.

    Parameters
    ----------
    min_date
        Minimum date (inclusive). Can be ISO string or `date` object.
    max_date
        Maximum date (inclusive). Can be ISO string or `date` object.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Always `"Date"` for DateField.

    Raises
    ------
    ValueError
        If `dtype` is not `"Date"`, or if both bounds are given and either one cannot be
        parsed or `min_date` is later than `max_date`.

    Examples
    --------
    Define a schema with date fields and generate test data:

    ```python
    import pointblank as pb
    from datetime import date

    # Define a schema with date field specifications
    schema = pb.Schema(
        birth_date=pb.date_field(
            min_date=date(1960, 1, 1),
            max_date=date(2005, 12, 31)
        ),
        hire_date=pb.date_field(
            min_date=date(2020, 1, 1),
            max_date=date(2024, 12, 31)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Date values are uniformly distributed within the specified range.
    """

    # Date-specific constraints
    min_date: str | date | None = None
    max_date: str | date | None = None

    # Override dtype with fixed value
    dtype: str = "Date"

    def _validate(self) -> None:
        """
        Validate date field constraints.

        Runs the base-class validation first, then checks the dtype and, only when both
        bounds are supplied, that `min_date` does not come after `max_date`.
        """
        super()._validate()

        # Validate dtype (must be Date)
        if self.dtype != "Date":
            raise ValueError(f"DateField dtype must be 'Date', got '{self.dtype}'")

        # Validate date range; the bounds are only parsed when both are present
        if self.min_date is not None and self.max_date is not None:
            min_dt = self._parse_date(self.min_date)
            max_dt = self._parse_date(self.max_date)
            if min_dt > max_dt:
                raise ValueError(
                    f"min_date ({self.min_date}) cannot be greater than max_date ({self.max_date})"
                )

    @staticmethod
    def _parse_date(value: str | date | datetime) -> datetime:
        """
        Parse a date value to datetime for comparison.

        Parameters
        ----------
        value
            A `datetime` (returned as is), a `date` (combined with midnight), or a string
            handed to `datetime.fromisoformat()`.

        Returns
        -------
        datetime
            The value expressed as a `datetime`.

        Raises
        ------
        ValueError
            If a string cannot be parsed or the value has an unsupported type.
        """
        # `datetime` is a subclass of `date`, so it has to be tested first
        if isinstance(value, datetime):
            return value
        if isinstance(value, date):
            return datetime.combine(value, datetime.min.time())
        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value)
            except ValueError as exc:
                # Chain explicitly so the traceback presents the original parsing failure as
                # the direct cause rather than as an error raised while handling another one
                raise ValueError(
                    f"Unable to parse date string '{value}'. Use ISO format (YYYY-MM-DD)."
                ) from exc
        raise ValueError(f"Invalid date type: {type(value)}")
946
+
947
+
948
def date_field(
    min_date: str | date | None = None,
    max_date: str | date | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
) -> DateField:
    """
    Build the specification for a date column.

    All arguments are forwarded, unchanged and by keyword, to the `DateField` constructor.

    Parameters
    ----------
    min_date
        Inclusive earliest date, given as an ISO string or a `date` object.
    max_date
        Inclusive latest date, given as an ISO string or a `date` object.
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
    generator
        Custom callable producing the values. Overrides other settings.

    Returns
    -------
    DateField
        A date field specification.

    Examples
    --------
    Define a schema with date fields and generate test data:

    ```{python}
    import pointblank as pb
    from datetime import date

    # Define a schema with date field specifications
    schema = pb.Schema(
        birth_date=pb.date_field(
            min_date=date(1960, 1, 1),
            max_date=date(2005, 12, 31)
        ),
        hire_date=pb.date_field(
            min_date=date(2020, 1, 1),
            max_date=date(2024, 12, 31)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Date values are uniformly distributed within the specified range.
    """
    spec = DateField(
        min_date=min_date,
        max_date=max_date,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
    )
    return spec
1013
+
1014
+
1015
+ # =============================================================================
1016
+ # Datetime Field
1017
+ # =============================================================================
1018
+
1019
+
1020
@dataclass
class DatetimeField(Field):
    """
    Datetime column specification for schema definition.

    Parameters
    ----------
    min_date
        Minimum datetime (inclusive). Can be ISO string or `datetime` object.
    max_date
        Maximum datetime (inclusive). Can be ISO string or `datetime` object.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Always `"Datetime"` for DatetimeField.

    Raises
    ------
    ValueError
        If `dtype` is not `"Datetime"`, or if both bounds are given and either one cannot be
        parsed or `min_date` is later than `max_date`.

    Examples
    --------
    Define a schema with datetime fields and generate test data:

    ```python
    import pointblank as pb
    from datetime import datetime

    # Define a schema with datetime field specifications
    schema = pb.Schema(
        created_at=pb.datetime_field(
            min_date=datetime(2024, 1, 1),
            max_date=datetime(2024, 12, 31)
        ),
        updated_at=pb.datetime_field(
            min_date=datetime(2024, 6, 1),
            max_date=datetime(2024, 12, 31)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Datetime values are uniformly distributed within the specified range.
    """

    # Datetime-specific constraints
    min_date: str | datetime | None = None
    max_date: str | datetime | None = None

    # Override dtype with fixed value
    dtype: str = "Datetime"

    def _validate(self) -> None:
        """
        Validate datetime field constraints.

        Runs the base-class validation first, then checks the dtype and, only when both
        bounds are supplied, that `min_date` does not come after `max_date`.
        """
        super()._validate()

        # Validate dtype (must be Datetime)
        if self.dtype != "Datetime":
            raise ValueError(f"DatetimeField dtype must be 'Datetime', got '{self.dtype}'")

        # Validate date range; the bounds are only parsed when both are present
        if self.min_date is not None and self.max_date is not None:
            min_dt = self._parse_datetime(self.min_date)
            max_dt = self._parse_datetime(self.max_date)
            if min_dt > max_dt:
                raise ValueError(
                    f"min_date ({self.min_date}) cannot be greater than max_date ({self.max_date})"
                )

    @staticmethod
    def _parse_datetime(value: str | date | datetime) -> datetime:
        """
        Parse a datetime value for comparison.

        Parameters
        ----------
        value
            A `datetime` (returned as is), a plain `date` (combined with midnight), or a
            string handed to `datetime.fromisoformat()`.

        Returns
        -------
        datetime
            The value expressed as a `datetime`.

        Raises
        ------
        ValueError
            If a string cannot be parsed or the value has an unsupported type.
        """
        # `datetime` is a subclass of `date`, so it has to be tested first
        if isinstance(value, datetime):
            return value
        if isinstance(value, date):
            return datetime.combine(value, datetime.min.time())
        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value)
            except ValueError as exc:
                # Chain explicitly so the traceback presents the original parsing failure as
                # the direct cause rather than as an error raised while handling another one
                raise ValueError(
                    f"Unable to parse datetime string '{value}'. "
                    "Use ISO format (YYYY-MM-DDTHH:MM:SS)."
                ) from exc
        raise ValueError(f"Invalid datetime type: {type(value)}")
1109
+
1110
+
1111
def datetime_field(
    min_date: str | datetime | None = None,
    max_date: str | datetime | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
) -> DatetimeField:
    """
    Build the specification for a datetime column.

    All arguments are forwarded, unchanged and by keyword, to the `DatetimeField`
    constructor.

    Parameters
    ----------
    min_date
        Inclusive earliest datetime, given as an ISO string or a `datetime` object.
    max_date
        Inclusive latest datetime, given as an ISO string or a `datetime` object.
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
    generator
        Custom callable producing the values. Overrides other settings.

    Returns
    -------
    DatetimeField
        A datetime field specification.

    Examples
    --------
    Define a schema with datetime fields and generate test data:

    ```{python}
    import pointblank as pb
    from datetime import datetime

    # Define a schema with datetime field specifications
    schema = pb.Schema(
        created_at=pb.datetime_field(
            min_date=datetime(2024, 1, 1),
            max_date=datetime(2024, 12, 31)
        ),
        updated_at=pb.datetime_field(
            min_date=datetime(2024, 6, 1),
            max_date=datetime(2024, 12, 31)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Datetime values are uniformly distributed within the specified range.
    """
    spec = DatetimeField(
        min_date=min_date,
        max_date=max_date,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
    )
    return spec
1176
+
1177
+
1178
+ # =============================================================================
1179
+ # Time Field
1180
+ # =============================================================================
1181
+
1182
+
1183
@dataclass
class TimeField(Field):
    """
    Time column specification for schema definition.

    Parameters
    ----------
    min_time
        Minimum time (inclusive). Can be ISO string or `time` object.
    max_time
        Maximum time (inclusive). Can be ISO string or `time` object.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Always `"Time"` for TimeField.

    Raises
    ------
    ValueError
        If `dtype` is not `"Time"`, or if both bounds are given and either one cannot be
        parsed or `min_time` is later than `max_time`.

    Examples
    --------
    Define a schema with time fields and generate test data:

    ```python
    import pointblank as pb
    from datetime import time

    # Define a schema with time field specifications
    schema = pb.Schema(
        start_time=pb.time_field(
            min_time=time(9, 0, 0),
            max_time=time(12, 0, 0)
        ),
        end_time=pb.time_field(
            min_time=time(13, 0, 0),
            max_time=time(17, 0, 0)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Time values are uniformly distributed within the specified range.
    """

    # Time-specific constraints
    min_time: str | time | None = None
    max_time: str | time | None = None

    # Override dtype with fixed value
    dtype: str = "Time"

    def _validate(self) -> None:
        """
        Validate time field constraints.

        Runs the base-class validation first, then checks the dtype and, only when both
        bounds are supplied, that `min_time` does not come after `max_time`.
        """
        super()._validate()

        # Validate dtype (must be Time)
        if self.dtype != "Time":
            raise ValueError(f"TimeField dtype must be 'Time', got '{self.dtype}'")

        # Validate time range; the bounds are only parsed when both are present
        if self.min_time is not None and self.max_time is not None:
            min_t = self._parse_time(self.min_time)
            max_t = self._parse_time(self.max_time)
            if min_t > max_t:
                raise ValueError(
                    f"min_time ({self.min_time}) cannot be greater than max_time ({self.max_time})"
                )

    @staticmethod
    def _parse_time(value: str | time) -> time:
        """
        Parse a time value for comparison.

        Parameters
        ----------
        value
            A `time` object (returned as is) or a string handed to `time.fromisoformat()`.

        Returns
        -------
        time
            The value expressed as a `time`.

        Raises
        ------
        ValueError
            If a string cannot be parsed or the value has an unsupported type.
        """
        if isinstance(value, time):
            return value
        if isinstance(value, str):
            try:
                return time.fromisoformat(value)
            except ValueError as exc:
                # Chain explicitly so the traceback presents the original parsing failure as
                # the direct cause rather than as an error raised while handling another one
                raise ValueError(
                    f"Unable to parse time string '{value}'. Use ISO format (HH:MM:SS)."
                ) from exc
        raise ValueError(f"Invalid time type: {type(value)}")
1269
+
1270
+
1271
def time_field(
    min_time: str | time | None = None,
    max_time: str | time | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
) -> TimeField:
    """
    Build the specification for a time column.

    All arguments are forwarded, unchanged and by keyword, to the `TimeField` constructor.

    Parameters
    ----------
    min_time
        Inclusive earliest time, given as an ISO string or a `time` object.
    max_time
        Inclusive latest time, given as an ISO string or a `time` object.
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
    generator
        Custom callable producing the values. Overrides other settings.

    Returns
    -------
    TimeField
        A time field specification.

    Examples
    --------
    Define a schema with time fields and generate test data:

    ```{python}
    import pointblank as pb
    from datetime import time

    # Define a schema with time field specifications
    schema = pb.Schema(
        start_time=pb.time_field(
            min_time=time(9, 0, 0),
            max_time=time(12, 0, 0)
        ),
        end_time=pb.time_field(
            min_time=time(13, 0, 0),
            max_time=time(17, 0, 0)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Time values are uniformly distributed within the specified range.
    """
    spec = TimeField(
        min_time=min_time,
        max_time=max_time,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
    )
    return spec
1336
+
1337
+
1338
+ # =============================================================================
1339
+ # Duration Field
1340
+ # =============================================================================
1341
+
1342
+
1343
@dataclass
class DurationField(Field):
    """
    Duration column specification for schema definition.

    Parameters
    ----------
    min_duration
        Minimum duration (inclusive). Can be a `timedelta` object or a colon-separated
        string in the form `"HH:MM:SS"` or `"MM:SS"`.
    max_duration
        Maximum duration (inclusive). Can be a `timedelta` object or a colon-separated
        string in the form `"HH:MM:SS"` or `"MM:SS"`.
    nullable
        Whether the column can contain null values. Default is `False`.
    null_probability
        Probability of generating null when `nullable=True`. Default is `0.0`.
    unique
        Whether all values must be unique. Default is `False`.
    generator
        Custom callable that generates values. Overrides other settings.
    dtype
        Always `"Duration"` for DurationField.

    Raises
    ------
    ValueError
        If `dtype` is not `"Duration"`, or if both bounds are given and either one cannot be
        parsed or `min_duration` is longer than `max_duration`.

    Examples
    --------
    Define a schema with duration fields and generate test data:

    ```python
    import pointblank as pb
    from datetime import timedelta

    # Define a schema with duration field specifications
    schema = pb.Schema(
        session_length=pb.duration_field(
            min_duration=timedelta(minutes=5),
            max_duration=timedelta(hours=2)
        ),
        wait_time=pb.duration_field(
            min_duration=timedelta(seconds=30),
            max_duration=timedelta(minutes=15)
        ),
    )

    # Generate 100 rows of test data
    pb.preview(pb.generate_dataset(schema, n=100, seed=23))
    ```

    Duration values are uniformly distributed within the specified range.
    """

    # Duration-specific constraints
    min_duration: str | timedelta | None = None
    max_duration: str | timedelta | None = None

    # Override dtype with fixed value
    dtype: str = "Duration"

    def _validate(self) -> None:
        """
        Validate duration field constraints.

        Runs the base-class validation first, then checks the dtype and, only when both
        bounds are supplied, that `min_duration` does not exceed `max_duration`.
        """
        super()._validate()

        # Validate dtype (must be Duration)
        if self.dtype != "Duration":
            raise ValueError(f"DurationField dtype must be 'Duration', got '{self.dtype}'")

        # Validate duration range; the bounds are only parsed when both are present
        if self.min_duration is not None and self.max_duration is not None:
            min_d = self._parse_duration(self.min_duration)
            max_d = self._parse_duration(self.max_duration)
            if min_d > max_d:
                raise ValueError(
                    f"min_duration ({self.min_duration}) cannot be greater than "
                    f"max_duration ({self.max_duration})"
                )

    @staticmethod
    def _parse_duration(value: str | timedelta) -> timedelta:
        """
        Parse a duration value for comparison.

        Parameters
        ----------
        value
            A `timedelta` (returned as is) or a colon-separated string. Three parts are read
            as hours, minutes, and seconds; two parts as minutes and seconds. Each part is
            converted with `float()`, so fractional parts are accepted.

        Returns
        -------
        timedelta
            The value expressed as a `timedelta`.

        Raises
        ------
        ValueError
            If a string has a different number of parts, contains a non-numeric part, or
            describes a duration `timedelta` cannot represent; also if the value has an
            unsupported type.
        """
        if isinstance(value, timedelta):
            return value
        if isinstance(value, str):
            # Only colon-separated forms are understood ("1:30:00", "45:10"); ISO 8601
            # duration strings such as "PT1H30M" are not parsed here
            try:
                parts = value.split(":")
                if len(parts) == 3:
                    hours, minutes, seconds = map(float, parts)
                    return timedelta(hours=hours, minutes=minutes, seconds=seconds)
                elif len(parts) == 2:
                    minutes, seconds = map(float, parts)
                    return timedelta(minutes=minutes, seconds=seconds)
            except (ValueError, OverflowError):
                # ValueError: a part is not numeric (or is NaN).
                # OverflowError: a part is infinite or too large for `timedelta`.
                # Both fall through to the uniform parse error below.
                pass
            raise ValueError(
                f"Unable to parse duration string '{value}'. "
                "Use format 'HH:MM:SS' or timedelta object."
            )
        raise ValueError(f"Invalid duration type: {type(value)}")
1441
+
1442
+
1443
def duration_field(
    min_duration: str | timedelta | None = None,
    max_duration: str | timedelta | None = None,
    nullable: bool = False,
    null_probability: float = 0.0,
    unique: bool = False,
    generator: Callable[[], Any] | None = None,
) -> DurationField:
    """
    Build the specification for a duration column.

    All arguments are forwarded, unchanged and by keyword, to the `DurationField`
    constructor.

    Parameters
    ----------
    min_duration
        Inclusive shortest duration, given as a string or a `timedelta` object.
    max_duration
        Inclusive longest duration, given as a string or a `timedelta` object.
    nullable
        Whether nulls may appear in the column. Default is `False`.
    null_probability
        Probability of producing a null when `nullable=True`. Default is `0.0`.
    unique
        Whether every value has to be unique. Default is `False`.
    generator
        Custom callable producing the values. Overrides other settings.

    Returns
    -------
    DurationField
        A duration field specification.

    Examples
    --------
    Define a schema with duration fields and generate test data:

    ```{python}
    import pointblank as pb
    from datetime import timedelta

    # Define a schema with duration field specifications
    schema = pb.Schema(
        session_length=pb.duration_field(
            min_duration=timedelta(minutes=5),
            max_duration=timedelta(hours=2)
        ),
        wait_time=pb.duration_field(
            min_duration=timedelta(seconds=30),
            max_duration=timedelta(minutes=15)
        ),
    )

    # Generate 100 rows of test data
    pb.generate_dataset(schema, n=100, seed=23)
    ```

    Duration values are uniformly distributed within the specified range.
    """
    spec = DurationField(
        min_duration=min_duration,
        max_duration=max_duration,
        nullable=nullable,
        null_probability=null_probability,
        unique=unique,
        generator=generator,
    )
    return spec