pointblank 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (318)
  1. pointblank/__init__.py +44 -1
  2. pointblank/_utils_llms_txt.py +20 -0
  3. pointblank/data/api-docs.txt +793 -1
  4. pointblank/field.py +1507 -0
  5. pointblank/generate/__init__.py +17 -0
  6. pointblank/generate/base.py +49 -0
  7. pointblank/generate/generators.py +573 -0
  8. pointblank/generate/regex.py +217 -0
  9. pointblank/locales/__init__.py +1476 -0
  10. pointblank/locales/data/AR/address.json +73 -0
  11. pointblank/locales/data/AR/company.json +60 -0
  12. pointblank/locales/data/AR/internet.json +19 -0
  13. pointblank/locales/data/AR/misc.json +7 -0
  14. pointblank/locales/data/AR/person.json +39 -0
  15. pointblank/locales/data/AR/text.json +38 -0
  16. pointblank/locales/data/AT/address.json +84 -0
  17. pointblank/locales/data/AT/company.json +65 -0
  18. pointblank/locales/data/AT/internet.json +20 -0
  19. pointblank/locales/data/AT/misc.json +8 -0
  20. pointblank/locales/data/AT/person.json +17 -0
  21. pointblank/locales/data/AT/text.json +35 -0
  22. pointblank/locales/data/AU/address.json +83 -0
  23. pointblank/locales/data/AU/company.json +65 -0
  24. pointblank/locales/data/AU/internet.json +20 -0
  25. pointblank/locales/data/AU/misc.json +8 -0
  26. pointblank/locales/data/AU/person.json +17 -0
  27. pointblank/locales/data/AU/text.json +35 -0
  28. pointblank/locales/data/BE/address.json +225 -0
  29. pointblank/locales/data/BE/company.json +129 -0
  30. pointblank/locales/data/BE/internet.json +36 -0
  31. pointblank/locales/data/BE/misc.json +6 -0
  32. pointblank/locales/data/BE/person.json +62 -0
  33. pointblank/locales/data/BE/text.json +38 -0
  34. pointblank/locales/data/BG/address.json +75 -0
  35. pointblank/locales/data/BG/company.json +60 -0
  36. pointblank/locales/data/BG/internet.json +19 -0
  37. pointblank/locales/data/BG/misc.json +7 -0
  38. pointblank/locales/data/BG/person.json +40 -0
  39. pointblank/locales/data/BG/text.json +38 -0
  40. pointblank/locales/data/BR/address.json +98 -0
  41. pointblank/locales/data/BR/company.json +65 -0
  42. pointblank/locales/data/BR/internet.json +20 -0
  43. pointblank/locales/data/BR/misc.json +8 -0
  44. pointblank/locales/data/BR/person.json +17 -0
  45. pointblank/locales/data/BR/text.json +35 -0
  46. pointblank/locales/data/CA/address.json +747 -0
  47. pointblank/locales/data/CA/company.json +120 -0
  48. pointblank/locales/data/CA/internet.json +24 -0
  49. pointblank/locales/data/CA/misc.json +11 -0
  50. pointblank/locales/data/CA/person.json +1033 -0
  51. pointblank/locales/data/CA/text.json +58 -0
  52. pointblank/locales/data/CH/address.json +184 -0
  53. pointblank/locales/data/CH/company.json +112 -0
  54. pointblank/locales/data/CH/internet.json +20 -0
  55. pointblank/locales/data/CH/misc.json +10 -0
  56. pointblank/locales/data/CH/person.json +64 -0
  57. pointblank/locales/data/CH/text.json +45 -0
  58. pointblank/locales/data/CL/address.json +71 -0
  59. pointblank/locales/data/CL/company.json +60 -0
  60. pointblank/locales/data/CL/internet.json +19 -0
  61. pointblank/locales/data/CL/misc.json +7 -0
  62. pointblank/locales/data/CL/person.json +38 -0
  63. pointblank/locales/data/CL/text.json +38 -0
  64. pointblank/locales/data/CN/address.json +124 -0
  65. pointblank/locales/data/CN/company.json +76 -0
  66. pointblank/locales/data/CN/internet.json +20 -0
  67. pointblank/locales/data/CN/misc.json +8 -0
  68. pointblank/locales/data/CN/person.json +50 -0
  69. pointblank/locales/data/CN/text.json +38 -0
  70. pointblank/locales/data/CO/address.json +76 -0
  71. pointblank/locales/data/CO/company.json +60 -0
  72. pointblank/locales/data/CO/internet.json +19 -0
  73. pointblank/locales/data/CO/misc.json +7 -0
  74. pointblank/locales/data/CO/person.json +38 -0
  75. pointblank/locales/data/CO/text.json +38 -0
  76. pointblank/locales/data/CY/address.json +62 -0
  77. pointblank/locales/data/CY/company.json +60 -0
  78. pointblank/locales/data/CY/internet.json +19 -0
  79. pointblank/locales/data/CY/misc.json +7 -0
  80. pointblank/locales/data/CY/person.json +38 -0
  81. pointblank/locales/data/CY/text.json +38 -0
  82. pointblank/locales/data/CZ/address.json +70 -0
  83. pointblank/locales/data/CZ/company.json +61 -0
  84. pointblank/locales/data/CZ/internet.json +19 -0
  85. pointblank/locales/data/CZ/misc.json +7 -0
  86. pointblank/locales/data/CZ/person.json +40 -0
  87. pointblank/locales/data/CZ/text.json +38 -0
  88. pointblank/locales/data/DE/address.json +756 -0
  89. pointblank/locales/data/DE/company.json +101 -0
  90. pointblank/locales/data/DE/internet.json +22 -0
  91. pointblank/locales/data/DE/misc.json +11 -0
  92. pointblank/locales/data/DE/person.json +1026 -0
  93. pointblank/locales/data/DE/text.json +50 -0
  94. pointblank/locales/data/DK/address.json +231 -0
  95. pointblank/locales/data/DK/company.json +65 -0
  96. pointblank/locales/data/DK/internet.json +20 -0
  97. pointblank/locales/data/DK/misc.json +7 -0
  98. pointblank/locales/data/DK/person.json +45 -0
  99. pointblank/locales/data/DK/text.json +43 -0
  100. pointblank/locales/data/EE/address.json +69 -0
  101. pointblank/locales/data/EE/company.json +60 -0
  102. pointblank/locales/data/EE/internet.json +19 -0
  103. pointblank/locales/data/EE/misc.json +7 -0
  104. pointblank/locales/data/EE/person.json +39 -0
  105. pointblank/locales/data/EE/text.json +38 -0
  106. pointblank/locales/data/ES/address.json +3086 -0
  107. pointblank/locales/data/ES/company.json +644 -0
  108. pointblank/locales/data/ES/internet.json +25 -0
  109. pointblank/locales/data/ES/misc.json +11 -0
  110. pointblank/locales/data/ES/person.json +488 -0
  111. pointblank/locales/data/ES/text.json +49 -0
  112. pointblank/locales/data/FI/address.json +93 -0
  113. pointblank/locales/data/FI/company.json +65 -0
  114. pointblank/locales/data/FI/internet.json +20 -0
  115. pointblank/locales/data/FI/misc.json +8 -0
  116. pointblank/locales/data/FI/person.json +17 -0
  117. pointblank/locales/data/FI/text.json +35 -0
  118. pointblank/locales/data/FR/address.json +619 -0
  119. pointblank/locales/data/FR/company.json +111 -0
  120. pointblank/locales/data/FR/internet.json +22 -0
  121. pointblank/locales/data/FR/misc.json +11 -0
  122. pointblank/locales/data/FR/person.json +1066 -0
  123. pointblank/locales/data/FR/text.json +50 -0
  124. pointblank/locales/data/GB/address.json +5759 -0
  125. pointblank/locales/data/GB/company.json +131 -0
  126. pointblank/locales/data/GB/internet.json +24 -0
  127. pointblank/locales/data/GB/misc.json +45 -0
  128. pointblank/locales/data/GB/person.json +578 -0
  129. pointblank/locales/data/GB/text.json +61 -0
  130. pointblank/locales/data/GR/address.json +68 -0
  131. pointblank/locales/data/GR/company.json +61 -0
  132. pointblank/locales/data/GR/internet.json +19 -0
  133. pointblank/locales/data/GR/misc.json +7 -0
  134. pointblank/locales/data/GR/person.json +39 -0
  135. pointblank/locales/data/GR/text.json +38 -0
  136. pointblank/locales/data/HK/address.json +79 -0
  137. pointblank/locales/data/HK/company.json +69 -0
  138. pointblank/locales/data/HK/internet.json +19 -0
  139. pointblank/locales/data/HK/misc.json +7 -0
  140. pointblank/locales/data/HK/person.json +42 -0
  141. pointblank/locales/data/HK/text.json +38 -0
  142. pointblank/locales/data/HR/address.json +73 -0
  143. pointblank/locales/data/HR/company.json +60 -0
  144. pointblank/locales/data/HR/internet.json +19 -0
  145. pointblank/locales/data/HR/misc.json +7 -0
  146. pointblank/locales/data/HR/person.json +38 -0
  147. pointblank/locales/data/HR/text.json +38 -0
  148. pointblank/locales/data/HU/address.json +70 -0
  149. pointblank/locales/data/HU/company.json +61 -0
  150. pointblank/locales/data/HU/internet.json +19 -0
  151. pointblank/locales/data/HU/misc.json +7 -0
  152. pointblank/locales/data/HU/person.json +40 -0
  153. pointblank/locales/data/HU/text.json +38 -0
  154. pointblank/locales/data/ID/address.json +68 -0
  155. pointblank/locales/data/ID/company.json +61 -0
  156. pointblank/locales/data/ID/internet.json +19 -0
  157. pointblank/locales/data/ID/misc.json +7 -0
  158. pointblank/locales/data/ID/person.json +40 -0
  159. pointblank/locales/data/ID/text.json +38 -0
  160. pointblank/locales/data/IE/address.json +643 -0
  161. pointblank/locales/data/IE/company.json +140 -0
  162. pointblank/locales/data/IE/internet.json +24 -0
  163. pointblank/locales/data/IE/misc.json +44 -0
  164. pointblank/locales/data/IE/person.json +55 -0
  165. pointblank/locales/data/IE/text.json +60 -0
  166. pointblank/locales/data/IN/address.json +92 -0
  167. pointblank/locales/data/IN/company.json +65 -0
  168. pointblank/locales/data/IN/internet.json +20 -0
  169. pointblank/locales/data/IN/misc.json +8 -0
  170. pointblank/locales/data/IN/person.json +52 -0
  171. pointblank/locales/data/IN/text.json +39 -0
  172. pointblank/locales/data/IS/address.json +63 -0
  173. pointblank/locales/data/IS/company.json +61 -0
  174. pointblank/locales/data/IS/internet.json +19 -0
  175. pointblank/locales/data/IS/misc.json +7 -0
  176. pointblank/locales/data/IS/person.json +44 -0
  177. pointblank/locales/data/IS/text.json +38 -0
  178. pointblank/locales/data/IT/address.json +192 -0
  179. pointblank/locales/data/IT/company.json +137 -0
  180. pointblank/locales/data/IT/internet.json +20 -0
  181. pointblank/locales/data/IT/misc.json +10 -0
  182. pointblank/locales/data/IT/person.json +70 -0
  183. pointblank/locales/data/IT/text.json +44 -0
  184. pointblank/locales/data/JP/address.json +713 -0
  185. pointblank/locales/data/JP/company.json +113 -0
  186. pointblank/locales/data/JP/internet.json +22 -0
  187. pointblank/locales/data/JP/misc.json +10 -0
  188. pointblank/locales/data/JP/person.json +1057 -0
  189. pointblank/locales/data/JP/text.json +51 -0
  190. pointblank/locales/data/KR/address.json +77 -0
  191. pointblank/locales/data/KR/company.json +68 -0
  192. pointblank/locales/data/KR/internet.json +19 -0
  193. pointblank/locales/data/KR/misc.json +7 -0
  194. pointblank/locales/data/KR/person.json +40 -0
  195. pointblank/locales/data/KR/text.json +38 -0
  196. pointblank/locales/data/LT/address.json +66 -0
  197. pointblank/locales/data/LT/company.json +60 -0
  198. pointblank/locales/data/LT/internet.json +19 -0
  199. pointblank/locales/data/LT/misc.json +7 -0
  200. pointblank/locales/data/LT/person.json +42 -0
  201. pointblank/locales/data/LT/text.json +38 -0
  202. pointblank/locales/data/LU/address.json +66 -0
  203. pointblank/locales/data/LU/company.json +60 -0
  204. pointblank/locales/data/LU/internet.json +19 -0
  205. pointblank/locales/data/LU/misc.json +7 -0
  206. pointblank/locales/data/LU/person.json +38 -0
  207. pointblank/locales/data/LU/text.json +38 -0
  208. pointblank/locales/data/LV/address.json +62 -0
  209. pointblank/locales/data/LV/company.json +60 -0
  210. pointblank/locales/data/LV/internet.json +19 -0
  211. pointblank/locales/data/LV/misc.json +7 -0
  212. pointblank/locales/data/LV/person.json +40 -0
  213. pointblank/locales/data/LV/text.json +38 -0
  214. pointblank/locales/data/MT/address.json +61 -0
  215. pointblank/locales/data/MT/company.json +60 -0
  216. pointblank/locales/data/MT/internet.json +19 -0
  217. pointblank/locales/data/MT/misc.json +7 -0
  218. pointblank/locales/data/MT/person.json +38 -0
  219. pointblank/locales/data/MT/text.json +38 -0
  220. pointblank/locales/data/MX/address.json +100 -0
  221. pointblank/locales/data/MX/company.json +65 -0
  222. pointblank/locales/data/MX/internet.json +20 -0
  223. pointblank/locales/data/MX/misc.json +8 -0
  224. pointblank/locales/data/MX/person.json +18 -0
  225. pointblank/locales/data/MX/text.json +39 -0
  226. pointblank/locales/data/NL/address.json +1517 -0
  227. pointblank/locales/data/NL/company.json +133 -0
  228. pointblank/locales/data/NL/internet.json +44 -0
  229. pointblank/locales/data/NL/misc.json +55 -0
  230. pointblank/locales/data/NL/person.json +365 -0
  231. pointblank/locales/data/NL/text.json +210 -0
  232. pointblank/locales/data/NO/address.json +86 -0
  233. pointblank/locales/data/NO/company.json +66 -0
  234. pointblank/locales/data/NO/internet.json +20 -0
  235. pointblank/locales/data/NO/misc.json +8 -0
  236. pointblank/locales/data/NO/person.json +17 -0
  237. pointblank/locales/data/NO/text.json +35 -0
  238. pointblank/locales/data/NZ/address.json +90 -0
  239. pointblank/locales/data/NZ/company.json +65 -0
  240. pointblank/locales/data/NZ/internet.json +20 -0
  241. pointblank/locales/data/NZ/misc.json +8 -0
  242. pointblank/locales/data/NZ/person.json +17 -0
  243. pointblank/locales/data/NZ/text.json +39 -0
  244. pointblank/locales/data/PH/address.json +67 -0
  245. pointblank/locales/data/PH/company.json +61 -0
  246. pointblank/locales/data/PH/internet.json +19 -0
  247. pointblank/locales/data/PH/misc.json +7 -0
  248. pointblank/locales/data/PH/person.json +40 -0
  249. pointblank/locales/data/PH/text.json +38 -0
  250. pointblank/locales/data/PL/address.json +91 -0
  251. pointblank/locales/data/PL/company.json +65 -0
  252. pointblank/locales/data/PL/internet.json +20 -0
  253. pointblank/locales/data/PL/misc.json +8 -0
  254. pointblank/locales/data/PL/person.json +17 -0
  255. pointblank/locales/data/PL/text.json +35 -0
  256. pointblank/locales/data/PT/address.json +90 -0
  257. pointblank/locales/data/PT/company.json +65 -0
  258. pointblank/locales/data/PT/internet.json +20 -0
  259. pointblank/locales/data/PT/misc.json +8 -0
  260. pointblank/locales/data/PT/person.json +17 -0
  261. pointblank/locales/data/PT/text.json +35 -0
  262. pointblank/locales/data/RO/address.json +73 -0
  263. pointblank/locales/data/RO/company.json +61 -0
  264. pointblank/locales/data/RO/internet.json +19 -0
  265. pointblank/locales/data/RO/misc.json +7 -0
  266. pointblank/locales/data/RO/person.json +40 -0
  267. pointblank/locales/data/RO/text.json +38 -0
  268. pointblank/locales/data/RU/address.json +74 -0
  269. pointblank/locales/data/RU/company.json +60 -0
  270. pointblank/locales/data/RU/internet.json +19 -0
  271. pointblank/locales/data/RU/misc.json +7 -0
  272. pointblank/locales/data/RU/person.json +38 -0
  273. pointblank/locales/data/RU/text.json +38 -0
  274. pointblank/locales/data/SE/address.json +247 -0
  275. pointblank/locales/data/SE/company.json +65 -0
  276. pointblank/locales/data/SE/internet.json +20 -0
  277. pointblank/locales/data/SE/misc.json +7 -0
  278. pointblank/locales/data/SE/person.json +45 -0
  279. pointblank/locales/data/SE/text.json +43 -0
  280. pointblank/locales/data/SI/address.json +67 -0
  281. pointblank/locales/data/SI/company.json +60 -0
  282. pointblank/locales/data/SI/internet.json +19 -0
  283. pointblank/locales/data/SI/misc.json +7 -0
  284. pointblank/locales/data/SI/person.json +38 -0
  285. pointblank/locales/data/SI/text.json +38 -0
  286. pointblank/locales/data/SK/address.json +64 -0
  287. pointblank/locales/data/SK/company.json +60 -0
  288. pointblank/locales/data/SK/internet.json +19 -0
  289. pointblank/locales/data/SK/misc.json +7 -0
  290. pointblank/locales/data/SK/person.json +38 -0
  291. pointblank/locales/data/SK/text.json +38 -0
  292. pointblank/locales/data/TR/address.json +105 -0
  293. pointblank/locales/data/TR/company.json +65 -0
  294. pointblank/locales/data/TR/internet.json +20 -0
  295. pointblank/locales/data/TR/misc.json +8 -0
  296. pointblank/locales/data/TR/person.json +17 -0
  297. pointblank/locales/data/TR/text.json +35 -0
  298. pointblank/locales/data/TW/address.json +86 -0
  299. pointblank/locales/data/TW/company.json +69 -0
  300. pointblank/locales/data/TW/internet.json +19 -0
  301. pointblank/locales/data/TW/misc.json +7 -0
  302. pointblank/locales/data/TW/person.json +42 -0
  303. pointblank/locales/data/TW/text.json +38 -0
  304. pointblank/locales/data/US/address.json +996 -0
  305. pointblank/locales/data/US/company.json +131 -0
  306. pointblank/locales/data/US/internet.json +22 -0
  307. pointblank/locales/data/US/misc.json +11 -0
  308. pointblank/locales/data/US/person.json +1092 -0
  309. pointblank/locales/data/US/text.json +56 -0
  310. pointblank/locales/data/_shared/misc.json +42 -0
  311. pointblank/schema.py +339 -2
  312. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/METADATA +45 -1
  313. pointblank-0.20.0.dist-info/RECORD +366 -0
  314. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/WHEEL +1 -1
  315. pointblank-0.19.0.dist-info/RECORD +0 -59
  316. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/entry_points.txt +0 -0
  317. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/licenses/LICENSE +0 -0
  318. {pointblank-0.19.0.dist-info → pointblank-0.20.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1476 @@
1
+ """
2
+ Country-based data generation for synthetic test data.
3
+
4
+ This module provides country-specific data generation without external dependencies.
5
+ It supports generating realistic names, addresses, emails, and other data types
6
+ with proper localization based on ISO 3166-1 country codes.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import random
13
+ import unicodedata
14
+ from dataclasses import dataclass, field
15
+ from importlib.resources import files
16
+ from typing import TYPE_CHECKING, Any
17
+
18
+ if TYPE_CHECKING:
19
+ pass
20
+
21
+ __all__ = [
22
+ "LocaleRegistry",
23
+ "LocaleGenerator",
24
+ "get_generator",
25
+ "COUNTRY_CODE_MAP",
26
+ "COUNTRIES_WITH_FULL_DATA",
27
+ ]
28
+
29
+
30
# ISO 3166-1 country code mappings.
# Maps alpha-2 (2-letter) and alpha-3 (3-letter) codes, plus the common "UK"
# alias, to internal data directory names. Every country that ships a data
# directory needs both of its codes here: `_normalize_country()` consults this
# table first, and alpha-3 codes cannot be resolved anywhere else.
COUNTRY_CODE_MAP: dict[str, str] = {
    # United States
    "US": "US",
    "USA": "US",
    # United Kingdom
    "GB": "GB",
    "GBR": "GB",
    "UK": "GB",  # Common alias
    # Ireland
    "IE": "IE",
    "IRL": "IE",
    # Iceland
    "IS": "IS",
    "ISL": "IS",
    # Australia
    "AU": "AU",
    "AUS": "AU",
    # Argentina
    "AR": "AR",
    "ARG": "AR",
    # Canada
    "CA": "CA",
    "CAN": "CA",
    # Germany
    "DE": "DE",
    "DEU": "DE",
    # Greece
    "GR": "GR",
    "GRC": "GR",
    # Austria
    "AT": "AT",
    "AUT": "AT",
    # Switzerland
    "CH": "CH",
    "CHE": "CH",
    # Chile
    "CL": "CL",
    "CHL": "CL",
    # France
    "FR": "FR",
    "FRA": "FR",
    # Spain
    "ES": "ES",
    "ESP": "ES",
    # Mexico
    "MX": "MX",
    "MEX": "MX",
    # Malta
    "MT": "MT",
    "MLT": "MT",
    # Portugal
    "PT": "PT",
    "PRT": "PT",
    # Brazil
    "BR": "BR",
    "BRA": "BR",
    # India
    "IN": "IN",
    "IND": "IN",
    # Italy
    "IT": "IT",
    "ITA": "IT",
    # Netherlands
    "NL": "NL",
    "NLD": "NL",
    # Belgium
    "BE": "BE",
    "BEL": "BE",
    # Bulgaria
    "BG": "BG",
    "BGR": "BG",
    # Poland
    "PL": "PL",
    "POL": "PL",
    # Romania
    "RO": "RO",
    "ROU": "RO",
    # Russia
    "RU": "RU",
    "RUS": "RU",
    # Slovakia
    "SK": "SK",
    "SVK": "SK",
    # Slovenia
    "SI": "SI",
    "SVN": "SI",
    # Japan
    "JP": "JP",
    "JPN": "JP",
    # South Korea
    "KR": "KR",
    "KOR": "KR",
    # Latvia
    "LV": "LV",
    "LVA": "LV",
    # Lithuania
    "LT": "LT",
    "LTU": "LT",
    # Luxembourg
    "LU": "LU",
    "LUX": "LU",
    # China
    "CN": "CN",
    "CHN": "CN",
    # Colombia
    "CO": "CO",
    "COL": "CO",
    # Cyprus
    "CY": "CY",
    "CYP": "CY",
    # Czech Republic
    "CZ": "CZ",
    "CZE": "CZ",
    # Estonia
    "EE": "EE",
    "EST": "EE",
    # Hong Kong
    "HK": "HK",
    "HKG": "HK",
    # Croatia
    "HR": "HR",
    "HRV": "HR",
    # Hungary
    "HU": "HU",
    "HUN": "HU",
    # Indonesia
    "ID": "ID",
    "IDN": "ID",
    # Taiwan
    "TW": "TW",
    "TWN": "TW",
    # Turkey
    "TR": "TR",
    "TUR": "TR",
    # New Zealand
    "NZ": "NZ",
    "NZL": "NZ",
    # Philippines
    "PH": "PH",
    "PHL": "PH",
    # Nordic countries: these ship data directories and fallback chains but were
    # missing here, so their alpha-3 codes were rejected as unknown
    # Denmark
    "DK": "DK",
    "DNK": "DK",
    # Finland
    "FI": "FI",
    "FIN": "FI",
    # Norway
    "NO": "NO",
    "NOR": "NO",
    # Sweden
    "SE": "SE",
    "SWE": "SE",
}
173
+
174
# ISO alpha-2 codes of the countries whose data directory contains the full set
# of locale files: address, company, internet, misc, person, and text JSON.
# The country names are carried alongside the codes purely for readability;
# only the codes end up in the list, in the order written here.
COUNTRIES_WITH_FULL_DATA: list[str] = [
    code
    for code, _country_name in (
        ("US", "United States"),
        ("AR", "Argentina"),
        ("AT", "Austria"),
        ("AU", "Australia"),
        ("BE", "Belgium"),
        ("BG", "Bulgaria"),
        ("BR", "Brazil"),
        ("CA", "Canada"),
        ("CH", "Switzerland"),
        ("CL", "Chile"),
        ("CN", "China"),
        ("CO", "Colombia"),
        ("CY", "Cyprus"),
        ("CZ", "Czech Republic"),
        ("DE", "Germany"),
        ("DK", "Denmark"),
        ("EE", "Estonia"),
        ("ES", "Spain"),
        ("FI", "Finland"),
        ("FR", "France"),
        ("GB", "United Kingdom"),
        ("GR", "Greece"),
        ("HK", "Hong Kong"),
        ("HR", "Croatia"),
        ("HU", "Hungary"),
        ("ID", "Indonesia"),
        ("IE", "Ireland"),
        ("IN", "India"),
        ("IS", "Iceland"),
        ("IT", "Italy"),
        ("JP", "Japan"),
        ("KR", "South Korea"),
        ("LV", "Latvia"),
        ("LT", "Lithuania"),
        ("LU", "Luxembourg"),
        ("MT", "Malta"),
        ("MX", "Mexico"),
        ("NL", "Netherlands"),
        ("NO", "Norway"),
        ("NZ", "New Zealand"),
        ("PL", "Poland"),
        ("PH", "Philippines"),
        ("PT", "Portugal"),
        ("RO", "Romania"),
        ("RU", "Russia"),
        ("SE", "Sweden"),
        ("SK", "Slovakia"),
        ("SI", "Slovenia"),
        ("TR", "Turkey"),
        ("TW", "Taiwan"),
    )
]
229
+
230
# Fallback chains, keyed by country code. Each chain starts with the country
# itself and lists the countries whose data fills in whatever it is missing,
# from most to least preferred. Every chain is written once as a tuple and
# keyed by its own first element.
COUNTRY_FALLBACKS: dict[str, list[str]] = {
    chain[0]: list(chain)
    for chain in (
        # English-speaking countries end at US
        ("GB", "US"),
        ("IE", "GB", "US"),
        ("AU", "GB", "US"),
        ("CA", "US"),
        # German-speaking countries
        ("DE", "US"),
        ("AT", "DE", "US"),
        ("CH", "DE", "US"),
        # French-speaking
        ("FR", "US"),
        # Belgian (Dutch/French bilingual)
        ("BE", "NL", "FR", "US"),
        # Scandinavian
        ("DK", "DE", "US"),
        ("NO", "DK", "DE", "US"),
        ("SE", "DK", "DE", "US"),
        ("FI", "SE", "US"),
        # Spanish-speaking
        ("ES", "US"),
        ("MX", "ES", "US"),
        # Portuguese-speaking
        ("PT", "US"),
        ("BR", "PT", "US"),
        # Other European
        ("IT", "US"),
        ("NL", "US"),
        ("PL", "US"),
        ("RU", "US"),
        # Asian countries
        ("JP", "US"),
        ("KR", "US"),
        ("CN", "US"),
        ("TW", "CN", "US"),
        # Turkey
        ("TR", "US"),
    )
}
269
+
270
+
271
@dataclass
class LocaleData:
    """
    Bundle of the per-category data dictionaries for one locale.

    Only `locale` is required. Each category field defaults to its own new,
    empty dictionary, so instances never share a default.
    """

    # Identifier of the locale this bundle belongs to
    locale: str

    # One dictionary per data category; the field names match the category
    # names used for the JSON data files (person.json, address.json, ...)
    person: dict[str, Any] = field(default_factory=dict)
    address: dict[str, Any] = field(default_factory=dict)
    company: dict[str, Any] = field(default_factory=dict)
    internet: dict[str, Any] = field(default_factory=dict)
    text: dict[str, Any] = field(default_factory=dict)
    misc: dict[str, Any] = field(default_factory=dict)
282
+
283
+
284
# Replacement table for special Latin characters, keyed by single character.
# Umlauts and a few Scandinavian letters expand to two letters; everything else
# collapses to its unaccented base letter. Groups that share one replacement
# are written with dict.fromkeys; the rest are spelled out pair by pair.
_TRANSLITERATION_MAP: dict[str, str] = {
    # German umlauts -> add 'e'; sharp s -> 'ss'
    **{"ä": "ae", "ö": "oe", "ü": "ue", "Ä": "Ae", "Ö": "Oe", "Ü": "Ue", "ß": "ss"},
    # Scandinavian
    **{"å": "aa", "Å": "Aa", "ø": "oe", "Ø": "Oe", "æ": "ae", "Æ": "Ae"},
    # French/Spanish/Portuguese/Italian accents
    **dict.fromkeys("àáâã", "a"),
    **dict.fromkeys("ÀÁÂÃ", "A"),
    **dict.fromkeys("èéêë", "e"),
    **dict.fromkeys("ÈÉÊË", "E"),
    **dict.fromkeys("ìíîï", "i"),
    **dict.fromkeys("ÌÍÎÏ", "I"),
    **dict.fromkeys("òóôõ", "o"),
    **dict.fromkeys("ÒÓÔÕ", "O"),
    **dict.fromkeys("ùúû", "u"),
    **dict.fromkeys("ÙÚÛ", "U"),
    **{"ñ": "n", "Ñ": "N", "ç": "c", "Ç": "C", "ý": "y", "ÿ": "y", "Ý": "Y"},
    # Eastern European
    **{"ł": "l", "Ł": "L", "ń": "n", "Ń": "N", "ś": "s", "Ś": "S"},
    **{"ź": "z", "Ź": "Z", "ż": "z", "Ż": "Z", "ć": "c", "Ć": "C"},
    **{"ě": "e", "Ě": "E", "š": "s", "Š": "S", "č": "c", "Č": "C"},
    **{"ř": "r", "Ř": "R", "ž": "z", "Ž": "Z", "ů": "u", "Ů": "U"},
    **{"ď": "d", "Ď": "D", "ť": "t", "Ť": "T", "ň": "n", "Ň": "N"},
    # Other
    **{"đ": "d", "Đ": "D", "ğ": "g", "Ğ": "G", "ı": "i", "İ": "I"},
    **{"ş": "s", "Ş": "S", "ț": "t", "Ț": "T", "ă": "a", "Ă": "A"},
}
392
+
393
+
394
def _transliterate_to_ascii(text: str) -> str:
    """
    Reduce accented text to plain letters for email addresses and usernames.

    Works in two passes. Characters listed in `_TRANSLITERATION_MAP` are replaced
    first, so that German umlauts become `ue`/`oe`/`ae` instead of merely losing
    their dots. The result is then NFD-normalized and its combining marks are
    dropped, which strips the accents from any remaining accented characters.

    Parameters
    ----------
    text
        The text to transliterate.

    Returns
    -------
    str
        The text with mapped characters replaced and combining marks removed.
        Only combining marks are filtered in the second pass, so other non-ASCII
        characters can still be present in the result.
    """
    # Pass 1: explicit replacements; characters without an entry are kept as-is
    mapped = "".join(_TRANSLITERATION_MAP.get(ch, ch) for ch in text)

    # Pass 2: NFD splits e.g. é into e + combining accent; category "Mn"
    # (nonspacing mark) identifies the combining part, which is discarded
    decomposed = unicodedata.normalize("NFD", mapped)
    return "".join(ch for ch in decomposed if unicodedata.category(ch) != "Mn")
427
+
428
+
429
def _normalize_country(country: str) -> str:
    """
    Normalize a country code to the standard 2-letter ISO 3166-1 alpha-2 format.

    Parameters
    ----------
    country
        Country code in alpha-2 (US), alpha-3 (USA), or legacy locale format (en_US).
        Matching ignores case and surrounding whitespace.

    Returns
    -------
    str
        The normalized 2-letter country code.

    Raises
    ------
    ValueError
        If the country code is not recognized. The message lists every code that
        would have been accepted.
    """
    code = country.strip().upper()

    # Legacy locale format (en_US, de-DE, etc.): keep the country half. Inputs
    # that do not split into exactly two parts are looked up unchanged.
    if "_" in code or "-" in code:
        parts = code.replace("-", "_").split("_")
        if len(parts) == 2:
            code = parts[1]

    # Primary lookup: alpha-2, alpha-3, and alias codes
    if code in COUNTRY_CODE_MAP:
        return COUNTRY_CODE_MAP[code]

    # A code with a fallback chain is also accepted, even without a map entry
    if code in COUNTRY_FALLBACKS:
        return code

    # Report the union of both tables: listing only COUNTRY_CODE_MAP would hide
    # codes that the fallback check above accepts
    supported = sorted(set(COUNTRY_CODE_MAP) | set(COUNTRY_FALLBACKS))
    raise ValueError(
        f"Unknown country code: {country!r}. "
        f"Supported codes: {', '.join(supported)}"
    )
471
+
472
+
473
class LocaleRegistry:
    """Singleton registry that loads, merges, and caches per-country data."""

    _instance: LocaleRegistry | None = None
    _cache: dict[str, LocaleData]

    def __new__(cls) -> LocaleRegistry:
        # Every construction returns the one shared instance; it is created,
        # together with its empty cache, on the first call only
        shared = cls._instance
        if shared is not None:
            return shared
        shared = super().__new__(cls)
        shared._cache = {}
        cls._instance = shared
        return shared

    def get(self, country: str) -> LocaleData:
        """
        Return the merged data for a country, loading it on first request.

        Parameters
        ----------
        country
            Country code (e.g., "US", "DE", "USA", "DEU").
            Also accepts legacy locale codes like "en_US" for backwards compatibility.

        Returns
        -------
        LocaleData
            The country data, with gaps filled from the countries in its
            fallback chain. The same cached object is returned on later calls.
        """
        key = _normalize_country(country)

        try:
            return self._cache[key]
        except KeyError:
            pass

        # Countries without a configured chain fall back straight to US
        chain = COUNTRY_FALLBACKS.get(key, [key, "US"])
        if key not in chain:
            chain = [key, *chain]

        loaded = self._load_with_fallback(chain)
        self._cache[key] = loaded
        return loaded

    def _load_with_fallback(self, fallback_chain: list[str]) -> LocaleData:
        """Build one LocaleData by layering shared data and the fallback chain."""
        merged = LocaleData(locale=fallback_chain[0])

        # Layering order: "_shared" first, then the chain from its last entry to
        # its first, so that later (more specific) layers override earlier ones
        for layer in ["_shared", *reversed(fallback_chain)]:
            payload = self._load_country_files(layer)
            if payload:
                self._merge_data(merged, payload)

        return merged

    def _load_country_files(self, country: str) -> dict[str, Any] | None:
        """
        Read the category JSON files found in one data directory.

        Returns a dict keyed by category name, or None when the directory does
        not exist or none of its category files could be read and parsed.
        """
        categories = ["person", "address", "company", "internet", "text", "misc"]
        try:
            directory = files("pointblank.locales.data") / country
            if not directory.is_dir():
                return None

            loaded: dict[str, Any] = {}
            for name in categories:
                resource = directory / f"{name}.json"
                try:
                    raw = resource.read_text(encoding="utf-8")
                    loaded[name] = json.loads(raw)
                except (FileNotFoundError, json.JSONDecodeError):
                    # A missing or unparsable file just leaves its category out
                    continue

            return loaded or None
        except (TypeError, FileNotFoundError):
            return None

    def _merge_data(self, target: LocaleData, source: dict[str, Any]) -> None:
        """Merge source data into target, one category at a time."""
        for name, incoming in source.items():
            # Categories the target has no attribute for are ignored
            if not hasattr(target, name):
                continue

            current = getattr(target, name)
            if not (isinstance(current, dict) and isinstance(incoming, dict)):
                # Nothing to merge key-by-key: replace the attribute outright
                setattr(target, name, incoming)
                continue

            # Top-level keys from the incoming data overwrite existing ones
            current.update(incoming)

    def clear_cache(self) -> None:
        """Drop all cached country data so it is reloaded on next access."""
        self._cache.clear()
566
+
567
+
568
+ class LocaleGenerator:
569
+ """
570
+ Generator for country-specific test data.
571
+
572
+ This class provides methods to generate realistic data like names, emails,
573
+ addresses, etc. based on country-specific patterns and data.
574
+ """
575
+
576
def __init__(self, country: str = "US", seed: int | None = None):
    """
    Bind the generator to one country and give it its own random stream.

    Parameters
    ----------
    country
        Country code (e.g., "US", "DE", "USA", "DEU").
        Also accepts legacy locale codes like "en_US" for backwards compatibility.
    seed
        Random seed for reproducibility.
    """
    normalized = _normalize_country(country)
    self.country_code = normalized
    # A private Random instance keeps this generator's draws independent of
    # the module-level random state
    self.rng = random.Random(seed)
    registry = LocaleRegistry()
    self._registry = registry
    self._data = registry.get(normalized)
592
+
593
+ def seed(self, seed: int) -> None:
594
+ """Set the random seed."""
595
+ self.rng.seed(seed)
596
+
597
+ # =========================================================================
598
+ # Person
599
+ # =========================================================================
600
+
601
+ _current_person: dict[str, str] | None = None
602
+ _row_persons: list[dict[str, str]] | None = None
603
+
604
+ def _get_person(self, gender: str | None = None) -> dict[str, str]:
605
+ """Get a coherent person (first_name, last_name, gender) from the data."""
606
+ # If no gender specified, randomly select one (weighted toward male/female)
607
+ if gender is None:
608
+ gender = self.rng.choice(["male", "female"])
609
+
610
+ return {
611
+ "first_name": self._generate_first_name(gender),
612
+ "last_name": self._generate_last_name(),
613
+ "gender": gender,
614
+ }
615
+
616
+ def _generate_first_name(self, gender: str | None = None) -> str:
617
+ """Generate a random first name (internal, no caching)."""
618
+ names = self._data.person.get("first_names", {})
619
+
620
+ if gender and gender in names:
621
+ name_list = names[gender]
622
+ elif "neutral" in names:
623
+ # Combine all available names
624
+ all_names = []
625
+ for category in ["male", "female", "neutral"]:
626
+ all_names.extend(names.get(category, []))
627
+ name_list = all_names if all_names else ["Alex"]
628
+ else:
629
+ # Flatten all categories
630
+ all_names = []
631
+ for category_names in names.values():
632
+ if isinstance(category_names, list):
633
+ all_names.extend(category_names)
634
+ name_list = all_names if all_names else ["Alex"]
635
+
636
+ return self.rng.choice(name_list)
637
+
638
+ def _generate_last_name(self) -> str:
639
+ """Generate a random last name (internal, no caching)."""
640
+ names = self._data.person.get("last_names", ["Smith"])
641
+ return self.rng.choice(names)
642
+
643
+ def init_row_persons(self, n_rows: int) -> None:
644
+ """
645
+ Pre-generate person data for multiple rows to ensure coherence across columns.
646
+
647
+ This should be called before generating a dataset with person-related columns.
648
+ When active, first_name(), last_name(), name(), email() will use the person
649
+ for the current row (set via set_row()).
650
+
651
+ Parameters
652
+ ----------
653
+ n_rows
654
+ Number of rows to pre-generate persons for.
655
+ """
656
+ self._row_persons = [self._get_person() for _ in range(n_rows)]
657
+
658
+ def clear_row_persons(self) -> None:
659
+ """Clear all pre-generated row persons."""
660
+ self._row_persons = None
661
+
662
+ def new_person(self, gender: str | None = None) -> dict[str, str]:
663
+ """
664
+ Select a new random person and cache it for coherent generation.
665
+
666
+ Call this before generating related person components (first_name, last_name, email)
667
+ to ensure they all refer to the same person.
668
+
669
+ Returns
670
+ -------
671
+ dict
672
+ The selected person with first_name and last_name.
673
+ """
674
+ self._current_person = self._get_person(gender)
675
+ return self._current_person
676
+
677
+ def _get_current_person(self) -> dict[str, str]:
678
+ """Get the current cached person, or select a new one."""
679
+ # If row persons are active, use those
680
+ if self._row_persons is not None and self._current_row is not None:
681
+ return self._row_persons[self._current_row]
682
+ # Otherwise use single cached person
683
+ if self._current_person is None:
684
+ self._current_person = self._get_person()
685
+ return self._current_person
686
+
687
+ def clear_person(self) -> None:
688
+ """Clear the cached person so the next call will select a new one."""
689
+ self._current_person = None
690
+
691
+ def first_name(self, gender: str | None = None) -> str:
692
+ """Generate a random first name (coherent with current person context)."""
693
+ person = self._get_current_person()
694
+ return person.get("first_name", "Alex")
695
+
696
+ def last_name(self) -> str:
697
+ """Generate a random last name (coherent with current person context)."""
698
+ person = self._get_current_person()
699
+ return person.get("last_name", "Smith")
700
+
701
+ def name(self, gender: str | None = None) -> str:
702
+ """Generate a simple full name (first + last, coherent with current person context).
703
+
704
+ For names with prefixes (Mr., Ms., Dr., etc.) and occasional suffixes (Jr., III),
705
+ use name_full() instead.
706
+ """
707
+ person = self._get_current_person()
708
+ first = person.get("first_name", "Alex")
709
+ last = person.get("last_name", "Smith")
710
+
711
+ # Check if locale uses "last first" order (e.g., Japanese)
712
+ formats = self._data.person.get("name_formats", ["{first_name} {last_name}"])
713
+ # Use the simplest format (usually first one, which is typically "first last" or "last first")
714
+ if formats and "{last_name} {first_name}" in formats[0]:
715
+ return f"{last} {first}"
716
+ return f"{first} {last}"
717
+
718
+ def name_full(self, gender: str | None = None) -> str:
719
+ """Generate a full name with optional prefix and rare suffix.
720
+
721
+ Includes honorific prefixes with realistic frequencies:
722
+ - Common honorifics (Mr., Ms., Mrs., etc.): ~95% of names
723
+ - Professional titles (Dr., Prof., Rev., etc.): ~5% of names
724
+
725
+ Suffixes (Jr., II, III) appear very rarely (~1 in 2000).
726
+ """
727
+ person = self._get_current_person()
728
+ first = person.get("first_name", "Alex")
729
+ last = person.get("last_name", "Smith")
730
+
731
+ # Get gender for prefix selection (from person context or parameter)
732
+ person_gender = person.get("gender", "neutral")
733
+ if gender:
734
+ person_gender = gender
735
+
736
+ # Professional titles are rare (~2-3% of population for Dr., ~0.5% for Prof.)
737
+ # These should appear infrequently
738
+ professional_titles = {
739
+ "Dr.",
740
+ "Prof.",
741
+ "Professor",
742
+ "Rev.",
743
+ "Pr.",
744
+ "Prof. Dr.",
745
+ "Rabbi",
746
+ "Father",
747
+ "Sister",
748
+ "Pastor",
749
+ "Elder",
750
+ }
751
+
752
+ # Get prefix based on gender from locale data
753
+ prefixes = self._data.person.get("prefixes", {})
754
+ prefix_list = prefixes.get(person_gender, prefixes.get("neutral", []))
755
+
756
+ # Separate common honorifics and professional titles from locale data
757
+ locale_common = [p for p in prefix_list if p not in professional_titles]
758
+ locale_professional = [p for p in prefix_list if p in professional_titles]
759
+
760
+ # Select prefix with realistic probabilities
761
+ # ~95% common honorific, ~5% professional title
762
+ if locale_professional and self.rng.random() < 0.05:
763
+ prefix = self.rng.choice(locale_professional)
764
+ elif locale_common:
765
+ prefix = self.rng.choice(locale_common)
766
+ else:
767
+ # Fallback defaults if no common prefixes in locale
768
+ fallback = {"male": "Mr.", "female": "Ms.", "neutral": "Mr."}
769
+ prefix = fallback.get(person_gender, "")
770
+
771
+ # Get suffix - very rare (approximately 1/2000 chance)
772
+ suffix = ""
773
+ if self.rng.random() < 0.0005: # 1 in 2000
774
+ suffixes = self._data.person.get("suffixes", [])
775
+ # Filter out empty strings
776
+ suffixes = [s for s in suffixes if s]
777
+ if suffixes:
778
+ suffix = self.rng.choice(suffixes)
779
+
780
+ # Check if locale uses "last first" order (e.g., Japanese)
781
+ formats = self._data.person.get("name_formats", ["{first_name} {last_name}"])
782
+ if formats and "{last_name} {first_name}" in formats[0]:
783
+ # For "last first" cultures, prefix typically comes before everything
784
+ parts = [prefix, last, first] if prefix else [last, first]
785
+ else:
786
+ parts = [prefix, first, last] if prefix else [first, last]
787
+
788
+ if suffix:
789
+ parts.append(suffix)
790
+
791
+ return " ".join(parts)
792
+
793
+ # =========================================================================
794
+ # Address
795
+ # =========================================================================
796
+
797
+ _current_location: dict[str, str] | None = None
798
+ _row_locations: list[dict[str, str]] | None = None
799
+ _current_row: int | None = None
800
+
801
+ def _get_location(self) -> dict[str, str]:
802
+ """Get a coherent location (city, state, postcode_prefix) from the data."""
803
+ locations = self._data.address.get("locations", [])
804
+ if locations:
805
+ return self.rng.choice(locations)
806
+ # Fallback for old-style data
807
+ return {
808
+ "city": "Springfield",
809
+ "state": "State",
810
+ "state_abbr": "ST",
811
+ "postcode_prefix": "000",
812
+ }
813
+
814
+ def init_row_locations(self, n_rows: int) -> None:
815
+ """
816
+ Pre-generate locations for multiple rows to ensure coherence across columns.
817
+
818
+ This should be called before generating a dataset with address-related columns.
819
+ When active, city(), state(), postcode() etc. will use the location for the
820
+ current row (set via set_row()).
821
+
822
+ Parameters
823
+ ----------
824
+ n_rows
825
+ Number of rows to pre-generate locations for.
826
+ """
827
+ self._row_locations = [self._get_location() for _ in range(n_rows)]
828
+ self._current_row = None
829
+
830
+ def set_row(self, row_index: int) -> None:
831
+ """
832
+ Set the current row index for location-based generation.
833
+
834
+ When row locations are initialized, this sets which row's location to use.
835
+
836
+ Parameters
837
+ ----------
838
+ row_index
839
+ The row index (0-based).
840
+ """
841
+ self._current_row = row_index
842
+
843
+ def clear_row_locations(self) -> None:
844
+ """Clear all pre-generated row locations."""
845
+ self._row_locations = None
846
+ self._current_row = None
847
+
848
+ def new_location(self) -> dict[str, str]:
849
+ """
850
+ Select a new random location and cache it for coherent address generation.
851
+
852
+ Call this before generating related address components (city, state, postcode)
853
+ to ensure they all refer to the same location.
854
+
855
+ Returns
856
+ -------
857
+ dict
858
+ The selected location with city, state, state_abbr, and postcode_prefix.
859
+ """
860
+ self._current_location = self._get_location()
861
+ return self._current_location
862
+
863
+ def _get_current_location(self) -> dict[str, str]:
864
+ """Get the current cached location, or select a new one."""
865
+ # If row locations are active, use those
866
+ if self._row_locations is not None and self._current_row is not None:
867
+ return self._row_locations[self._current_row]
868
+ # Otherwise use single cached location
869
+ if self._current_location is None:
870
+ self._current_location = self._get_location()
871
+ return self._current_location
872
+
873
+ def clear_location(self) -> None:
874
+ """Clear the cached location so the next call will select a new one."""
875
+ self._current_location = None
876
+
877
+ def city(self) -> str:
878
+ """Generate a random city name (coherent with current location context).
879
+
880
+ Returns the exonym (English name) if available, otherwise the native city name.
881
+ This allows addresses to use native names while city presets use international names.
882
+ """
883
+ location = self._get_current_location()
884
+ # Prefer exonym (English name) for standalone city preset
885
+ return location.get("exonym", location.get("city", "Springfield"))
886
+
887
+ def _city_native(self) -> str:
888
+ """Get the native city name (used internally for addresses).
889
+
890
+ Always returns the native name, ignoring any exonym.
891
+ """
892
+ location = self._get_current_location()
893
+ return location.get("city", "Springfield")
894
+
895
+ def state(self, abbr: bool = False) -> str:
896
+ """Generate a random state/province name (coherent with current location context)."""
897
+ location = self._get_current_location()
898
+ if abbr:
899
+ return location.get("state_abbr", "ST")
900
+ return location.get("state", "State")
901
+
902
+ def country(self) -> str:
903
+ """Generate the country name for this locale."""
904
+ return self._data.address.get("country", "United States")
905
+
906
+ def postcode(self) -> str:
907
+ """Generate a random postal code (coherent with current location context)."""
908
+ location = self._get_current_location()
909
+ prefix = location.get("postcode_prefix", "")
910
+ postcode_format = self._data.address.get("postcode_format", "")
911
+
912
+ # If format uses pattern characters (? for letter, # for digit), generate accordingly
913
+ if "?" in postcode_format or "#" in postcode_format:
914
+ # Generate the full postcode from the format pattern
915
+ # Replace ? with random uppercase letter, # with random digit
916
+ result = []
917
+ prefix_idx = 0
918
+ for char in postcode_format:
919
+ if char == "?":
920
+ # Use prefix character if available, otherwise random letter
921
+ if prefix_idx < len(prefix) and prefix[prefix_idx].isalpha():
922
+ result.append(prefix[prefix_idx])
923
+ prefix_idx += 1
924
+ else:
925
+ result.append(self.rng.choice("ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
926
+ elif char == "#":
927
+ # Use prefix character if available, otherwise random digit
928
+ if prefix_idx < len(prefix) and prefix[prefix_idx].isdigit():
929
+ result.append(prefix[prefix_idx])
930
+ prefix_idx += 1
931
+ else:
932
+ result.append(str(self.rng.randint(0, 9)))
933
+ else:
934
+ # Keep literal characters (spaces, dashes, etc.)
935
+ result.append(char)
936
+ return "".join(result)
937
+
938
+ # Default: append digits to complete the postal code
939
+ remaining = 5 - len(prefix)
940
+ suffix = "".join(str(self.rng.randint(0, 9)) for _ in range(remaining))
941
+ return prefix + suffix
942
+
943
+ def street_name(self) -> str:
944
+ """Generate a random street name.
945
+
946
+ If the locale has `streets_by_city`, use city-specific streets.
947
+ Otherwise, fall back to combining `street_names` and `street_suffixes`.
948
+ """
949
+ # Check if locale uses city-specific streets
950
+ streets_by_city = self._data.address.get("streets_by_city")
951
+ if streets_by_city:
952
+ # Get current city from location
953
+ location = self._get_current_location()
954
+ city = location.get("city", "")
955
+ city_streets = streets_by_city.get(city)
956
+ if city_streets:
957
+ return self.rng.choice(city_streets)
958
+
959
+ # Fall back to old street_names + street_suffixes approach
960
+ names = self._data.address.get("street_names", ["Main"])
961
+ suffixes = self._data.address.get("street_suffixes", ["St"])
962
+ return f"{self.rng.choice(names)} {self.rng.choice(suffixes)}"
963
+
964
+ def building_number(self) -> str:
965
+ """Generate a random building number."""
966
+ return str(self.rng.randint(1, 9999))
967
+
968
+ def address(self) -> str:
969
+ """Generate a full coherent address (city, state, postcode are consistent)."""
970
+ # Only select a new location if row locations are not active
971
+ # This ensures coherence with other address-related columns (city, state, etc.)
972
+ using_row_context = self._row_locations is not None and self._current_row is not None
973
+ if not using_row_context:
974
+ self.new_location()
975
+
976
+ formats = self._data.address.get(
977
+ "address_formats",
978
+ ["{building_number} {street}, {city}, {state} {postcode}"],
979
+ )
980
+ fmt = self.rng.choice(formats)
981
+
982
+ result = fmt.format(
983
+ building_number=self.building_number(),
984
+ street=self.street_name(),
985
+ city=self._city_native(), # Use native name in addresses
986
+ state=self.state(abbr=False),
987
+ state_abbr=self.state(abbr=True),
988
+ postcode=self.postcode(),
989
+ country=self.country(),
990
+ unit=str(self.rng.randint(1, 999)),
991
+ )
992
+
993
+ # Clear location after generating full address (only if we set it)
994
+ if not using_row_context:
995
+ self.clear_location()
996
+ return result
997
+
998
+ def phone_number(self) -> str:
999
+ """Generate a phone number with area code matching the current location's state."""
1000
+ location = self._get_current_location()
1001
+ state = location.get("state", "California")
1002
+
1003
+ # Get area codes for this state
1004
+ area_codes = self._data.address.get("phone_area_codes", {})
1005
+ state_codes = area_codes.get(state, ["555"]) # 555 is fictional fallback
1006
+ area_code = self.rng.choice(state_codes)
1007
+
1008
+ # Generate the rest of the number
1009
+ exchange = str(self.rng.randint(200, 999)) # Exchange can't start with 0 or 1
1010
+ subscriber = str(self.rng.randint(0, 9999)).zfill(4)
1011
+
1012
+ return f"({area_code}) {exchange}-{subscriber}"
1013
+
1014
+ def latitude(self) -> str:
1015
+ """Generate a random latitude (bounded by current location if available)."""
1016
+ location = self._get_current_location()
1017
+ lat_min = location.get("lat_min", -90)
1018
+ lat_max = location.get("lat_max", 90)
1019
+ return f"{self.rng.uniform(lat_min, lat_max):.6f}"
1020
+
1021
+ def longitude(self) -> str:
1022
+ """Generate a random longitude (bounded by current location if available)."""
1023
+ location = self._get_current_location()
1024
+ lon_min = location.get("lon_min", -180)
1025
+ lon_max = location.get("lon_max", 180)
1026
+ return f"{self.rng.uniform(lon_min, lon_max):.6f}"
1027
+
1028
+ # =========================================================================
1029
+ # Company
1030
+ # =========================================================================
1031
+
1032
+ def company(self) -> str:
1033
+ """Generate a random company name.
1034
+
1035
+ Has a ~15% chance to return a well-known company name, with preference
1036
+ for companies that have offices in the current city (if location context is active).
1037
+ Otherwise generates a fictional company name.
1038
+ """
1039
+ # 15% chance to use a well-known company
1040
+ if self.rng.random() < 0.15:
1041
+ well_known = self._data.company.get("well_known_companies", [])
1042
+ if well_known:
1043
+ # Get current city if location context is active
1044
+ current_city = None
1045
+ if self._row_locations is not None and self._current_row is not None:
1046
+ current_city = self._row_locations[self._current_row].get("city")
1047
+
1048
+ # Collect all companies, preferring those in the current city
1049
+ city_companies = []
1050
+ all_companies = []
1051
+
1052
+ for company in well_known:
1053
+ name = company.get("name") if isinstance(company, dict) else company
1054
+ cities = company.get("cities", []) if isinstance(company, dict) else []
1055
+ all_companies.append(name)
1056
+ if current_city and current_city in cities:
1057
+ city_companies.append(name)
1058
+
1059
+ # 70% chance to use city-relevant company if available
1060
+ if city_companies and self.rng.random() < 0.7:
1061
+ return self.rng.choice(city_companies)
1062
+ elif all_companies:
1063
+ return self.rng.choice(all_companies)
1064
+
1065
+ # Generate a fictional company name
1066
+ formats = self._data.company.get("formats", ["{last_name} {suffix}"])
1067
+ fmt = self.rng.choice(formats)
1068
+
1069
+ suffixes = self._data.company.get("suffixes", ["Inc", "LLC", "Corp"])
1070
+ adjectives = self._data.company.get("adjectives", ["Global", "Advanced"])
1071
+ nouns = self._data.company.get("nouns", ["Solutions", "Systems"])
1072
+
1073
+ # Count how many {last_name} placeholders are in the format
1074
+ # and generate distinct last names for each
1075
+ last_name_count = fmt.count("{last_name}")
1076
+ if last_name_count <= 1:
1077
+ company_last_name = self._generate_last_name()
1078
+ return fmt.format(
1079
+ last_name=company_last_name,
1080
+ suffix=self.rng.choice(suffixes),
1081
+ adjective=self.rng.choice(adjectives),
1082
+ noun=self.rng.choice(nouns),
1083
+ )
1084
+ else:
1085
+ # Generate distinct last names for formats like "{last_name} and {last_name}"
1086
+ last_names = []
1087
+ for _ in range(last_name_count):
1088
+ new_name = self._generate_last_name()
1089
+ # Ensure we don't repeat the same name
1090
+ attempts = 0
1091
+ while new_name in last_names and attempts < 10:
1092
+ new_name = self._generate_last_name()
1093
+ attempts += 1
1094
+ last_names.append(new_name)
1095
+
1096
+ # Replace placeholders one at a time
1097
+ result = fmt
1098
+ for name in last_names:
1099
+ result = result.replace("{last_name}", name, 1)
1100
+
1101
+ return result.format(
1102
+ suffix=self.rng.choice(suffixes),
1103
+ adjective=self.rng.choice(adjectives),
1104
+ noun=self.rng.choice(nouns),
1105
+ )
1106
+
1107
+ def job(self) -> str:
1108
+ """Generate a random job title."""
1109
+ jobs = self._data.company.get("jobs", ["Manager"])
1110
+ return self.rng.choice(jobs)
1111
+
1112
+ def catch_phrase(self) -> str:
1113
+ """Generate a random business catch phrase."""
1114
+ adjectives = self._data.company.get("catch_phrase_adjectives", ["Innovative", "Dynamic"])
1115
+ nouns = self._data.company.get("catch_phrase_nouns", ["solutions", "paradigms"])
1116
+ verbs = self._data.company.get("catch_phrase_verbs", ["deliver", "leverage"])
1117
+ return (
1118
+ f"{self.rng.choice(adjectives)} {self.rng.choice(nouns)} that {self.rng.choice(verbs)}"
1119
+ )
1120
+
1121
+ # =========================================================================
1122
+ # Internet
1123
+ # =========================================================================
1124
+
1125
+ def email(self) -> str:
1126
+ """Generate a random email address (coherent with current person context)."""
1127
+ # Get person data - uses cached person if available
1128
+ person = self._get_current_person()
1129
+ first = person.get("first_name", "user").lower()
1130
+ last = person.get("last_name", "name").lower()
1131
+ domains = self._data.internet.get("free_email_domains", ["gmail.com", "outlook.com"])
1132
+
1133
+ # Transliterate to ASCII for valid email addresses
1134
+ first = _transliterate_to_ascii(first)
1135
+ last = _transliterate_to_ascii(last)
1136
+
1137
+ # Clean names for email (remove non-alphanumeric)
1138
+ first = "".join(c for c in first if c.isalnum())
1139
+ last = "".join(c for c in last if c.isalnum())
1140
+
1141
+ # Various realistic email patterns
1142
+ patterns = [
1143
+ f"{first}.{last}", # john.smith
1144
+ f"{first}{last}", # johnsmith
1145
+ f"{first}_{last}", # john_smith
1146
+ f"{first[0]}{last}", # jsmith
1147
+ f"{first}{self.rng.randint(1, 999)}", # john123
1148
+ f"{first[0]}{last}{self.rng.randint(1, 99)}", # jsmith42
1149
+ f"{first}.{last}{self.rng.randint(1, 99)}", # john.smith99
1150
+ f"{first[0]}_{last}", # j_smith
1151
+ ]
1152
+
1153
+ return f"{self.rng.choice(patterns)}@{self.rng.choice(domains)}"
1154
+
1155
+ def user_name(self) -> str:
1156
+ """Generate a random username (coherent with current person context)."""
1157
+ # Get person data - uses cached person if available
1158
+ person = self._get_current_person()
1159
+ first = person.get("first_name", "user").lower()
1160
+ last = person.get("last_name", "name").lower()
1161
+
1162
+ # Transliterate to ASCII for valid usernames
1163
+ first = _transliterate_to_ascii(first)
1164
+ last = _transliterate_to_ascii(last)
1165
+
1166
+ # Clean names
1167
+ first = "".join(c for c in first if c.isalnum())
1168
+ last = "".join(c for c in last if c.isalnum())
1169
+
1170
+ patterns = [
1171
+ f"{first}{last}",
1172
+ f"{first}_{last}",
1173
+ f"{first}{self.rng.randint(1, 999)}",
1174
+ f"{first[0]}{last}{self.rng.randint(1, 99)}",
1175
+ ]
1176
+
1177
+ return self.rng.choice(patterns)
1178
+
1179
+ def password(self, length: int = 12) -> str:
1180
+ """Generate a random password."""
1181
+ import string
1182
+
1183
+ chars = string.ascii_letters + string.digits + "!@#$%^&*"
1184
+ return "".join(self.rng.choice(chars) for _ in range(length))
1185
+
1186
+ def url(self) -> str:
1187
+ """Generate a random URL."""
1188
+ protocols = ["https://"]
1189
+ tlds = self._data.internet.get("tlds", ["com", "org", "net"])
1190
+ words = self._data.text.get("words", ["example", "test", "sample"])
1191
+
1192
+ domain = self.rng.choice(words).lower()
1193
+ domain = "".join(c for c in domain if c.isalnum())
1194
+
1195
+ return f"{self.rng.choice(protocols)}www.{domain}.{self.rng.choice(tlds)}"
1196
+
1197
+ def domain_name(self) -> str:
1198
+ """Generate a random domain name."""
1199
+ tlds = self._data.internet.get("tlds", ["com", "org", "net"])
1200
+ words = self._data.text.get("words", ["example", "test", "sample"])
1201
+
1202
+ domain = self.rng.choice(words).lower()
1203
+ domain = "".join(c for c in domain if c.isalnum())
1204
+
1205
+ return f"{domain}.{self.rng.choice(tlds)}"
1206
+
1207
+ def ipv4(self) -> str:
1208
+ """Generate a random IPv4 address."""
1209
+ return ".".join(str(self.rng.randint(0, 255)) for _ in range(4))
1210
+
1211
+ def ipv6(self) -> str:
1212
+ """Generate a random IPv6 address."""
1213
+ return ":".join(f"{self.rng.randint(0, 65535):04x}" for _ in range(8))
1214
+
1215
+ # =========================================================================
1216
+ # Text
1217
+ # =========================================================================
1218
+
1219
+ def word(self) -> str:
1220
+ """Generate a random word."""
1221
+ words = self._data.text.get("words", ["lorem", "ipsum", "dolor"])
1222
+ return self.rng.choice(words)
1223
+
1224
+ def sentence(self, num_words: int | None = None) -> str:
1225
+ """Generate a random sentence."""
1226
+ if num_words is None:
1227
+ num_words = self.rng.randint(5, 15)
1228
+
1229
+ words = [self.word() for _ in range(num_words)]
1230
+ words[0] = words[0].capitalize()
1231
+ return " ".join(words) + "."
1232
+
1233
+ def paragraph(self, num_sentences: int | None = None) -> str:
1234
+ """Generate a random paragraph."""
1235
+ if num_sentences is None:
1236
+ num_sentences = self.rng.randint(3, 7)
1237
+
1238
+ return " ".join(self.sentence() for _ in range(num_sentences))
1239
+
1240
+ def text(self, max_chars: int = 200) -> str:
1241
+ """Generate random text up to max_chars."""
1242
+ result = []
1243
+ current_length = 0
1244
+
1245
+ while current_length < max_chars:
1246
+ sentence = self.sentence()
1247
+ if current_length + len(sentence) + 1 > max_chars:
1248
+ break
1249
+ result.append(sentence)
1250
+ current_length += len(sentence) + 1
1251
+
1252
+ return " ".join(result) if result else self.sentence()[:max_chars]
1253
+
1254
+ # =========================================================================
1255
+ # Financial
1256
+ # =========================================================================
1257
+
1258
+ def credit_card_number(self) -> str:
1259
+ """Generate a random credit card number (not valid for transactions)."""
1260
+ # Generate a 16-digit number with valid Luhn checksum
1261
+ prefix = self.rng.choice(["4", "5", "37", "6011"]) # Visa, MC, Amex, Discover
1262
+ length = 15 if prefix == "37" else 16
1263
+
1264
+ # Generate digits (minus check digit)
1265
+ digits = list(prefix)
1266
+ while len(digits) < length - 1:
1267
+ digits.append(str(self.rng.randint(0, 9)))
1268
+
1269
+ # Calculate Luhn check digit
1270
+ check_digit = self._luhn_checksum(digits)
1271
+ digits.append(str(check_digit))
1272
+
1273
+ return "".join(digits)
1274
+
1275
+ def _luhn_checksum(self, digits: list[str]) -> int:
1276
+ """Calculate Luhn check digit for a partial card number.
1277
+
1278
+ The check digit is appended to make the full number pass the Luhn algorithm.
1279
+ We process from right to left, doubling every second digit starting from
1280
+ the rightmost digit of the partial number (since the check digit will be
1281
+ at position 0 and won't be doubled).
1282
+ """
1283
+ nums = [int(d) for d in digits]
1284
+ total = 0
1285
+ for i, d in enumerate(reversed(nums)):
1286
+ if i % 2 == 0: # These positions get doubled (check digit at pos 0 won't be)
1287
+ d = d * 2
1288
+ if d > 9:
1289
+ d -= 9
1290
+ total += d
1291
+ return (10 - (total % 10)) % 10
1292
+
1293
+ def iban(self) -> str:
1294
+ """Generate a random IBAN."""
1295
+ # Simplified - generates a plausible-looking IBAN
1296
+ country = self._data.address.get("country_code", "US")
1297
+ if country == "US":
1298
+ # US doesn't use IBAN, use DE as example
1299
+ country = "DE"
1300
+
1301
+ check_digits = f"{self.rng.randint(10, 99)}"
1302
+ bank_code = "".join(str(self.rng.randint(0, 9)) for _ in range(8))
1303
+ account = "".join(str(self.rng.randint(0, 9)) for _ in range(10))
1304
+
1305
+ return f"{country}{check_digits}{bank_code}{account}"
1306
+
1307
+ def currency_code(self) -> str:
1308
+ """Generate a random currency code."""
1309
+ codes = self._data.misc.get("currency_codes", ["USD", "EUR", "GBP", "JPY", "CNY"])
1310
+ return self.rng.choice(codes)
1311
+
1312
+ # =========================================================================
1313
+ # Identifiers
1314
+ # =========================================================================
1315
+
1316
+ def uuid4(self) -> str:
1317
+ """Generate a random UUID4."""
1318
+ # Use our RNG to generate deterministic UUIDs
1319
+ hex_chars = "0123456789abcdef"
1320
+ parts = [
1321
+ "".join(self.rng.choice(hex_chars) for _ in range(8)),
1322
+ "".join(self.rng.choice(hex_chars) for _ in range(4)),
1323
+ "4" + "".join(self.rng.choice(hex_chars) for _ in range(3)), # Version 4
1324
+ self.rng.choice("89ab")
1325
+ + "".join(self.rng.choice(hex_chars) for _ in range(3)), # Variant
1326
+ "".join(self.rng.choice(hex_chars) for _ in range(12)),
1327
+ ]
1328
+ return "-".join(parts)
1329
+
1330
+ def ssn(self) -> str:
1331
+ """Generate a random SSN-like identifier."""
1332
+ # US format: XXX-XX-XXXX
1333
+ fmt = self._data.misc.get("ssn_format", "###-##-####")
1334
+ return self._generate_from_format(fmt)
1335
+
1336
+ def license_plate(self) -> str:
1337
+ """Generate a random license plate."""
1338
+ fmt = self._data.misc.get("license_plate_format", "???-####")
1339
+ return self._generate_from_format(fmt)
1340
+
1341
+ # =========================================================================
1342
+ # Date/Time (string representations)
1343
+ # =========================================================================
1344
+
1345
+ def date_this_year(self) -> str:
1346
+ """Generate a random date from this year as ISO string."""
1347
+ from datetime import date, timedelta
1348
+
1349
+ today = date.today()
1350
+ start = date(today.year, 1, 1)
1351
+ days = (today - start).days
1352
+ random_date = start + timedelta(days=self.rng.randint(0, max(days, 1)))
1353
+ return random_date.isoformat()
1354
+
1355
+ def date_this_decade(self) -> str:
1356
+ """Generate a random date from this decade as ISO string."""
1357
+ from datetime import date, timedelta
1358
+
1359
+ today = date.today()
1360
+ decade_start = (today.year // 10) * 10
1361
+ start = date(decade_start, 1, 1)
1362
+ days = (today - start).days
1363
+ random_date = start + timedelta(days=self.rng.randint(0, max(days, 1)))
1364
+ return random_date.isoformat()
1365
+
1366
+ def time(self) -> str:
1367
+ """Generate a random time as string."""
1368
+ hour = self.rng.randint(0, 23)
1369
+ minute = self.rng.randint(0, 59)
1370
+ second = self.rng.randint(0, 59)
1371
+ return f"{hour:02d}:{minute:02d}:{second:02d}"
1372
+
1373
+ # =========================================================================
1374
+ # Misc
1375
+ # =========================================================================
1376
+
1377
+ def color_name(self) -> str:
1378
+ """Generate a random color name."""
1379
+ colors = self._data.misc.get(
1380
+ "colors",
1381
+ [
1382
+ "Red",
1383
+ "Blue",
1384
+ "Green",
1385
+ "Yellow",
1386
+ "Purple",
1387
+ "Orange",
1388
+ "Pink",
1389
+ "Brown",
1390
+ "Black",
1391
+ "White",
1392
+ "Gray",
1393
+ "Cyan",
1394
+ "Magenta",
1395
+ ],
1396
+ )
1397
+ return self.rng.choice(colors)
1398
+
1399
def file_name(self) -> str:
    """Generate a random file name of the form ``<stem>.<extension>``.

    The stem is a word chosen from the ``"words"`` entry of
    ``self._data.text`` (falling back to a short built-in list), lowercased
    and stripped of every non-alphanumeric character. The extension is
    chosen from the ``"file_extensions"`` entry of ``self._data.misc``
    (falling back to a short built-in list).
    """
    vocabulary = self._data.text.get("words", ["document", "file", "report"])
    suffixes = self._data.misc.get("file_extensions", ["txt", "pdf", "doc", "xlsx"])
    # The word is drawn before the extension; keep that order so seeded
    # output stays the same.
    raw_word = self.rng.choice(vocabulary).lower()
    stem = "".join(filter(str.isalnum, raw_word))
    suffix = self.rng.choice(suffixes)
    return f"{stem}.{suffix}"
1406
+
1407
def file_extension(self) -> str:
    """Pick a random file extension (without a leading dot).

    Extensions are read from the ``"file_extensions"`` entry of
    ``self._data.misc``. When that entry is absent, a built-in list of six
    common extensions is used. The selection is made with
    ``self.rng.choice``.
    """
    fallback_extensions = ["txt", "pdf", "doc", "xlsx", "png", "jpg"]
    candidates = self._data.misc.get("file_extensions", fallback_extensions)
    return self.rng.choice(candidates)
1413
+
1414
def mime_type(self) -> str:
    """Pick a random MIME type string.

    MIME types are read from the ``"mime_types"`` entry of
    ``self._data.misc``. When that entry is absent, a built-in list of six
    common types is used. The selection is made with ``self.rng.choice``.
    """
    fallback_types = [
        "text/plain",
        "text/html",
        "application/json",
        "application/pdf",
        "image/png",
        "image/jpeg",
    ]
    candidates = self._data.misc.get("mime_types", fallback_types)
    return self.rng.choice(candidates)
1428
+
1429
+ # =========================================================================
1430
+ # Utilities
1431
+ # =========================================================================
1432
+
1433
def _generate_from_format(self, fmt: str) -> str:
    """
    Expand a format pattern into a concrete string.

    Each character of `fmt` is processed left to right:

    - ``#`` becomes a random digit (0-9)
    - ``?`` becomes a random uppercase letter (A-Z)
    - ``*`` becomes a random uppercase letter or digit
    - any other character is copied through unchanged

    Parameters
    ----------
    fmt
        The pattern to expand.

    Returns
    -------
    str
        The expanded string, the same length as `fmt`.
    """
    rng = self.rng
    # One zero-argument producer per placeholder; characters without an
    # entry are emitted verbatim and consume no randomness.
    producers = {
        "#": lambda: str(rng.randint(0, 9)),
        "?": lambda: rng.choice("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
        "*": lambda: rng.choice("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"),
    }
    pieces = [producers[symbol]() if symbol in producers else symbol for symbol in fmt]
    return "".join(pieces)
1453
+
1454
+
1455
# Module-level convenience function and default registry.
# The registry instance below is created once, when this module is imported.
# NOTE(review): get_generator() does not reference it in the code shown here;
# confirm where `_default_registry` is consumed.
_default_registry = LocaleRegistry()
1457
+
1458
+
1459
def get_generator(country: str = "US", seed: int | None = None) -> LocaleGenerator:
    """
    Create a locale-aware data generator for a country.

    This is a thin convenience wrapper that constructs a `LocaleGenerator`
    with the given arguments.

    Parameters
    ----------
    country
        Country code (e.g., "US", "DE", "USA", "DEU").
        Legacy locale codes like "en_US" are also accepted for backwards
        compatibility.
    seed
        Random seed for reproducibility.

    Returns
    -------
    LocaleGenerator
        A generator configured for the specified country.
    """
    generator = LocaleGenerator(country=country, seed=seed)
    return generator