dataforge-py 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. dataforge/__init__.py +20 -0
  2. dataforge/backend.py +147 -0
  3. dataforge/cli.py +166 -0
  4. dataforge/core.py +1169 -0
  5. dataforge/locales/__init__.py +1 -0
  6. dataforge/locales/ar_SA/__init__.py +1 -0
  7. dataforge/locales/ar_SA/address.py +128 -0
  8. dataforge/locales/ar_SA/company.py +183 -0
  9. dataforge/locales/ar_SA/internet.py +25 -0
  10. dataforge/locales/ar_SA/person.py +217 -0
  11. dataforge/locales/ar_SA/phone.py +15 -0
  12. dataforge/locales/de_DE/__init__.py +1 -0
  13. dataforge/locales/de_DE/address.py +148 -0
  14. dataforge/locales/de_DE/company.py +125 -0
  15. dataforge/locales/de_DE/internet.py +32 -0
  16. dataforge/locales/de_DE/person.py +212 -0
  17. dataforge/locales/de_DE/phone.py +17 -0
  18. dataforge/locales/en_AU/__init__.py +1 -0
  19. dataforge/locales/en_AU/address.py +231 -0
  20. dataforge/locales/en_AU/company.py +193 -0
  21. dataforge/locales/en_AU/internet.py +34 -0
  22. dataforge/locales/en_AU/person.py +370 -0
  23. dataforge/locales/en_AU/phone.py +16 -0
  24. dataforge/locales/en_CA/__init__.py +1 -0
  25. dataforge/locales/en_CA/address.py +276 -0
  26. dataforge/locales/en_CA/company.py +193 -0
  27. dataforge/locales/en_CA/internet.py +34 -0
  28. dataforge/locales/en_CA/person.py +377 -0
  29. dataforge/locales/en_CA/phone.py +15 -0
  30. dataforge/locales/en_GB/__init__.py +1 -0
  31. dataforge/locales/en_GB/address.py +312 -0
  32. dataforge/locales/en_GB/company.py +196 -0
  33. dataforge/locales/en_GB/internet.py +34 -0
  34. dataforge/locales/en_GB/person.py +372 -0
  35. dataforge/locales/en_GB/phone.py +15 -0
  36. dataforge/locales/en_US/__init__.py +1 -0
  37. dataforge/locales/en_US/address.py +268 -0
  38. dataforge/locales/en_US/company.py +191 -0
  39. dataforge/locales/en_US/internet.py +34 -0
  40. dataforge/locales/en_US/person.py +370 -0
  41. dataforge/locales/en_US/phone.py +15 -0
  42. dataforge/locales/es_ES/__init__.py +1 -0
  43. dataforge/locales/es_ES/address.py +151 -0
  44. dataforge/locales/es_ES/company.py +125 -0
  45. dataforge/locales/es_ES/internet.py +30 -0
  46. dataforge/locales/es_ES/person.py +207 -0
  47. dataforge/locales/es_ES/phone.py +15 -0
  48. dataforge/locales/fr_FR/__init__.py +1 -0
  49. dataforge/locales/fr_FR/address.py +145 -0
  50. dataforge/locales/fr_FR/company.py +125 -0
  51. dataforge/locales/fr_FR/internet.py +30 -0
  52. dataforge/locales/fr_FR/person.py +212 -0
  53. dataforge/locales/fr_FR/phone.py +15 -0
  54. dataforge/locales/hi_IN/__init__.py +1 -0
  55. dataforge/locales/hi_IN/address.py +177 -0
  56. dataforge/locales/hi_IN/company.py +191 -0
  57. dataforge/locales/hi_IN/internet.py +26 -0
  58. dataforge/locales/hi_IN/person.py +218 -0
  59. dataforge/locales/hi_IN/phone.py +21 -0
  60. dataforge/locales/it_IT/__init__.py +1 -0
  61. dataforge/locales/it_IT/address.py +218 -0
  62. dataforge/locales/it_IT/company.py +151 -0
  63. dataforge/locales/it_IT/internet.py +31 -0
  64. dataforge/locales/it_IT/person.py +187 -0
  65. dataforge/locales/it_IT/phone.py +15 -0
  66. dataforge/locales/ja_JP/__init__.py +1 -0
  67. dataforge/locales/ja_JP/address.py +174 -0
  68. dataforge/locales/ja_JP/company.py +121 -0
  69. dataforge/locales/ja_JP/internet.py +30 -0
  70. dataforge/locales/ja_JP/person.py +207 -0
  71. dataforge/locales/ja_JP/phone.py +18 -0
  72. dataforge/locales/ko_KR/__init__.py +1 -0
  73. dataforge/locales/ko_KR/address.py +121 -0
  74. dataforge/locales/ko_KR/company.py +151 -0
  75. dataforge/locales/ko_KR/internet.py +30 -0
  76. dataforge/locales/ko_KR/person.py +157 -0
  77. dataforge/locales/ko_KR/phone.py +26 -0
  78. dataforge/locales/nl_NL/__init__.py +1 -0
  79. dataforge/locales/nl_NL/address.py +152 -0
  80. dataforge/locales/nl_NL/company.py +182 -0
  81. dataforge/locales/nl_NL/internet.py +41 -0
  82. dataforge/locales/nl_NL/person.py +218 -0
  83. dataforge/locales/nl_NL/phone.py +19 -0
  84. dataforge/locales/pl_PL/__init__.py +1 -0
  85. dataforge/locales/pl_PL/address.py +140 -0
  86. dataforge/locales/pl_PL/company.py +183 -0
  87. dataforge/locales/pl_PL/internet.py +36 -0
  88. dataforge/locales/pl_PL/person.py +217 -0
  89. dataforge/locales/pl_PL/phone.py +15 -0
  90. dataforge/locales/pt_BR/__init__.py +1 -0
  91. dataforge/locales/pt_BR/address.py +127 -0
  92. dataforge/locales/pt_BR/company.py +151 -0
  93. dataforge/locales/pt_BR/internet.py +31 -0
  94. dataforge/locales/pt_BR/person.py +187 -0
  95. dataforge/locales/pt_BR/phone.py +15 -0
  96. dataforge/locales/ru_RU/__init__.py +1 -0
  97. dataforge/locales/ru_RU/address.py +156 -0
  98. dataforge/locales/ru_RU/company.py +168 -0
  99. dataforge/locales/ru_RU/internet.py +26 -0
  100. dataforge/locales/ru_RU/person.py +218 -0
  101. dataforge/locales/ru_RU/phone.py +16 -0
  102. dataforge/locales/zh_CN/__init__.py +1 -0
  103. dataforge/locales/zh_CN/address.py +141 -0
  104. dataforge/locales/zh_CN/company.py +151 -0
  105. dataforge/locales/zh_CN/internet.py +30 -0
  106. dataforge/locales/zh_CN/person.py +157 -0
  107. dataforge/locales/zh_CN/phone.py +25 -0
  108. dataforge/providers/__init__.py +1 -0
  109. dataforge/providers/address.py +460 -0
  110. dataforge/providers/ai_chat.py +170 -0
  111. dataforge/providers/ai_prompt.py +447 -0
  112. dataforge/providers/automotive.py +416 -0
  113. dataforge/providers/barcode.py +149 -0
  114. dataforge/providers/base.py +34 -0
  115. dataforge/providers/color.py +247 -0
  116. dataforge/providers/company.py +144 -0
  117. dataforge/providers/crypto.py +105 -0
  118. dataforge/providers/datetime.py +397 -0
  119. dataforge/providers/ecommerce.py +316 -0
  120. dataforge/providers/education.py +234 -0
  121. dataforge/providers/file.py +271 -0
  122. dataforge/providers/finance.py +545 -0
  123. dataforge/providers/geo.py +332 -0
  124. dataforge/providers/government.py +114 -0
  125. dataforge/providers/internet.py +351 -0
  126. dataforge/providers/llm.py +726 -0
  127. dataforge/providers/lorem.py +241 -0
  128. dataforge/providers/medical.py +364 -0
  129. dataforge/providers/misc.py +196 -0
  130. dataforge/providers/network.py +283 -0
  131. dataforge/providers/payment.py +300 -0
  132. dataforge/providers/person.py +195 -0
  133. dataforge/providers/phone.py +87 -0
  134. dataforge/providers/profile.py +265 -0
  135. dataforge/providers/science.py +365 -0
  136. dataforge/providers/text.py +365 -0
  137. dataforge/py.typed +0 -0
  138. dataforge/pytest_plugin.py +80 -0
  139. dataforge/registry.py +164 -0
  140. dataforge/schema.py +772 -0
  141. dataforge/unique.py +171 -0
  142. dataforge_py-0.2.0.dist-info/METADATA +964 -0
  143. dataforge_py-0.2.0.dist-info/RECORD +145 -0
  144. dataforge_py-0.2.0.dist-info/WHEEL +4 -0
  145. dataforge_py-0.2.0.dist-info/entry_points.txt +35 -0
dataforge/unique.py ADDED
@@ -0,0 +1,171 @@
1
+ """UniqueProxy — wrapper for unique value generation.
2
+
3
+ Intercepts provider method calls and ensures each returned value is
4
+ unique within the lifetime of the proxy (or until :meth:`clear` is
5
+ called).
6
+
7
+ Usage::
8
+
9
+ forge = DataForge(seed=42)
10
+ forge.unique.person.first_name() # guaranteed unique per call
11
+ forge.unique.clear() # reset tracking
12
+
13
+ Performance
14
+ -----------
15
+ The proxy adds a thin ``set``-membership check per scalar value
16
+ (O(1) amortised) and retries on collision. Batch calls are
17
+ generated in bulk with a single ``set`` deduplication pass,
18
+ requesting extra items to compensate for expected collisions.
19
+
20
+ The proxy itself is **lazily created** — accessing ``forge.unique``
21
+ for the first time constructs it; all subsequent accesses return
22
+ the cached instance.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from typing import Any
28
+
29
+ from dataforge.providers.base import BaseProvider
30
+
31
+
32
class _UniqueMethodWrapper:
    """Wrap a single provider method so it never yields a repeated value.

    Every value handed out is recorded in a per-wrapper ``set`` (so values
    must be hashable) and stays recorded until :meth:`clear` is called.
    """

    __slots__ = ("_method", "_seen")

    def __init__(self, method: Any) -> None:
        self._method = method
        self._seen: set[Any] = set()

    def __call__(self, count: int = 1, **kwargs: Any) -> Any:
        """Return one unique value, or a list of *count* unique values.

        ``count == 1`` returns a bare value; any other *count* returns a
        list (empty when *count* is zero or negative).  Extra keyword
        arguments are forwarded to the wrapped method.
        """
        if count == 1:
            return self._generate_one(**kwargs)
        return self._generate_batch(count, **kwargs)

    def _generate_one(self, _max_retries: int = 10_000, **kwargs: Any) -> Any:
        """Generate a single unique value, retrying on collisions.

        Raises
        ------
        RuntimeError
            If *_max_retries* consecutive calls all return values that
            were already handed out.
        """
        seen = self._seen
        method = self._method
        for _ in range(_max_retries):
            val = method(**kwargs)
            if val not in seen:
                seen.add(val)
                return val
        raise RuntimeError(
            f"Could not generate a unique value after {_max_retries} "
            f"retries for {self._method!r}. "
            f"Already generated {len(seen)} unique values."
        )

    def _generate_batch(self, count: int, **kwargs: Any) -> list[Any]:
        """Generate *count* unique values using over-sampling.

        The wrapped method is called with ``count=`` set to the number of
        values still missing plus a safety margin, and the returned batch
        is filtered against the seen set.  Values accepted before a
        failure remain recorded as seen.

        Raises
        ------
        RuntimeError
            If more than ``count * 100`` collisions accumulate, or if the
            wrapped method returns a batch containing no values at all.
        """
        seen = self._seen
        method = self._method
        result: list[Any] = []
        remaining = count
        max_total_retries = count * 100

        retries = 0
        while remaining > 0:
            if retries > max_total_retries:
                raise RuntimeError(
                    f"Could not generate {count} unique values after "
                    f"{retries} retries for {self._method!r}. "
                    f"Generated {len(result)}/{count}."
                )
            # Over-sample by 20% (at least 10 extra) to compensate for
            # expected collisions.
            request = remaining + max(remaining // 5, 10)
            batch = method(count=request, **kwargs)
            remaining_before = remaining
            retries_before = retries
            for val in batch:
                if val not in seen:
                    seen.add(val)
                    result.append(val)
                    remaining -= 1
                    if remaining == 0:
                        break
                else:
                    retries += 1
            # Neither a new value nor a collision means the batch was
            # empty.  Retries only advance on collisions, so without this
            # check the loop would never terminate.
            if remaining == remaining_before and retries == retries_before:
                raise RuntimeError(
                    f"Could not generate {count} unique values for "
                    f"{self._method!r}: the method returned no values "
                    f"for count={request}. "
                    f"Generated {len(result)}/{count}."
                )

        return result

    def clear(self) -> None:
        """Reset the seen set for this method."""
        self._seen.clear()
95
+
96
+
97
class _UniqueProviderProxy:
    """Proxy around a provider that wraps every method for uniqueness.

    Callable attributes of the provider are wrapped in a
    ``_UniqueMethodWrapper`` on first access and cached by name, so each
    method keeps one seen-value set across calls.  Non-callable attributes
    are returned unchanged.
    """

    __slots__ = ("_provider", "_wrappers")

    def __init__(self, provider: BaseProvider) -> None:
        self._provider = provider
        self._wrappers: dict[str, _UniqueMethodWrapper] = {}

    def __getattr__(self, name: str) -> Any:
        """Return the uniqueness wrapper (or raw attribute) for *name*.

        Raises
        ------
        AttributeError
            If the provider has no attribute *name*, or if the proxy has
            not been initialised yet.
        """
        # __getattr__ only runs after normal lookup failed, so a request
        # for one of our own slots means the slot is unset (e.g. copy and
        # pickle create the instance via __new__ and then probe it).
        # Reading self._wrappers below would then re-enter this method
        # without bound; fail with a plain AttributeError instead.
        if name in _UniqueProviderProxy.__slots__:
            raise AttributeError(name)
        wrapper = self._wrappers.get(name)
        if wrapper is not None:
            return wrapper
        method = getattr(self._provider, name)
        if not callable(method):
            return method
        wrapper = _UniqueMethodWrapper(method)
        self._wrappers[name] = wrapper
        return wrapper

    def clear(self) -> None:
        """Clear all tracked unique values for this provider."""
        for wrapper in self._wrappers.values():
            wrapper.clear()
121
+
122
+
123
class UniqueProxy:
    """Top-level unique proxy — accessed via ``forge.unique``.

    Lazily wraps each provider the first time it is accessed.
    Maintains per-method seen-value sets across calls.

    Examples
    --------
    >>> forge = DataForge(seed=42)
    >>> a = forge.unique.person.first_name()
    >>> b = forge.unique.person.first_name()
    >>> a != b  # guaranteed unique
    True
    >>> forge.unique.clear()  # reset all tracking
    """

    __slots__ = ("_forge", "_proxies")

    def __init__(self, forge: Any) -> None:
        self._forge = forge
        self._proxies: dict[str, _UniqueProviderProxy] = {}

    def __getattr__(self, name: str) -> Any:
        """Return the unique-provider proxy for the forge attribute *name*.

        Attributes of the forge that are ``BaseProvider`` instances are
        wrapped in a ``_UniqueProviderProxy`` and cached by name; anything
        else is returned unchanged and not cached.

        Raises
        ------
        AttributeError
            If the forge has no attribute *name*, or if this proxy has not
            been initialised yet.
        """
        # __getattr__ only runs after normal lookup failed, so a request
        # for one of our own slots means the slot is unset (e.g. copy and
        # pickle create the instance via __new__ and then probe it).
        # Reading self._proxies below would then re-enter this method
        # without bound; fail with a plain AttributeError instead.
        if name in UniqueProxy.__slots__:
            raise AttributeError(name)
        proxy = self._proxies.get(name)
        if proxy is not None:
            return proxy
        provider = getattr(self._forge, name)
        if isinstance(provider, BaseProvider):
            proxy = _UniqueProviderProxy(provider)
            self._proxies[name] = proxy
            return proxy
        return provider

    def clear(self, provider_name: str | None = None) -> None:
        """Clear tracked unique values.

        Parameters
        ----------
        provider_name : str | None
            If given, clear only that provider's tracking; a name that
            has not been accessed through this proxy is ignored.
            If ``None``, clear all providers.
        """
        if provider_name is not None:
            proxy = self._proxies.get(provider_name)
            if proxy is not None:
                proxy.clear()
        else:
            for proxy in self._proxies.values():
                proxy.clear()