dataforge-py 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataforge/__init__.py +20 -0
- dataforge/backend.py +147 -0
- dataforge/cli.py +166 -0
- dataforge/core.py +1169 -0
- dataforge/locales/__init__.py +1 -0
- dataforge/locales/ar_SA/__init__.py +1 -0
- dataforge/locales/ar_SA/address.py +128 -0
- dataforge/locales/ar_SA/company.py +183 -0
- dataforge/locales/ar_SA/internet.py +25 -0
- dataforge/locales/ar_SA/person.py +217 -0
- dataforge/locales/ar_SA/phone.py +15 -0
- dataforge/locales/de_DE/__init__.py +1 -0
- dataforge/locales/de_DE/address.py +148 -0
- dataforge/locales/de_DE/company.py +125 -0
- dataforge/locales/de_DE/internet.py +32 -0
- dataforge/locales/de_DE/person.py +212 -0
- dataforge/locales/de_DE/phone.py +17 -0
- dataforge/locales/en_AU/__init__.py +1 -0
- dataforge/locales/en_AU/address.py +231 -0
- dataforge/locales/en_AU/company.py +193 -0
- dataforge/locales/en_AU/internet.py +34 -0
- dataforge/locales/en_AU/person.py +370 -0
- dataforge/locales/en_AU/phone.py +16 -0
- dataforge/locales/en_CA/__init__.py +1 -0
- dataforge/locales/en_CA/address.py +276 -0
- dataforge/locales/en_CA/company.py +193 -0
- dataforge/locales/en_CA/internet.py +34 -0
- dataforge/locales/en_CA/person.py +377 -0
- dataforge/locales/en_CA/phone.py +15 -0
- dataforge/locales/en_GB/__init__.py +1 -0
- dataforge/locales/en_GB/address.py +312 -0
- dataforge/locales/en_GB/company.py +196 -0
- dataforge/locales/en_GB/internet.py +34 -0
- dataforge/locales/en_GB/person.py +372 -0
- dataforge/locales/en_GB/phone.py +15 -0
- dataforge/locales/en_US/__init__.py +1 -0
- dataforge/locales/en_US/address.py +268 -0
- dataforge/locales/en_US/company.py +191 -0
- dataforge/locales/en_US/internet.py +34 -0
- dataforge/locales/en_US/person.py +370 -0
- dataforge/locales/en_US/phone.py +15 -0
- dataforge/locales/es_ES/__init__.py +1 -0
- dataforge/locales/es_ES/address.py +151 -0
- dataforge/locales/es_ES/company.py +125 -0
- dataforge/locales/es_ES/internet.py +30 -0
- dataforge/locales/es_ES/person.py +207 -0
- dataforge/locales/es_ES/phone.py +15 -0
- dataforge/locales/fr_FR/__init__.py +1 -0
- dataforge/locales/fr_FR/address.py +145 -0
- dataforge/locales/fr_FR/company.py +125 -0
- dataforge/locales/fr_FR/internet.py +30 -0
- dataforge/locales/fr_FR/person.py +212 -0
- dataforge/locales/fr_FR/phone.py +15 -0
- dataforge/locales/hi_IN/__init__.py +1 -0
- dataforge/locales/hi_IN/address.py +177 -0
- dataforge/locales/hi_IN/company.py +191 -0
- dataforge/locales/hi_IN/internet.py +26 -0
- dataforge/locales/hi_IN/person.py +218 -0
- dataforge/locales/hi_IN/phone.py +21 -0
- dataforge/locales/it_IT/__init__.py +1 -0
- dataforge/locales/it_IT/address.py +218 -0
- dataforge/locales/it_IT/company.py +151 -0
- dataforge/locales/it_IT/internet.py +31 -0
- dataforge/locales/it_IT/person.py +187 -0
- dataforge/locales/it_IT/phone.py +15 -0
- dataforge/locales/ja_JP/__init__.py +1 -0
- dataforge/locales/ja_JP/address.py +174 -0
- dataforge/locales/ja_JP/company.py +121 -0
- dataforge/locales/ja_JP/internet.py +30 -0
- dataforge/locales/ja_JP/person.py +207 -0
- dataforge/locales/ja_JP/phone.py +18 -0
- dataforge/locales/ko_KR/__init__.py +1 -0
- dataforge/locales/ko_KR/address.py +121 -0
- dataforge/locales/ko_KR/company.py +151 -0
- dataforge/locales/ko_KR/internet.py +30 -0
- dataforge/locales/ko_KR/person.py +157 -0
- dataforge/locales/ko_KR/phone.py +26 -0
- dataforge/locales/nl_NL/__init__.py +1 -0
- dataforge/locales/nl_NL/address.py +152 -0
- dataforge/locales/nl_NL/company.py +182 -0
- dataforge/locales/nl_NL/internet.py +41 -0
- dataforge/locales/nl_NL/person.py +218 -0
- dataforge/locales/nl_NL/phone.py +19 -0
- dataforge/locales/pl_PL/__init__.py +1 -0
- dataforge/locales/pl_PL/address.py +140 -0
- dataforge/locales/pl_PL/company.py +183 -0
- dataforge/locales/pl_PL/internet.py +36 -0
- dataforge/locales/pl_PL/person.py +217 -0
- dataforge/locales/pl_PL/phone.py +15 -0
- dataforge/locales/pt_BR/__init__.py +1 -0
- dataforge/locales/pt_BR/address.py +127 -0
- dataforge/locales/pt_BR/company.py +151 -0
- dataforge/locales/pt_BR/internet.py +31 -0
- dataforge/locales/pt_BR/person.py +187 -0
- dataforge/locales/pt_BR/phone.py +15 -0
- dataforge/locales/ru_RU/__init__.py +1 -0
- dataforge/locales/ru_RU/address.py +156 -0
- dataforge/locales/ru_RU/company.py +168 -0
- dataforge/locales/ru_RU/internet.py +26 -0
- dataforge/locales/ru_RU/person.py +218 -0
- dataforge/locales/ru_RU/phone.py +16 -0
- dataforge/locales/zh_CN/__init__.py +1 -0
- dataforge/locales/zh_CN/address.py +141 -0
- dataforge/locales/zh_CN/company.py +151 -0
- dataforge/locales/zh_CN/internet.py +30 -0
- dataforge/locales/zh_CN/person.py +157 -0
- dataforge/locales/zh_CN/phone.py +25 -0
- dataforge/providers/__init__.py +1 -0
- dataforge/providers/address.py +460 -0
- dataforge/providers/ai_chat.py +170 -0
- dataforge/providers/ai_prompt.py +447 -0
- dataforge/providers/automotive.py +416 -0
- dataforge/providers/barcode.py +149 -0
- dataforge/providers/base.py +34 -0
- dataforge/providers/color.py +247 -0
- dataforge/providers/company.py +144 -0
- dataforge/providers/crypto.py +105 -0
- dataforge/providers/datetime.py +397 -0
- dataforge/providers/ecommerce.py +316 -0
- dataforge/providers/education.py +234 -0
- dataforge/providers/file.py +271 -0
- dataforge/providers/finance.py +545 -0
- dataforge/providers/geo.py +332 -0
- dataforge/providers/government.py +114 -0
- dataforge/providers/internet.py +351 -0
- dataforge/providers/llm.py +726 -0
- dataforge/providers/lorem.py +241 -0
- dataforge/providers/medical.py +364 -0
- dataforge/providers/misc.py +196 -0
- dataforge/providers/network.py +283 -0
- dataforge/providers/payment.py +300 -0
- dataforge/providers/person.py +195 -0
- dataforge/providers/phone.py +87 -0
- dataforge/providers/profile.py +265 -0
- dataforge/providers/science.py +365 -0
- dataforge/providers/text.py +365 -0
- dataforge/py.typed +0 -0
- dataforge/pytest_plugin.py +80 -0
- dataforge/registry.py +164 -0
- dataforge/schema.py +772 -0
- dataforge/unique.py +171 -0
- dataforge_py-0.2.0.dist-info/METADATA +964 -0
- dataforge_py-0.2.0.dist-info/RECORD +145 -0
- dataforge_py-0.2.0.dist-info/WHEEL +4 -0
- dataforge_py-0.2.0.dist-info/entry_points.txt +35 -0
dataforge/unique.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""UniqueProxy — wrapper for unique value generation.
|
|
2
|
+
|
|
3
|
+
Intercepts provider method calls and ensures each returned value is
|
|
4
|
+
unique within the lifetime of the proxy (or until :meth:`clear` is
|
|
5
|
+
called).
|
|
6
|
+
|
|
7
|
+
Usage::
|
|
8
|
+
|
|
9
|
+
forge = DataForge(seed=42)
|
|
10
|
+
forge.unique.person.first_name() # guaranteed unique per call
|
|
11
|
+
forge.unique.clear() # reset tracking
|
|
12
|
+
|
|
13
|
+
Performance
|
|
14
|
+
-----------
|
|
15
|
+
The proxy adds a thin ``set``-membership check per scalar value
|
|
16
|
+
(O(1) amortised) and retries on collision. Batch calls are
|
|
17
|
+
generated in bulk with a single ``set`` deduplication pass,
|
|
18
|
+
requesting extra items to compensate for expected collisions.
|
|
19
|
+
|
|
20
|
+
The proxy itself is **lazily created** — accessing ``forge.unique``
|
|
21
|
+
for the first time constructs it; all subsequent accesses return
|
|
22
|
+
the cached instance.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from dataforge.providers.base import BaseProvider
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class _UniqueMethodWrapper:
    """Wrap a single provider method so it never hands out a value twice.

    Every value returned to the caller is remembered in a ``set`` (so the
    wrapped method must produce hashable values).  The memory persists
    across calls until :meth:`clear` is invoked.
    """

    __slots__ = ("_method", "_seen")

    def __init__(self, method: Any) -> None:
        self._method = method
        self._seen: set[Any] = set()

    def __call__(self, count: int = 1, **kwargs: Any) -> Any:
        """Return one unique value, or a list of *count* unique values.

        ``count == 1`` returns a bare value via :meth:`_generate_one`;
        any other *count* returns a list via :meth:`_generate_batch`.
        Extra keyword arguments are forwarded to the wrapped method.
        """
        if count == 1:
            return self._generate_one(**kwargs)
        return self._generate_batch(count, **kwargs)

    def _generate_one(self, _max_retries: int = 10_000, **kwargs: Any) -> Any:
        """Generate a single unique value, retrying on collisions.

        The wrapped method is called up to *_max_retries* times; the first
        value not seen before is recorded and returned.

        Raises
        ------
        RuntimeError
            If every attempt produced an already-seen value.
        """
        seen = self._seen
        method = self._method
        for _ in range(_max_retries):
            val = method(**kwargs)
            if val not in seen:
                seen.add(val)
                return val
        raise RuntimeError(
            f"Could not generate a unique value after {_max_retries} "
            f"retries for {self._method!r}. "
            f"Already generated {len(seen)} unique values."
        )

    def _generate_batch(self, count: int, **kwargs: Any) -> list[Any]:
        """Generate *count* unique values using over-sampling.

        The wrapped method is called with ``count=<request>`` and its
        result is iterated; unseen values are recorded and collected until
        *count* of them have been gathered.  Each already-seen value counts
        as one retry, and so does a round in which the wrapped method
        yields nothing at all.

        Raises
        ------
        RuntimeError
            If more than ``count * 100`` retries accumulate before *count*
            unique values were collected.
        """
        seen = self._seen
        method = self._method
        result: list[Any] = []
        remaining = count
        max_total_retries = count * 100

        retries = 0
        while remaining > 0:
            if retries > max_total_retries:
                raise RuntimeError(
                    f"Could not generate {count} unique values after "
                    f"{retries} retries for {self._method!r}. "
                    f"Generated {len(result)}/{count}."
                )
            # Over-sample by 20% (but by at least 10 items) to compensate
            # for expected collisions.
            request = remaining + max(remaining // 5, 10)
            batch = method(count=request, **kwargs)
            yielded_any = False
            for val in batch:
                yielded_any = True
                if val not in seen:
                    seen.add(val)
                    result.append(val)
                    remaining -= 1
                    if remaining == 0:
                        break
                else:
                    retries += 1
            if not yielded_any:
                # An empty batch makes no progress and has no collisions;
                # count it as a retry so the loop cannot spin forever.
                retries += 1

        return result

    def clear(self) -> None:
        """Reset the seen set for this method."""
        self._seen.clear()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class _UniqueProviderProxy:
    """Proxy around a provider that wraps every method for uniqueness.

    Callable attributes of the provider are wrapped in a
    ``_UniqueMethodWrapper`` on first access and cached by name, so each
    method keeps one seen-set for the lifetime of this proxy.
    Non-callable attributes are returned unchanged.
    """

    __slots__ = ("_provider", "_wrappers")

    def __init__(self, provider: BaseProvider) -> None:
        self._provider = provider
        self._wrappers: dict[str, _UniqueMethodWrapper] = {}

    def __getattr__(self, name: str) -> Any:
        """Return the cached or newly created wrapper for *name*.

        Raises
        ------
        AttributeError
            If the provider has no attribute *name*, or if this proxy has
            not been initialised yet.
        """
        # Read the slots via object.__getattribute__: on an instance whose
        # slots are still unset (e.g. one created by copy/pickle before its
        # state is restored) ``self._wrappers`` would re-enter __getattr__
        # and recurse until RecursionError.
        try:
            wrappers = object.__getattribute__(self, "_wrappers")
            provider = object.__getattribute__(self, "_provider")
        except AttributeError:
            raise AttributeError(name) from None

        wrapper = wrappers.get(name)
        if wrapper is not None:
            return wrapper
        method = getattr(provider, name)
        if not callable(method):
            return method
        wrapper = _UniqueMethodWrapper(method)
        wrappers[name] = wrapper
        return wrapper

    def clear(self) -> None:
        """Clear all tracked unique values for this provider."""
        for wrapper in self._wrappers.values():
            wrapper.clear()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class UniqueProxy:
    """Top-level unique proxy — accessed via ``forge.unique``.

    Lazily wraps each provider the first time it is accessed.
    Maintains per-method seen-value sets across calls.

    Examples
    --------
    >>> forge = DataForge(seed=42)
    >>> a = forge.unique.person.first_name()
    >>> b = forge.unique.person.first_name()
    >>> a != b  # guaranteed unique
    True
    >>> forge.unique.clear()  # reset all tracking
    """

    __slots__ = ("_forge", "_proxies")

    def __init__(self, forge: Any) -> None:
        self._forge = forge
        self._proxies: dict[str, _UniqueProviderProxy] = {}

    def __getattr__(self, name: str) -> Any:
        """Return the uniqueness proxy for the provider called *name*.

        Attributes of the forge that are ``BaseProvider`` instances are
        wrapped in a ``_UniqueProviderProxy`` and cached by name; any other
        attribute is returned unchanged.

        Raises
        ------
        AttributeError
            If the forge has no attribute *name*, or if this proxy has not
            been initialised yet.
        """
        # Read the slots via object.__getattribute__: on an instance whose
        # slots are still unset (e.g. one created by copy/pickle before its
        # state is restored) ``self._proxies`` would re-enter __getattr__
        # and recurse until RecursionError.
        try:
            proxies = object.__getattribute__(self, "_proxies")
            forge = object.__getattribute__(self, "_forge")
        except AttributeError:
            raise AttributeError(name) from None

        proxy = proxies.get(name)
        if proxy is not None:
            return proxy
        provider = getattr(forge, name)
        if isinstance(provider, BaseProvider):
            proxy = _UniqueProviderProxy(provider)
            proxies[name] = proxy
            return proxy
        return provider

    def clear(self, provider_name: str | None = None) -> None:
        """Clear tracked unique values.

        Parameters
        ----------
        provider_name : str | None
            If given, clear only that provider's tracking (a name that has
            not been accessed through this proxy is ignored).
            If ``None``, clear all providers.
        """
        if provider_name is not None:
            proxy = self._proxies.get(provider_name)
            if proxy is not None:
                proxy.clear()
        else:
            for proxy in self._proxies.values():
                proxy.clear()
|