fakedata-python 2.0.1__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fakedata_python-2.0.3/LICENSE +21 -0
- fakedata_python-2.0.3/MANIFEST.in +24 -0
- {fakedata_python-2.0.1/fakedata_python.egg-info → fakedata_python-2.0.3}/PKG-INFO +59 -20
- fakedata_python-2.0.1/PKG-INFO → fakedata_python-2.0.3/README.md +55 -30
- fakedata_python-2.0.3/fakedata/cli.py +133 -0
- fakedata_python-2.0.1/README.md → fakedata_python-2.0.3/fakedata_python.egg-info/PKG-INFO +69 -18
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/SOURCES.txt +4 -0
- fakedata_python-2.0.3/fakedata_python.egg-info/entry_points.txt +2 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/pyproject.toml +5 -2
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/__init__.py +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/core.py +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/cardtype.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/companies.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/countries.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/devices.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/domain.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/email.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/first.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/healthcare.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/hobbies.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/industries.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/job_categories.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/job_titles.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/last.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/locales.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/middle.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/occupation.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/salary_ranges.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/shortformstate.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/state.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/states.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/street.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/universities.json +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/modules/__init__.py +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/modules/data.py +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/test_python.py +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/dependency_links.txt +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/top_level.txt +0 -0
- {fakedata_python-2.0.1 → fakedata_python-2.0.3}/setup.cfg +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Abhay Mourya
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Exclude development and Node.js files
|
|
2
|
+
prune .github
|
|
3
|
+
exclude CONTRIBUTING.md
|
|
4
|
+
exclude CODE_OF_CONDUCT.md
|
|
5
|
+
exclude .npmignore
|
|
6
|
+
exclude test.js
|
|
7
|
+
exclude test_py.py
|
|
8
|
+
exclude test_python.py
|
|
9
|
+
|
|
10
|
+
# Exclude JS source code
|
|
11
|
+
prune src
|
|
12
|
+
exclude package.json
|
|
13
|
+
exclude package-lock.json
|
|
14
|
+
|
|
15
|
+
# Exclude web and raw data
|
|
16
|
+
prune website
|
|
17
|
+
prune newdata
|
|
18
|
+
prune dist
|
|
19
|
+
prune tests
|
|
20
|
+
|
|
21
|
+
# Exclude byte-compiled files
|
|
22
|
+
global-exclude *.py[cod]
|
|
23
|
+
global-exclude __pycache__
|
|
24
|
+
global-exclude *.so
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fakedata-python
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows.
|
|
5
|
-
Author-email: abhay557 <abhaycormourya@gmail.com>
|
|
5
|
+
Author-email: abhay557 <contact@abhaymourya.in>
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/abhay557/fakedata
|
|
8
8
|
Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.7
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Dynamic: license-file
|
|
12
14
|
|
|
13
15
|
# fakedata
|
|
14
16
|
|
|
@@ -33,8 +35,8 @@ A high-performance, **zero-dependency** synthetic data generation engine, availa
|
|
|
33
35
|
- **Anomaly Injection**: Inject fraud/outlier profiles (e.g., impossible geography, credit fraud, income spikes) using `anomaly_rate`.
|
|
34
36
|
- **Time-Series Data**: Generate chronological activity logs (logins, page views, purchases) per user for behavioral modeling.
|
|
35
37
|
- **Pipeline Ready**: Export directly to CSV, JSON, or Flat objects (perfect for `pandas.DataFrame`).
|
|
38
|
+
- **CLI Tool**: Generate and export datasets directly from your terminal — no scripting required.
|
|
36
39
|
|
|
37
|
-
---
|
|
38
40
|
## Python Implementation
|
|
39
41
|
|
|
40
42
|
### Installation
|
|
@@ -44,14 +46,14 @@ pip install fakedata-python
|
|
|
44
46
|
|
|
45
47
|
### Quick Start
|
|
46
48
|
```python
|
|
47
|
-
import fakedata
|
|
49
|
+
import fakedata
|
|
48
50
|
import pandas as pd
|
|
49
51
|
|
|
50
52
|
# Generate 10,000 highly correlated users deterministically
|
|
51
|
-
users = data.users(10000, {"seed": 42})
|
|
53
|
+
users = fakedata.data.users(10000, {"seed": 42})
|
|
52
54
|
|
|
53
55
|
# Or export directly to a Pandas DataFrame
|
|
54
|
-
df = pd.DataFrame(data.users_flat(10000, {"seed": 42}))
|
|
56
|
+
df = pd.DataFrame(fakedata.data.users_flat(10000, {"seed": 42}))
|
|
55
57
|
print(df.head())
|
|
56
58
|
|
|
57
59
|
# Create time-series activity data
|
|
@@ -59,30 +61,65 @@ ts = data.user_time_series({"days": 30, "events_per_day": 8})
|
|
|
59
61
|
print(f"Generated {len(ts['activity'])} events for {ts['user']['fullName']}")
|
|
60
62
|
```
|
|
61
63
|
|
|
64
|
+
---
|
|
62
65
|
|
|
63
|
-
##
|
|
66
|
+
## CLI — Command Line Interface
|
|
64
67
|
|
|
65
|
-
|
|
68
|
+
After installing, use `fakedata` directly from your terminal. No scripts needed!
|
|
69
|
+
|
|
70
|
+
### Node.js (global install)
|
|
66
71
|
```bash
|
|
67
|
-
npm install @abhay557/fakedata
|
|
72
|
+
npm install -g @abhay557/fakedata
|
|
68
73
|
```
|
|
69
74
|
|
|
70
|
-
###
|
|
71
|
-
```
|
|
72
|
-
|
|
75
|
+
### Python (global install)
|
|
76
|
+
```bash
|
|
77
|
+
pip install fakedata-python
|
|
78
|
+
```
|
|
73
79
|
|
|
74
|
-
|
|
75
|
-
const users = data.users(1000, { seed: 42, missing_rate: 0.05 });
|
|
80
|
+
### CLI Commands
|
|
76
81
|
|
|
77
|
-
|
|
78
|
-
|
|
82
|
+
| Command | Description |
|
|
83
|
+
|:---|:---|
|
|
84
|
+
| `fakedata generate` | Generate synthetic user data |
|
|
85
|
+
| `fakedata preview` | Print a single user profile to the console |
|
|
86
|
+
| `fakedata help` | Show all available options |
|
|
87
|
+
|
|
88
|
+
### CLI Options
|
|
89
|
+
|
|
90
|
+
| Flag | Default | Description |
|
|
91
|
+
|:---|:---|:---|
|
|
92
|
+
| `-n`, `--count` | `10` | Number of users to generate |
|
|
93
|
+
| `-f`, `--format` | `json` | Output format: `json` \| `csv` \| `flat` |
|
|
94
|
+
| `-o`, `--output` | stdout | Output file path |
|
|
95
|
+
| `-s`, `--seed` | none | Random seed for reproducibility |
|
|
96
|
+
| `-l`, `--locale` | `en` | Locale: `en` \| `in` \| `jp` \| `kr` \| `de` \| `br` \| `ar` \| `fr` |
|
|
97
|
+
| `-a`, `--anomaly-rate` | `0` | Fraction of anomalous users (0–1) |
|
|
98
|
+
| `-m`, `--missing-rate` | `0` | Fraction of null fields (0–1) |
|
|
99
|
+
| `-t`, `--timeseries` | — | Include time-series activity logs |
|
|
100
|
+
| `--days` | `30` | Days of activity for time-series |
|
|
101
|
+
| `--pretty` | — | Pretty-print JSON output |
|
|
102
|
+
|
|
103
|
+
### Examples
|
|
79
104
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
105
|
+
```bash
|
|
106
|
+
# Generate 1000 users and save as CSV
|
|
107
|
+
fakedata generate -n 1000 -f csv -o dataset.csv
|
|
108
|
+
|
|
109
|
+
# Generate 500 deterministic Indian users
|
|
110
|
+
fakedata generate -n 500 -l in --seed 42 -o india.json
|
|
111
|
+
|
|
112
|
+
# Fraud detection dataset with 5% anomalies
|
|
113
|
+
fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv
|
|
114
|
+
|
|
115
|
+
# Preview a single user in the console
|
|
116
|
+
fakedata preview
|
|
117
|
+
|
|
118
|
+
# Time-series activity logs for 100 users
|
|
119
|
+
fakedata generate -n 100 --timeseries --days 60 -o activity.json
|
|
83
120
|
```
|
|
84
|
-
---
|
|
85
121
|
|
|
122
|
+
---
|
|
86
123
|
### sample output - one user
|
|
87
124
|
```fakedata.data.user()```
|
|
88
125
|
```fakedata.data.user(n) // set n = 100```
|
|
@@ -374,3 +411,5 @@ Distributed under the **MIT License**. See `LICENSE` for more information.
|
|
|
374
411
|
**Maintainer**: [abhay557](https://github.com/abhay557)
|
|
375
412
|
|
|
376
413
|
- Project Commit History - `https://github.com/abhay557/random-api.xyz`
|
|
414
|
+
|
|
415
|
+
---
|
|
@@ -1,15 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: fakedata-python
|
|
3
|
-
Version: 2.0.1
|
|
4
|
-
Summary: The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows.
|
|
5
|
-
Author-email: abhay557 <abhaycormourya@gmail.com>
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/abhay557/fakedata
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Operating System :: OS Independent
|
|
10
|
-
Requires-Python: >=3.7
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
12
|
-
|
|
13
1
|
# fakedata
|
|
14
2
|
|
|
15
3
|
[](https://www.npmjs.com/package/@abhay557/fakedata)
|
|
@@ -33,8 +21,8 @@ A high-performance, **zero-dependency** synthetic data generation engine, availa
|
|
|
33
21
|
- **Anomaly Injection**: Inject fraud/outlier profiles (e.g., impossible geography, credit fraud, income spikes) using `anomaly_rate`.
|
|
34
22
|
- **Time-Series Data**: Generate chronological activity logs (logins, page views, purchases) per user for behavioral modeling.
|
|
35
23
|
- **Pipeline Ready**: Export directly to CSV, JSON, or Flat objects (perfect for `pandas.DataFrame`).
|
|
24
|
+
- **CLI Tool**: Generate and export datasets directly from your terminal — no scripting required.
|
|
36
25
|
|
|
37
|
-
---
|
|
38
26
|
## Python Implementation
|
|
39
27
|
|
|
40
28
|
### Installation
|
|
@@ -44,14 +32,14 @@ pip install fakedata-python
|
|
|
44
32
|
|
|
45
33
|
### Quick Start
|
|
46
34
|
```python
|
|
47
|
-
import fakedata
|
|
35
|
+
import fakedata
|
|
48
36
|
import pandas as pd
|
|
49
37
|
|
|
50
38
|
# Generate 10,000 highly correlated users deterministically
|
|
51
|
-
users = data.users(10000, {"seed": 42})
|
|
39
|
+
users = fakedata.data.users(10000, {"seed": 42})
|
|
52
40
|
|
|
53
41
|
# Or export directly to a Pandas DataFrame
|
|
54
|
-
df = pd.DataFrame(data.users_flat(10000, {"seed": 42}))
|
|
42
|
+
df = pd.DataFrame(fakedata.data.users_flat(10000, {"seed": 42}))
|
|
55
43
|
print(df.head())
|
|
56
44
|
|
|
57
45
|
# Create time-series activity data
|
|
@@ -59,30 +47,65 @@ ts = data.user_time_series({"days": 30, "events_per_day": 8})
|
|
|
59
47
|
print(f"Generated {len(ts['activity'])} events for {ts['user']['fullName']}")
|
|
60
48
|
```
|
|
61
49
|
|
|
50
|
+
---
|
|
62
51
|
|
|
63
|
-
##
|
|
52
|
+
## CLI — Command Line Interface
|
|
64
53
|
|
|
65
|
-
|
|
54
|
+
After installing, use `fakedata` directly from your terminal. No scripts needed!
|
|
55
|
+
|
|
56
|
+
### Node.js (global install)
|
|
66
57
|
```bash
|
|
67
|
-
npm install @abhay557/fakedata
|
|
58
|
+
npm install -g @abhay557/fakedata
|
|
68
59
|
```
|
|
69
60
|
|
|
70
|
-
###
|
|
71
|
-
```
|
|
72
|
-
|
|
61
|
+
### Python (global install)
|
|
62
|
+
```bash
|
|
63
|
+
pip install fakedata-python
|
|
64
|
+
```
|
|
73
65
|
|
|
74
|
-
|
|
75
|
-
const users = data.users(1000, { seed: 42, missing_rate: 0.05 });
|
|
66
|
+
### CLI Commands
|
|
76
67
|
|
|
77
|
-
|
|
78
|
-
|
|
68
|
+
| Command | Description |
|
|
69
|
+
|:---|:---|
|
|
70
|
+
| `fakedata generate` | Generate synthetic user data |
|
|
71
|
+
| `fakedata preview` | Print a single user profile to the console |
|
|
72
|
+
| `fakedata help` | Show all available options |
|
|
73
|
+
|
|
74
|
+
### CLI Options
|
|
75
|
+
|
|
76
|
+
| Flag | Default | Description |
|
|
77
|
+
|:---|:---|:---|
|
|
78
|
+
| `-n`, `--count` | `10` | Number of users to generate |
|
|
79
|
+
| `-f`, `--format` | `json` | Output format: `json` \| `csv` \| `flat` |
|
|
80
|
+
| `-o`, `--output` | stdout | Output file path |
|
|
81
|
+
| `-s`, `--seed` | none | Random seed for reproducibility |
|
|
82
|
+
| `-l`, `--locale` | `en` | Locale: `en` \| `in` \| `jp` \| `kr` \| `de` \| `br` \| `ar` \| `fr` |
|
|
83
|
+
| `-a`, `--anomaly-rate` | `0` | Fraction of anomalous users (0–1) |
|
|
84
|
+
| `-m`, `--missing-rate` | `0` | Fraction of null fields (0–1) |
|
|
85
|
+
| `-t`, `--timeseries` | — | Include time-series activity logs |
|
|
86
|
+
| `--days` | `30` | Days of activity for time-series |
|
|
87
|
+
| `--pretty` | — | Pretty-print JSON output |
|
|
88
|
+
|
|
89
|
+
### Examples
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
# Generate 1000 users and save as CSV
|
|
93
|
+
fakedata generate -n 1000 -f csv -o dataset.csv
|
|
94
|
+
|
|
95
|
+
# Generate 500 deterministic Indian users
|
|
96
|
+
fakedata generate -n 500 -l in --seed 42 -o india.json
|
|
97
|
+
|
|
98
|
+
# Fraud detection dataset with 5% anomalies
|
|
99
|
+
fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv
|
|
79
100
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
101
|
+
# Preview a single user in the console
|
|
102
|
+
fakedata preview
|
|
103
|
+
|
|
104
|
+
# Time-series activity logs for 100 users
|
|
105
|
+
fakedata generate -n 100 --timeseries --days 60 -o activity.json
|
|
83
106
|
```
|
|
84
|
-
---
|
|
85
107
|
|
|
108
|
+
---
|
|
86
109
|
### sample output - one user
|
|
87
110
|
```fakedata.data.user()```
|
|
88
111
|
```fakedata.data.user(n) // set n = 100```
|
|
@@ -374,3 +397,5 @@ Distributed under the **MIT License**. See `LICENSE` for more information.
|
|
|
374
397
|
**Maintainer**: [abhay557](https://github.com/abhay557)
|
|
375
398
|
|
|
376
399
|
- Project Commit History - `https://github.com/abhay557/random-api.xyz`
|
|
400
|
+
|
|
401
|
+
---
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
fakedata CLI - ML-Ready Synthetic Data Engine
|
|
4
|
+
"""
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
def main(argv=None):
    """Run the fakedata command-line interface.

    Args:
        argv: Optional list of argument strings, without the program name.
            When None (the default, and what the console-script entry point
            uses), argparse reads the arguments from ``sys.argv``.

    Commands:
        preview:  print one user profile to stdout as indented JSON.
        generate: build ``--count`` users and write them to ``--output``,
                  or to stdout when no output path is given.
        help:     print the usage text (also shown when no command is given).

    Exits with status 1 when the fakedata engine cannot be imported.
    """
    parser = argparse.ArgumentParser(
        prog='fakedata',
        description='fakedata - ML-Ready Synthetic Data Engine (Python)',
        formatter_class=argparse.RawTextHelpFormatter,
        # argparse only %-formats the epilog when it contains '%(prog)', so a
        # percent sign is written as a plain '%' here; '%%' would be printed
        # literally.
        epilog="""
EXAMPLES:
  # Generate 1000 users to a CSV file
  fakedata generate -n 1000 -f csv -o dataset.csv

  # Generate 500 deterministic Indian users
  fakedata generate -n 500 -l in --seed 42 -o india.json

  # Generate fraud detection dataset with 5% anomalies
  fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv

  # Preview a single user profile
  fakedata preview

  # Generate with time-series activity logs
  fakedata generate -n 100 --timeseries --days 60 -o activity.json
""",
    )

    subparsers = parser.add_subparsers(dest='command', help='Command to run')

    # ─── help ─────────────────────────────────────────────────────────────────
    # Registered explicitly: without this subparser argparse rejects
    # `fakedata help` as an invalid choice before the check below can run.
    subparsers.add_parser('help', help='Show all available options')

    # ─── preview ──────────────────────────────────────────────────────────────
    subparsers.add_parser('preview', help='Print a single user profile to the console')

    # ─── generate ─────────────────────────────────────────────────────────────
    gen = subparsers.add_parser('generate', help='Generate synthetic user data')
    gen.add_argument('-n', '--count', type=int, default=10,
                     help='Number of users to generate (default: 10)')
    gen.add_argument('-f', '--format', choices=['json', 'csv', 'flat'], default='json',
                     help='Output format: json | csv | flat (default: json)')
    gen.add_argument('-o', '--output', type=str, default=None,
                     help='Output file path (default: stdout)')
    gen.add_argument('-s', '--seed', type=int, default=None,
                     help='Random seed for reproducibility')
    gen.add_argument('-l', '--locale', type=str, default=None,
                     help='Locale: en|in|jp|kr|de|br|ar|fr (default: en)')
    gen.add_argument('-a', '--anomaly-rate', type=float, default=0.0,
                     help='Fraction of anomalous users 0-1 (default: 0)')
    gen.add_argument('-m', '--missing-rate', type=float, default=0.0,
                     help='Fraction of null fields 0-1 (default: 0)')
    gen.add_argument('-t', '--timeseries', action='store_true',
                     help='Include time-series activity logs')
    gen.add_argument('--days', type=int, default=30,
                     help='Days of activity for time-series (default: 30)')
    gen.add_argument('--events-per-day', type=int, default=8,
                     help='Average events per day for time-series (default: 8)')
    gen.add_argument('--pretty', action='store_true',
                     help='Pretty-print JSON output')

    args = parser.parse_args(argv)

    if args.command is None or args.command == 'help':
        parser.print_help()
        return

    # ─── Import the engine (lazy, only when needed) ───────────────────────────
    try:
        import fakedata.modules.data as data
    except ImportError:
        print("ERROR: Could not import fakedata. Make sure it is installed: pip install fakedata-python", file=sys.stderr)
        sys.exit(1)

    # ─── Preview ─────────────────────────────────────────────────────────────
    if args.command == 'preview':
        print(json.dumps(data.user(), indent=2))
        return

    # ─── Generate ────────────────────────────────────────────────────────────
    if args.command == 'generate':
        # Forward only the options the user actually set so the engine's own
        # defaults apply otherwise. Seed and locale are tested against None
        # (not truthiness) so that `--seed 0` is still passed through.
        options = {}
        if args.seed is not None:
            options['seed'] = args.seed
        if args.locale is not None:
            options['locale'] = args.locale
        if args.anomaly_rate:
            options['anomaly_rate'] = args.anomaly_rate
        if args.missing_rate:
            options['missing_rate'] = args.missing_rate
        # The engine calls receive None rather than an empty dict.
        engine_options = options if options else None
        indent = 2 if args.pretty else None

        start = time.time()

        if args.timeseries:
            # NOTE(review): --format is not consulted on this path; the
            # time-series output is always JSON.
            # NOTE(review): every call receives the same options, including
            # any seed — confirm the engine does not return identical series
            # for each user when a seed is given.
            ts_options = {**options, 'days': args.days, 'events_per_day': args.events_per_day}
            results = [data.user_time_series(ts_options) for _ in range(args.count)]
            output = json.dumps(results, indent=indent)
        elif args.format == 'csv':
            output = data.users_to_csv(args.count, engine_options)
        elif args.format == 'flat':
            rows = data.users_flat(args.count, engine_options)
            output = json.dumps(rows, indent=indent)
        elif args.pretty:  # json, pretty-printed by the engine helper
            output = data.users_to_json(args.count, engine_options)
        else:  # json, compact
            output = json.dumps(data.users(args.count, engine_options))

        elapsed = round(time.time() - start, 2)

        if args.output:
            out_path = os.path.abspath(args.output)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(output)
            size_kb = round(len(output.encode('utf-8')) / 1024, 1)
            # The status line goes to stderr so stdout stays free for data.
            print(
                f"✔ Done! Generated {args.count:,} users in {elapsed}s → {out_path} ({size_kb} KB)",
                file=sys.stderr
            )
        else:
            print(output)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
if __name__ == '__main__':
|
|
133
|
+
main()
|
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fakedata-python
|
|
3
|
+
Version: 2.0.3
|
|
4
|
+
Summary: The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows.
|
|
5
|
+
Author-email: abhay557 <contact@abhaymourya.in>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/abhay557/fakedata
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.7
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
1
15
|
# fakedata
|
|
2
16
|
|
|
3
17
|
[](https://www.npmjs.com/package/@abhay557/fakedata)
|
|
@@ -21,8 +35,8 @@ A high-performance, **zero-dependency** synthetic data generation engine, availa
|
|
|
21
35
|
- **Anomaly Injection**: Inject fraud/outlier profiles (e.g., impossible geography, credit fraud, income spikes) using `anomaly_rate`.
|
|
22
36
|
- **Time-Series Data**: Generate chronological activity logs (logins, page views, purchases) per user for behavioral modeling.
|
|
23
37
|
- **Pipeline Ready**: Export directly to CSV, JSON, or Flat objects (perfect for `pandas.DataFrame`).
|
|
38
|
+
- **CLI Tool**: Generate and export datasets directly from your terminal — no scripting required.
|
|
24
39
|
|
|
25
|
-
---
|
|
26
40
|
## Python Implementation
|
|
27
41
|
|
|
28
42
|
### Installation
|
|
@@ -32,14 +46,14 @@ pip install fakedata-python
|
|
|
32
46
|
|
|
33
47
|
### Quick Start
|
|
34
48
|
```python
|
|
35
|
-
import fakedata
|
|
49
|
+
import fakedata
|
|
36
50
|
import pandas as pd
|
|
37
51
|
|
|
38
52
|
# Generate 10,000 highly correlated users deterministically
|
|
39
|
-
users = data.users(10000, {"seed": 42})
|
|
53
|
+
users = fakedata.data.users(10000, {"seed": 42})
|
|
40
54
|
|
|
41
55
|
# Or export directly to a Pandas DataFrame
|
|
42
|
-
df = pd.DataFrame(data.users_flat(10000, {"seed": 42}))
|
|
56
|
+
df = pd.DataFrame(fakedata.data.users_flat(10000, {"seed": 42}))
|
|
43
57
|
print(df.head())
|
|
44
58
|
|
|
45
59
|
# Create time-series activity data
|
|
@@ -47,30 +61,65 @@ ts = data.user_time_series({"days": 30, "events_per_day": 8})
|
|
|
47
61
|
print(f"Generated {len(ts['activity'])} events for {ts['user']['fullName']}")
|
|
48
62
|
```
|
|
49
63
|
|
|
64
|
+
---
|
|
50
65
|
|
|
51
|
-
##
|
|
66
|
+
## CLI — Command Line Interface
|
|
52
67
|
|
|
53
|
-
|
|
68
|
+
After installing, use `fakedata` directly from your terminal. No scripts needed!
|
|
69
|
+
|
|
70
|
+
### Node.js (global install)
|
|
54
71
|
```bash
|
|
55
|
-
npm install @abhay557/fakedata
|
|
72
|
+
npm install -g @abhay557/fakedata
|
|
56
73
|
```
|
|
57
74
|
|
|
58
|
-
###
|
|
59
|
-
```
|
|
60
|
-
|
|
75
|
+
### Python (global install)
|
|
76
|
+
```bash
|
|
77
|
+
pip install fakedata-python
|
|
78
|
+
```
|
|
61
79
|
|
|
62
|
-
|
|
63
|
-
const users = data.users(1000, { seed: 42, missing_rate: 0.05 });
|
|
80
|
+
### CLI Commands
|
|
64
81
|
|
|
65
|
-
|
|
66
|
-
|
|
82
|
+
| Command | Description |
|
|
83
|
+
|:---|:---|
|
|
84
|
+
| `fakedata generate` | Generate synthetic user data |
|
|
85
|
+
| `fakedata preview` | Print a single user profile to the console |
|
|
86
|
+
| `fakedata help` | Show all available options |
|
|
87
|
+
|
|
88
|
+
### CLI Options
|
|
89
|
+
|
|
90
|
+
| Flag | Default | Description |
|
|
91
|
+
|:---|:---|:---|
|
|
92
|
+
| `-n`, `--count` | `10` | Number of users to generate |
|
|
93
|
+
| `-f`, `--format` | `json` | Output format: `json` \| `csv` \| `flat` |
|
|
94
|
+
| `-o`, `--output` | stdout | Output file path |
|
|
95
|
+
| `-s`, `--seed` | none | Random seed for reproducibility |
|
|
96
|
+
| `-l`, `--locale` | `en` | Locale: `en` \| `in` \| `jp` \| `kr` \| `de` \| `br` \| `ar` \| `fr` |
|
|
97
|
+
| `-a`, `--anomaly-rate` | `0` | Fraction of anomalous users (0–1) |
|
|
98
|
+
| `-m`, `--missing-rate` | `0` | Fraction of null fields (0–1) |
|
|
99
|
+
| `-t`, `--timeseries` | — | Include time-series activity logs |
|
|
100
|
+
| `--days` | `30` | Days of activity for time-series |
|
|
101
|
+
| `--pretty` | — | Pretty-print JSON output |
|
|
102
|
+
|
|
103
|
+
### Examples
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Generate 1000 users and save as CSV
|
|
107
|
+
fakedata generate -n 1000 -f csv -o dataset.csv
|
|
108
|
+
|
|
109
|
+
# Generate 500 deterministic Indian users
|
|
110
|
+
fakedata generate -n 500 -l in --seed 42 -o india.json
|
|
111
|
+
|
|
112
|
+
# Fraud detection dataset with 5% anomalies
|
|
113
|
+
fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv
|
|
67
114
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
115
|
+
# Preview a single user in the console
|
|
116
|
+
fakedata preview
|
|
117
|
+
|
|
118
|
+
# Time-series activity logs for 100 users
|
|
119
|
+
fakedata generate -n 100 --timeseries --days 60 -o activity.json
|
|
71
120
|
```
|
|
72
|
-
---
|
|
73
121
|
|
|
122
|
+
---
|
|
74
123
|
### sample output - one user
|
|
75
124
|
```fakedata.data.user()```
|
|
76
125
|
```fakedata.data.user(n) // set n = 100```
|
|
@@ -362,3 +411,5 @@ Distributed under the **MIT License**. See `LICENSE` for more information.
|
|
|
362
411
|
**Maintainer**: [abhay557](https://github.com/abhay557)
|
|
363
412
|
|
|
364
413
|
- Project Commit History - `https://github.com/abhay557/random-api.xyz`
|
|
414
|
+
|
|
415
|
+
---
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
1
3
|
README.md
|
|
2
4
|
pyproject.toml
|
|
3
5
|
fakedata/__init__.py
|
|
6
|
+
fakedata/cli.py
|
|
4
7
|
fakedata/core.py
|
|
5
8
|
fakedata/test_python.py
|
|
6
9
|
fakedata/helpers/cardtype.json
|
|
@@ -30,4 +33,5 @@ fakedata/modules/data.py
|
|
|
30
33
|
fakedata_python.egg-info/PKG-INFO
|
|
31
34
|
fakedata_python.egg-info/SOURCES.txt
|
|
32
35
|
fakedata_python.egg-info/dependency_links.txt
|
|
36
|
+
fakedata_python.egg-info/entry_points.txt
|
|
33
37
|
fakedata_python.egg-info/top_level.txt
|
|
@@ -4,9 +4,9 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "fakedata-python"
|
|
7
|
-
version = "2.0.1"
|
|
7
|
+
version = "2.0.3"
|
|
8
8
|
authors = [
|
|
9
|
-
{ name="abhay557", email="abhaycormourya@gmail.com" },
|
|
9
|
+
{ name="abhay557", email="contact@abhaymourya.in" },
|
|
10
10
|
]
|
|
11
11
|
description = "The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows."
|
|
12
12
|
readme = "README.md"
|
|
@@ -17,6 +17,9 @@ classifiers = [
|
|
|
17
17
|
"Operating System :: OS Independent",
|
|
18
18
|
]
|
|
19
19
|
|
|
20
|
+
[project.scripts]
|
|
21
|
+
fakedata = "fakedata.cli:main"
|
|
22
|
+
|
|
20
23
|
[project.urls]
|
|
21
24
|
"Homepage" = "https://github.com/abhay557/fakedata"
|
|
22
25
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|