fakedata-python 2.0.1__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. fakedata_python-2.0.3/LICENSE +21 -0
  2. fakedata_python-2.0.3/MANIFEST.in +24 -0
  3. {fakedata_python-2.0.1/fakedata_python.egg-info → fakedata_python-2.0.3}/PKG-INFO +59 -20
  4. fakedata_python-2.0.1/PKG-INFO → fakedata_python-2.0.3/README.md +55 -30
  5. fakedata_python-2.0.3/fakedata/cli.py +133 -0
  6. fakedata_python-2.0.1/README.md → fakedata_python-2.0.3/fakedata_python.egg-info/PKG-INFO +69 -18
  7. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/SOURCES.txt +4 -0
  8. fakedata_python-2.0.3/fakedata_python.egg-info/entry_points.txt +2 -0
  9. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/pyproject.toml +5 -2
  10. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/__init__.py +0 -0
  11. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/core.py +0 -0
  12. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/cardtype.json +0 -0
  13. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/companies.json +0 -0
  14. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/countries.json +0 -0
  15. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/devices.json +0 -0
  16. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/domain.json +0 -0
  17. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/email.json +0 -0
  18. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/first.json +0 -0
  19. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/healthcare.json +0 -0
  20. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/hobbies.json +0 -0
  21. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/industries.json +0 -0
  22. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/job_categories.json +0 -0
  23. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/job_titles.json +0 -0
  24. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/last.json +0 -0
  25. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/locales.json +0 -0
  26. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/middle.json +0 -0
  27. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/occupation.json +0 -0
  28. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/salary_ranges.json +0 -0
  29. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/shortformstate.json +0 -0
  30. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/state.json +0 -0
  31. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/states.json +0 -0
  32. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/street.json +0 -0
  33. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/helpers/universities.json +0 -0
  34. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/modules/__init__.py +0 -0
  35. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/modules/data.py +0 -0
  36. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata/test_python.py +0 -0
  37. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/dependency_links.txt +0 -0
  38. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/fakedata_python.egg-info/top_level.txt +0 -0
  39. {fakedata_python-2.0.1 → fakedata_python-2.0.3}/setup.cfg +0 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Abhay Mourya
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,24 @@
1
+ # Exclude development and Node.js files
2
+ prune .github
3
+ exclude CONTRIBUTING.md
4
+ exclude CODE_OF_CONDUCT.md
5
+ exclude .npmignore
6
+ exclude test.js
7
+ exclude test_py.py
8
+ exclude test_python.py
9
+
10
+ # Exclude JS source code
11
+ prune src
12
+ exclude package.json
13
+ exclude package-lock.json
14
+
15
+ # Exclude web and raw data
16
+ prune website
17
+ prune newdata
18
+ prune dist
19
+ prune tests
20
+
21
+ # Exclude byte-compiled files
22
+ global-exclude *.py[cod]
23
+ global-exclude __pycache__
24
+ global-exclude *.so
@@ -1,14 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fakedata-python
3
- Version: 2.0.1
3
+ Version: 2.0.3
4
4
  Summary: The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows.
5
- Author-email: abhay557 <abhaycormourya@gmail.com>
5
+ Author-email: abhay557 <contact@abhaymourya.in>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/abhay557/fakedata
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Operating System :: OS Independent
10
10
  Requires-Python: >=3.7
11
11
  Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Dynamic: license-file
12
14
 
13
15
  # fakedata
14
16
 
@@ -33,8 +35,8 @@ A high-performance, **zero-dependency** synthetic data generation engine, availa
33
35
  - **Anomaly Injection**: Inject fraud/outlier profiles (e.g., impossible geography, credit fraud, income spikes) using `anomaly_rate`.
34
36
  - **Time-Series Data**: Generate chronological activity logs (logins, page views, purchases) per user for behavioral modeling.
35
37
  - **Pipeline Ready**: Export directly to CSV, JSON, or Flat objects (perfect for `pandas.DataFrame`).
38
+ - **CLI Tool**: Generate and export datasets directly from your terminal — no scripting required.
36
39
 
37
- ---
38
40
  ## Python Implementation
39
41
 
40
42
  ### Installation
@@ -44,14 +46,14 @@ pip install fakedata-python
44
46
 
45
47
  ### Quick Start
46
48
  ```python
47
- import fakedata.data as data
49
+ import fakedata
48
50
  import pandas as pd
49
51
 
50
52
  # Generate 10,000 highly correlated users deterministically
51
- users = data.users(10000, {"seed": 42})
53
+ users = fakedata.data.users(10000, {"seed": 42})
52
54
 
53
55
  # Or export directly to a Pandas DataFrame
54
- df = pd.DataFrame(data.users_flat(10000, {"seed": 42}))
56
+ df = pd.DataFrame(fakedata.data.users_flat(10000, {"seed": 42}))
55
57
  print(df.head())
56
58
 
57
59
  # Create time-series activity data
@@ -59,30 +61,65 @@ ts = data.user_time_series({"days": 30, "events_per_day": 8})
59
61
  print(f"Generated {len(ts['activity'])} events for {ts['user']['fullName']}")
60
62
  ```
61
63
 
64
+ ---
62
65
 
63
- ## Node.js / TypeScript Implementation
66
+ ## CLI Command Line Interface
64
67
 
65
- ### Installation
68
+ After installing, use `fakedata` directly from your terminal. No scripts needed!
69
+
70
+ ### Node.js (global install)
66
71
  ```bash
67
- npm install @abhay557/fakedata
72
+ npm install -g @abhay557/fakedata
68
73
  ```
69
74
 
70
- ### Quick Start
71
- ```javascript
72
- const { data } = require('@abhay557/fakedata');
75
+ ### Python (global install)
76
+ ```bash
77
+ pip install fakedata-python
78
+ ```
73
79
 
74
- // Generate deterministic users with a 5% missing data rate (null injection)
75
- const users = data.users(1000, { seed: 42, missing_rate: 0.05 });
80
+ ### CLI Commands
76
81
 
77
- // Export directly to CSV format
78
- const csvString = data.usersToCSV(1000, { seed: 42 });
82
+ | Command | Description |
83
+ |:---|:---|
84
+ | `fakedata generate` | Generate synthetic user data |
85
+ | `fakedata preview` | Print a single user profile to the console |
86
+ | `fakedata help` | Show all available options |
87
+
88
+ ### CLI Options
89
+
90
+ | Flag | Default | Description |
91
+ |:---|:---|:---|
92
+ | `-n`, `--count` | `10` | Number of users to generate |
93
+ | `-f`, `--format` | `json` | Output format: `json` \| `csv` \| `flat` |
94
+ | `-o`, `--output` | stdout | Output file path |
95
+ | `-s`, `--seed` | none | Random seed for reproducibility |
96
+ | `-l`, `--locale` | `en` | Locale: `en` \| `in` \| `jp` \| `kr` \| `de` \| `br` \| `ar` \| `fr` |
97
+ | `-a`, `--anomaly-rate` | `0` | Fraction of anomalous users (0–1) |
98
+ | `-m`, `--missing-rate` | `0` | Fraction of null fields (0–1) |
99
+ | `-t`, `--timeseries` | — | Include time-series activity logs |
100
+ | `--days` | `30` | Days of activity for time-series |
101
+ | `--pretty` | — | Pretty-print JSON output |
102
+
103
+ ### Examples
79
104
 
80
- // Time-series activity data
81
- const ts = data.userTimeSeries({ days: 30, eventsPerDay: 8 });
82
- console.log(`Generated ${ts.activity.length} events for ${ts.user.fullName}`);
105
+ ```bash
106
+ # Generate 1000 users and save as CSV
107
+ fakedata generate -n 1000 -f csv -o dataset.csv
108
+
109
+ # Generate 500 deterministic Indian users
110
+ fakedata generate -n 500 -l in --seed 42 -o india.json
111
+
112
+ # Fraud detection dataset with 5% anomalies
113
+ fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv
114
+
115
+ # Preview a single user in the console
116
+ fakedata preview
117
+
118
+ # Time-series activity logs for 100 users
119
+ fakedata generate -n 100 --timeseries --days 60 -o activity.json
83
120
  ```
84
- ---
85
121
 
122
+ ---
86
123
  ### sample output - one user
87
124
  ```fakedata.data.user()```
88
125
  ```fakedata.data.user(n) // set n = 100```
@@ -374,3 +411,5 @@ Distributed under the **MIT License**. See `LICENSE` for more information.
374
411
  **Maintainer**: [abhay557](https://github.com/abhay557)
375
412
 
376
413
  - Project Commit History - `https://github.com/abhay557/random-api.xyz`
414
+
415
+ ---
@@ -1,15 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: fakedata-python
3
- Version: 2.0.1
4
- Summary: The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows.
5
- Author-email: abhay557 <abhaycormourya@gmail.com>
6
- License-Expression: MIT
7
- Project-URL: Homepage, https://github.com/abhay557/fakedata
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Operating System :: OS Independent
10
- Requires-Python: >=3.7
11
- Description-Content-Type: text/markdown
12
-
13
1
  # fakedata
14
2
 
15
3
  [![NPM Version](https://img.shields.io/npm/v/@abhay557/fakedata?color=red&label=npm)](https://www.npmjs.com/package/@abhay557/fakedata)
@@ -33,8 +21,8 @@ A high-performance, **zero-dependency** synthetic data generation engine, availa
33
21
  - **Anomaly Injection**: Inject fraud/outlier profiles (e.g., impossible geography, credit fraud, income spikes) using `anomaly_rate`.
34
22
  - **Time-Series Data**: Generate chronological activity logs (logins, page views, purchases) per user for behavioral modeling.
35
23
  - **Pipeline Ready**: Export directly to CSV, JSON, or Flat objects (perfect for `pandas.DataFrame`).
24
+ - **CLI Tool**: Generate and export datasets directly from your terminal — no scripting required.
36
25
 
37
- ---
38
26
  ## Python Implementation
39
27
 
40
28
  ### Installation
@@ -44,14 +32,14 @@ pip install fakedata-python
44
32
 
45
33
  ### Quick Start
46
34
  ```python
47
- import fakedata.data as data
35
+ import fakedata
48
36
  import pandas as pd
49
37
 
50
38
  # Generate 10,000 highly correlated users deterministically
51
- users = data.users(10000, {"seed": 42})
39
+ users = fakedata.data.users(10000, {"seed": 42})
52
40
 
53
41
  # Or export directly to a Pandas DataFrame
54
- df = pd.DataFrame(data.users_flat(10000, {"seed": 42}))
42
+ df = pd.DataFrame(fakedata.data.users_flat(10000, {"seed": 42}))
55
43
  print(df.head())
56
44
 
57
45
  # Create time-series activity data
@@ -59,30 +47,65 @@ ts = data.user_time_series({"days": 30, "events_per_day": 8})
59
47
  print(f"Generated {len(ts['activity'])} events for {ts['user']['fullName']}")
60
48
  ```
61
49
 
50
+ ---
62
51
 
63
- ## Node.js / TypeScript Implementation
52
+ ## Command Line Interface (CLI)
64
53
 
65
- ### Installation
54
+ After installing, use `fakedata` directly from your terminal. No scripts needed!
55
+
56
+ ### Node.js (global install)
66
57
  ```bash
67
- npm install @abhay557/fakedata
58
+ npm install -g @abhay557/fakedata
68
59
  ```
69
60
 
70
- ### Quick Start
71
- ```javascript
72
- const { data } = require('@abhay557/fakedata');
61
+ ### Python (global install)
62
+ ```bash
63
+ pip install fakedata-python
64
+ ```
73
65
 
74
- // Generate deterministic users with a 5% missing data rate (null injection)
75
- const users = data.users(1000, { seed: 42, missing_rate: 0.05 });
66
+ ### CLI Commands
76
67
 
77
- // Export directly to CSV format
78
- const csvString = data.usersToCSV(1000, { seed: 42 });
68
+ | Command | Description |
69
+ |:---|:---|
70
+ | `fakedata generate` | Generate synthetic user data |
71
+ | `fakedata preview` | Print a single user profile to the console |
72
+ | `fakedata help` | Show all available options |
73
+
74
+ ### CLI Options
75
+
76
+ | Flag | Default | Description |
77
+ |:---|:---|:---|
78
+ | `-n`, `--count` | `10` | Number of users to generate |
79
+ | `-f`, `--format` | `json` | Output format: `json` \| `csv` \| `flat` |
80
+ | `-o`, `--output` | stdout | Output file path |
81
+ | `-s`, `--seed` | none | Random seed for reproducibility |
82
+ | `-l`, `--locale` | `en` | Locale: `en` \| `in` \| `jp` \| `kr` \| `de` \| `br` \| `ar` \| `fr` |
83
+ | `-a`, `--anomaly-rate` | `0` | Fraction of anomalous users (0–1) |
84
+ | `-m`, `--missing-rate` | `0` | Fraction of null fields (0–1) |
85
+ | `-t`, `--timeseries` | — | Include time-series activity logs |
86
+ | `--days` | `30` | Days of activity for time-series |
+ | `--events-per-day` | `8` | Average events per day for time-series |
87
+ | `--pretty` | — | Pretty-print JSON output |
88
+
89
+ ### Examples
90
+
91
+ ```bash
92
+ # Generate 1000 users and save as CSV
93
+ fakedata generate -n 1000 -f csv -o dataset.csv
94
+
95
+ # Generate 500 deterministic Indian users
96
+ fakedata generate -n 500 -l in --seed 42 -o india.json
97
+
98
+ # Fraud detection dataset with 5% anomalies
99
+ fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv
79
100
 
80
- // Time-series activity data
81
- const ts = data.userTimeSeries({ days: 30, eventsPerDay: 8 });
82
- console.log(`Generated ${ts.activity.length} events for ${ts.user.fullName}`);
101
+ # Preview a single user in the console
102
+ fakedata preview
103
+
104
+ # Time-series activity logs for 100 users
105
+ fakedata generate -n 100 --timeseries --days 60 -o activity.json
83
106
  ```
84
- ---
85
107
 
108
+ ---
86
109
  ### sample output - one user
87
110
  ```fakedata.data.user()```
88
111
  ```fakedata.data.user(n) // set n = 100```
@@ -374,3 +397,5 @@ Distributed under the **MIT License**. See `LICENSE` for more information.
374
397
  **Maintainer**: [abhay557](https://github.com/abhay557)
375
398
 
376
399
  - Project Commit History - `https://github.com/abhay557/random-api.xyz`
400
+
401
+ ---
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ fakedata CLI - ML-Ready Synthetic Data Engine
4
+ """
5
+ import argparse
6
+ import json
7
+ import sys
8
+ import time
9
+ import os
10
+
11
def main(argv=None):
    """Entry point for the ``fakedata`` command-line interface.

    Parses the command line and then either prints the help text, previews a
    single user profile, or generates a dataset that is written to stdout or
    to the file given with ``--output``.

    Args:
        argv: Optional list of argument strings (without the program name).
            When ``None`` (the default, and what the ``fakedata`` console
            script uses) argparse reads ``sys.argv[1:]``.

    Returns:
        None.

    Raises:
        SystemExit: status 2 (raised by argparse) for invalid arguments,
            status 1 when the engine cannot be imported or the output file
            cannot be written.
    """
    parser = argparse.ArgumentParser(
        prog='fakedata',
        description='fakedata - ML-Ready Synthetic Data Engine (Python)',
        formatter_class=argparse.RawTextHelpFormatter,
        # argparse only %-formats the epilog when it contains "%(prog)", so a
        # literal percent sign must not be doubled here ("5%%" would be shown
        # verbatim in the help output).
        epilog="""
EXAMPLES:
  # Generate 1000 users to a CSV file
  fakedata generate -n 1000 -f csv -o dataset.csv

  # Generate 500 deterministic Indian users
  fakedata generate -n 500 -l in --seed 42 -o india.json

  # Generate fraud detection dataset with 5% anomalies
  fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv

  # Preview a single user profile
  fakedata preview

  # Generate with time-series activity logs
  fakedata generate -n 100 --timeseries --days 60 -o activity.json
"""
    )

    subparsers = parser.add_subparsers(dest='command', help='Command to run')

    # --- preview ------------------------------------------------------------
    subparsers.add_parser('preview', help='Print a single user profile to the console')

    # --- generate -----------------------------------------------------------
    gen = subparsers.add_parser('generate', help='Generate synthetic user data')
    gen.add_argument('-n', '--count', type=int, default=10,
                     help='Number of users to generate (default: 10)')
    gen.add_argument('-f', '--format', choices=['json', 'csv', 'flat'], default='json',
                     help='Output format: json | csv | flat (default: json)')
    gen.add_argument('-o', '--output', type=str, default=None,
                     help='Output file path (default: stdout)')
    gen.add_argument('-s', '--seed', type=int, default=None,
                     help='Random seed for reproducibility')
    gen.add_argument('-l', '--locale', type=str, default=None,
                     help='Locale: en|in|jp|kr|de|br|ar|fr (default: en)')
    gen.add_argument('-a', '--anomaly-rate', type=float, default=0.0,
                     help='Fraction of anomalous users 0-1 (default: 0)')
    gen.add_argument('-m', '--missing-rate', type=float, default=0.0,
                     help='Fraction of null fields 0-1 (default: 0)')
    gen.add_argument('-t', '--timeseries', action='store_true',
                     help='Include time-series activity logs')
    gen.add_argument('--days', type=int, default=30,
                     help='Days of activity for time-series (default: 30)')
    gen.add_argument('--events-per-day', type=int, default=8,
                     help='Average events per day for time-series (default: 8)')
    gen.add_argument('--pretty', action='store_true',
                     help='Pretty-print JSON output')

    # --- help ---------------------------------------------------------------
    # Without this subparser argparse rejects `fakedata help` as an invalid
    # choice and the 'help' branch below can never be reached.
    subparsers.add_parser('help', help='Show all available options')

    args = parser.parse_args(argv)

    if args.command is None or args.command == 'help':
        parser.print_help()
        return

    # Reject out-of-range values before importing the engine so mistakes fail
    # fast with a normal argparse usage error (exit status 2).
    if args.command == 'generate':
        if args.count < 0:
            gen.error('--count must be zero or greater')
        for flag, rate in (('--anomaly-rate', args.anomaly_rate),
                           ('--missing-rate', args.missing_rate)):
            # Written as "not (0 <= rate <= 1)" so that NaN is rejected too.
            if not 0.0 <= rate <= 1.0:
                gen.error(f'{flag} must be between 0 and 1')

    # --- Import the engine (lazy, only when needed) -------------------------
    try:
        import fakedata.modules.data as data
    except ImportError:
        print("ERROR: Could not import fakedata. Make sure it is installed: pip install fakedata-python", file=sys.stderr)
        sys.exit(1)

    # --- Preview ------------------------------------------------------------
    if args.command == 'preview':
        print(json.dumps(data.user(), indent=2))
        return

    # --- Generate -----------------------------------------------------------
    if args.command == 'generate':
        # Only forward options the user actually set so the engine's own
        # defaults apply otherwise. The seed is compared against None (not
        # truthiness / 0.0) because `--seed 0` is a valid seed.
        options = {}
        if args.seed is not None:
            options['seed'] = args.seed
        if args.locale is not None:
            options['locale'] = args.locale
        if args.anomaly_rate:
            options['anomaly_rate'] = args.anomaly_rate
        if args.missing_rate:
            options['missing_rate'] = args.missing_rate
        engine_options = options or None
        indent = 2 if args.pretty else None

        start = time.time()

        if args.timeseries:
            if args.format != 'json':
                # Time-series output is always serialized as JSON; say so
                # instead of silently ignoring the requested format.
                print(
                    f"WARNING: --format {args.format} is ignored with --timeseries; writing JSON.",
                    file=sys.stderr
                )
            # NOTE(review): every call receives the same options, including the
            # same seed. If the engine reseeds on each call, all records would
            # be identical when --seed is given; confirm against
            # data.user_time_series.
            ts_options = {**options, 'days': args.days, 'events_per_day': args.events_per_day}
            results = [data.user_time_series(ts_options) for _ in range(args.count)]
            output = json.dumps(results, indent=indent)

        elif args.format == 'csv':
            output = data.users_to_csv(args.count, engine_options)

        elif args.format == 'flat':
            rows = data.users_flat(args.count, engine_options)
            output = json.dumps(rows, indent=indent)

        else:  # json
            if args.pretty:
                output = data.users_to_json(args.count, engine_options)
            else:
                output = json.dumps(data.users(args.count, engine_options))

        elapsed = round(time.time() - start, 2)

        if args.output:
            out_path = os.path.abspath(args.output)
            try:
                with open(out_path, 'w', encoding='utf-8') as f:
                    f.write(output)
            except OSError as exc:
                # Report unwritable paths as a one-line error, not a traceback.
                print(f"ERROR: Could not write {out_path}: {exc}", file=sys.stderr)
                sys.exit(1)
            size_kb = round(len(output.encode('utf-8')) / 1024, 1)
            # Status goes to stderr so stdout stays clean for piping.
            print(
                f"✔ Done! Generated {args.count:,} users in {elapsed}s → {out_path} ({size_kb} KB)",
                file=sys.stderr
            )
        else:
            print(output)


if __name__ == '__main__':
    main()
@@ -1,3 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: fakedata-python
3
+ Version: 2.0.3
4
+ Summary: The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows.
5
+ Author-email: abhay557 <contact@abhaymourya.in>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/abhay557/fakedata
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.7
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Dynamic: license-file
14
+
1
15
  # fakedata
2
16
 
3
17
  [![NPM Version](https://img.shields.io/npm/v/@abhay557/fakedata?color=red&label=npm)](https://www.npmjs.com/package/@abhay557/fakedata)
@@ -21,8 +35,8 @@ A high-performance, **zero-dependency** synthetic data generation engine, availa
21
35
  - **Anomaly Injection**: Inject fraud/outlier profiles (e.g., impossible geography, credit fraud, income spikes) using `anomaly_rate`.
22
36
  - **Time-Series Data**: Generate chronological activity logs (logins, page views, purchases) per user for behavioral modeling.
23
37
  - **Pipeline Ready**: Export directly to CSV, JSON, or Flat objects (perfect for `pandas.DataFrame`).
38
+ - **CLI Tool**: Generate and export datasets directly from your terminal — no scripting required.
24
39
 
25
- ---
26
40
  ## Python Implementation
27
41
 
28
42
  ### Installation
@@ -32,14 +46,14 @@ pip install fakedata-python
32
46
 
33
47
  ### Quick Start
34
48
  ```python
35
- import fakedata.data as data
49
+ import fakedata
36
50
  import pandas as pd
37
51
 
38
52
  # Generate 10,000 highly correlated users deterministically
39
- users = data.users(10000, {"seed": 42})
53
+ users = fakedata.data.users(10000, {"seed": 42})
40
54
 
41
55
  # Or export directly to a Pandas DataFrame
42
- df = pd.DataFrame(data.users_flat(10000, {"seed": 42}))
56
+ df = pd.DataFrame(fakedata.data.users_flat(10000, {"seed": 42}))
43
57
  print(df.head())
44
58
 
45
59
  # Create time-series activity data
@@ -47,30 +61,65 @@ ts = data.user_time_series({"days": 30, "events_per_day": 8})
47
61
  print(f"Generated {len(ts['activity'])} events for {ts['user']['fullName']}")
48
62
  ```
49
63
 
64
+ ---
50
65
 
51
- ## Node.js / TypeScript Implementation
66
+ ## CLI Command Line Interface
52
67
 
53
- ### Installation
68
+ After installing, use `fakedata` directly from your terminal. No scripts needed!
69
+
70
+ ### Node.js (global install)
54
71
  ```bash
55
- npm install @abhay557/fakedata
72
+ npm install -g @abhay557/fakedata
56
73
  ```
57
74
 
58
- ### Quick Start
59
- ```javascript
60
- const { data } = require('@abhay557/fakedata');
75
+ ### Python (global install)
76
+ ```bash
77
+ pip install fakedata-python
78
+ ```
61
79
 
62
- // Generate deterministic users with a 5% missing data rate (null injection)
63
- const users = data.users(1000, { seed: 42, missing_rate: 0.05 });
80
+ ### CLI Commands
64
81
 
65
- // Export directly to CSV format
66
- const csvString = data.usersToCSV(1000, { seed: 42 });
82
+ | Command | Description |
83
+ |:---|:---|
84
+ | `fakedata generate` | Generate synthetic user data |
85
+ | `fakedata preview` | Print a single user profile to the console |
86
+ | `fakedata help` | Show all available options |
87
+
88
+ ### CLI Options
89
+
90
+ | Flag | Default | Description |
91
+ |:---|:---|:---|
92
+ | `-n`, `--count` | `10` | Number of users to generate |
93
+ | `-f`, `--format` | `json` | Output format: `json` \| `csv` \| `flat` |
94
+ | `-o`, `--output` | stdout | Output file path |
95
+ | `-s`, `--seed` | none | Random seed for reproducibility |
96
+ | `-l`, `--locale` | `en` | Locale: `en` \| `in` \| `jp` \| `kr` \| `de` \| `br` \| `ar` \| `fr` |
97
+ | `-a`, `--anomaly-rate` | `0` | Fraction of anomalous users (0–1) |
98
+ | `-m`, `--missing-rate` | `0` | Fraction of null fields (0–1) |
99
+ | `-t`, `--timeseries` | — | Include time-series activity logs |
100
+ | `--days` | `30` | Days of activity for time-series |
101
+ | `--pretty` | — | Pretty-print JSON output |
102
+
103
+ ### Examples
104
+
105
+ ```bash
106
+ # Generate 1000 users and save as CSV
107
+ fakedata generate -n 1000 -f csv -o dataset.csv
108
+
109
+ # Generate 500 deterministic Indian users
110
+ fakedata generate -n 500 -l in --seed 42 -o india.json
111
+
112
+ # Fraud detection dataset with 5% anomalies
113
+ fakedata generate -n 10000 -a 0.05 -f csv -o fraud_data.csv
67
114
 
68
- // Time-series activity data
69
- const ts = data.userTimeSeries({ days: 30, eventsPerDay: 8 });
70
- console.log(`Generated ${ts.activity.length} events for ${ts.user.fullName}`);
115
+ # Preview a single user in the console
116
+ fakedata preview
117
+
118
+ # Time-series activity logs for 100 users
119
+ fakedata generate -n 100 --timeseries --days 60 -o activity.json
71
120
  ```
72
- ---
73
121
 
122
+ ---
74
123
  ### sample output - one user
75
124
  ```fakedata.data.user()```
76
125
  ```fakedata.data.user(n) // set n = 100```
@@ -362,3 +411,5 @@ Distributed under the **MIT License**. See `LICENSE` for more information.
362
411
  **Maintainer**: [abhay557](https://github.com/abhay557)
363
412
 
364
413
  - Project Commit History - `https://github.com/abhay557/random-api.xyz`
414
+
415
+ ---
@@ -1,6 +1,9 @@
1
+ LICENSE
2
+ MANIFEST.in
1
3
  README.md
2
4
  pyproject.toml
3
5
  fakedata/__init__.py
6
+ fakedata/cli.py
4
7
  fakedata/core.py
5
8
  fakedata/test_python.py
6
9
  fakedata/helpers/cardtype.json
@@ -30,4 +33,5 @@ fakedata/modules/data.py
30
33
  fakedata_python.egg-info/PKG-INFO
31
34
  fakedata_python.egg-info/SOURCES.txt
32
35
  fakedata_python.egg-info/dependency_links.txt
36
+ fakedata_python.egg-info/entry_points.txt
33
37
  fakedata_python.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ fakedata = fakedata.cli:main
@@ -4,9 +4,9 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "fakedata-python"
7
- version = "2.0.1"
7
+ version = "2.0.3"
8
8
  authors = [
9
- { name="abhay557", email="abhaycormourya@gmail.com" },
9
+ { name="abhay557", email="contact@abhaymourya.in" },
10
10
  ]
11
11
  description = "The fakedata package generates realistic synthetic user profiles for machine learning, deep learning, data analysis, and data science workflows."
12
12
  readme = "README.md"
@@ -17,6 +17,9 @@ classifiers = [
17
17
  "Operating System :: OS Independent",
18
18
  ]
19
19
 
20
+ [project.scripts]
21
+ fakedata = "fakedata.cli:main"
22
+
20
23
  [project.urls]
21
24
  "Homepage" = "https://github.com/abhay557/fakedata"
22
25