TestDataX 0.1.0__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {testdatax-0.1.0 → testdatax-0.1.3}/PKG-INFO +66 -45
- {testdatax-0.1.0 → testdatax-0.1.3}/README.md +64 -44
- {testdatax-0.1.0 → testdatax-0.1.3}/pyproject.toml +23 -1
- {testdatax-0.1.0 → testdatax-0.1.3}/src/__init__.py +1 -1
- {testdatax-0.1.0 → testdatax-0.1.3}/src/cli.py +17 -2
- testdatax-0.1.3/src/providers/__init__.py +5 -0
- testdatax-0.1.3/src/providers/mimesis_provider.py +87 -0
- testdatax-0.1.0/src/providers/__init__.py +0 -4
- {testdatax-0.1.0 → testdatax-0.1.3}/LICENSE +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/__init__.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/base_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/csv_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/json_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/mssql_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/mysql_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/oracle_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/orc_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/parquet_exporter.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/__init__.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/chunker.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/constants.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/exporter_config.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/formatters.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/generator.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/providers/base.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/providers/faker_provider.py +0 -0
- {testdatax-0.1.0 → testdatax-0.1.3}/src/schemas.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: TestDataX
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A flexible test data generation toolkit
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: JamesPBrett
|
|
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
13
|
Requires-Dist: faker (>=33.1.0,<34.0.0)
|
|
14
|
+
Requires-Dist: mimesis (>=18.0.0,<19.0.0)
|
|
14
15
|
Requires-Dist: mysql-connector-python (>=9.1.0,<10.0.0)
|
|
15
16
|
Requires-Dist: orjson (>=3.10.12,<4.0.0)
|
|
16
17
|
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
@@ -21,57 +22,35 @@ Description-Content-Type: text/markdown
|
|
|
21
22
|
|
|
22
23
|
# TestDataX
|
|
23
24
|
|
|
25
|
+
# TestDataX
|
|
26
|
+
|
|
24
27
|

|
|
25
28
|
[](https://codecov.io/gh/JamesPBrett/testdatax)
|
|
26
29
|

|
|
27
30
|

|
|
28
31
|
|
|
29
|
-
This command-line interface application enables quick and customizable test data generation across various formats. It
|
|
32
|
+
This command-line interface application enables quick and customizable test data generation across various formats. It supports multiple data providers (Mimesis and Faker) for realistic data generation, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
|
|
30
33
|
|
|
31
34
|
## Requirements
|
|
32
35
|
- Python 3.11+
|
|
33
|
-
- Additional dependencies are handled automatically by poetry
|
|
34
|
-
|
|
35
|
-
## Installation
|
|
36
|
-
|
|
37
|
-
### Prerequisites
|
|
38
|
-
|
|
39
|
-
```bash
|
|
40
|
-
# Install Python 3.11+ if not already installed
|
|
41
|
-
brew install python@3.11
|
|
42
|
-
|
|
43
|
-
# Install Poetry
|
|
44
|
-
curl -sSL https://install.python-poetry.org | python3 -
|
|
45
36
|
|
|
46
|
-
|
|
47
|
-
poetry --version
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### Install
|
|
37
|
+
## Quick Start
|
|
51
38
|
|
|
52
39
|
```bash
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
cd testdatax
|
|
56
|
-
|
|
57
|
-
# Install dependencies
|
|
58
|
-
poetry install
|
|
59
|
-
```
|
|
40
|
+
# Install from PyPI
|
|
41
|
+
pip install testdatax
|
|
60
42
|
|
|
61
|
-
|
|
43
|
+
# Generate sample data
|
|
44
|
+
testdatax --rows 1000 --format json --output data.json
|
|
62
45
|
|
|
63
|
-
- If Poetry is not found in PATH:
|
|
64
|
-
```bash
|
|
65
|
-
export PATH="$HOME/.local/bin:$PATH"
|
|
66
|
-
```
|
|
67
46
|
|
|
68
47
|
## Features
|
|
69
48
|
|
|
70
|
-
- Generate realistic test data using
|
|
49
|
+
- Generate realistic test data using multiple data providers (Mimesis, Faker)
|
|
71
50
|
- Support for multiple output formats (CSV, JSON, SQL, etc.)
|
|
72
51
|
- Customizable schema definitions
|
|
73
52
|
- Configurable data generation parameters
|
|
74
|
-
- CLI tool for easy data generation
|
|
53
|
+
- CLI tool for easy test data generation
|
|
75
54
|
|
|
76
55
|
## Supported Formats
|
|
77
56
|
|
|
@@ -85,7 +64,7 @@ poetry install
|
|
|
85
64
|
|
|
86
65
|
## CLI Usage
|
|
87
66
|
```bash
|
|
88
|
-
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
|
|
67
|
+
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> -p <provider> [-d]
|
|
89
68
|
```
|
|
90
69
|
|
|
91
70
|
Options:
|
|
@@ -93,6 +72,7 @@ Options:
|
|
|
93
72
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
94
73
|
- `-r, --rows`: Number of rows to generate (default: 10)
|
|
95
74
|
- `-s, --schema`: Path to schema file
|
|
75
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
96
76
|
- `-d, --debug`: Enable debug output
|
|
97
77
|
|
|
98
78
|
## Usage Examples
|
|
@@ -102,10 +82,20 @@ Generate 10 rows of CSV data:
|
|
|
102
82
|
testdatax -o users.csv -f csv -s schema.json -r 10
|
|
103
83
|
```
|
|
104
84
|
|
|
85
|
+
Generate 10 rows of CSV data using Faker provider:
|
|
86
|
+
```bash
|
|
87
|
+
testdatax -o users.csv -f csv -s schema.json -r 10 -p faker
|
|
88
|
+
```
|
|
89
|
+
|
|
105
90
|
Generate 1000 rows of Parquet data with debug output:
|
|
106
91
|
```bash
|
|
107
92
|
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
|
|
108
93
|
```
|
|
94
|
+
|
|
95
|
+
Generate 1000 rows of Parquet data using Mimesis provider:
|
|
96
|
+
```bash
|
|
97
|
+
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -p mimesis
|
|
98
|
+
```
|
|
109
99
|
Generate JSON data with default row count (10):
|
|
110
100
|
```bash
|
|
111
101
|
testdatax -o data.json -f json -s schema.json
|
|
@@ -136,6 +126,7 @@ Each command consists of:
|
|
|
136
126
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
137
127
|
- `-s, --schema`: Path to your schema definition file
|
|
138
128
|
- `-r, --rows`: Number of rows to generate (optional, defaults to 10)
|
|
129
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
139
130
|
- `-d, --debug`: Enable debug logging (optional)
|
|
140
131
|
|
|
141
132
|
## Schema Example
|
|
@@ -144,7 +135,7 @@ Each command consists of:
|
|
|
144
135
|
{
|
|
145
136
|
"username": {
|
|
146
137
|
"type": "string",
|
|
147
|
-
"faker": "name"
|
|
138
|
+
"provider_field": "name"
|
|
148
139
|
},
|
|
149
140
|
"date_joined": {
|
|
150
141
|
"type": "datetime"
|
|
@@ -191,7 +182,7 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
191
182
|
"type": "string",
|
|
192
183
|
"min_length": 5,
|
|
193
184
|
"max_length": 20,
|
|
194
|
-
"
|
|
185
|
+
"provider_field": "user_name" // Use provider-specific field to generate realistic data
|
|
195
186
|
},
|
|
196
187
|
"description": {
|
|
197
188
|
"type": "text",
|
|
@@ -244,25 +235,25 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
244
235
|
}
|
|
245
236
|
```
|
|
246
237
|
|
|
247
|
-
#### Using
|
|
248
|
-
|
|
238
|
+
#### Using Data Providers
|
|
239
|
+
Both Mimesis and Faker providers support the same schema format. You can specify provider-specific generators using the `provider_field` field (works with both providers):
|
|
249
240
|
```json
|
|
250
241
|
{
|
|
251
242
|
"name": {
|
|
252
243
|
"type": "string",
|
|
253
|
-
"faker": "name"
|
|
244
|
+
"provider_field": "name"
|
|
254
245
|
},
|
|
255
246
|
"email": {
|
|
256
247
|
"type": "string",
|
|
257
|
-
"faker": "email"
|
|
248
|
+
"provider_field": "email"
|
|
258
249
|
},
|
|
259
250
|
"address": {
|
|
260
251
|
"type": "string",
|
|
261
|
-
"faker": "address"
|
|
252
|
+
"provider_field": "address"
|
|
262
253
|
},
|
|
263
254
|
"company": {
|
|
264
255
|
"type": "string",
|
|
265
|
-
"faker": "company"
|
|
256
|
+
"provider_field": "company"
|
|
266
257
|
}
|
|
267
258
|
}
|
|
268
259
|
```
|
|
@@ -276,12 +267,12 @@ The generator supports Faker providers for generating realistic data:
|
|
|
276
267
|
},
|
|
277
268
|
"username": {
|
|
278
269
|
"type": "string",
|
|
279
|
-
"faker": "user_name",
|
|
270
|
+
"provider_field": "user_name",
|
|
280
271
|
"unique": true
|
|
281
272
|
},
|
|
282
273
|
"email": {
|
|
283
274
|
"type": "string",
|
|
284
|
-
"faker": "email",
|
|
275
|
+
"provider_field": "email",
|
|
285
276
|
"unique": true
|
|
286
277
|
},
|
|
287
278
|
"age": {
|
|
@@ -306,6 +297,37 @@ The generator supports Faker providers for generating realistic data:
|
|
|
306
297
|
}
|
|
307
298
|
```
|
|
308
299
|
|
|
300
|
+
## Data Providers
|
|
301
|
+
|
|
302
|
+
TestDataX supports two powerful data providers for generating realistic test data:
|
|
303
|
+
|
|
304
|
+
### Mimesis (Default)
|
|
305
|
+
Mimesis is a high-performance Python library for generating synthetic data. It provides:
|
|
306
|
+
- Fast data generation with excellent performance
|
|
307
|
+
- Support for multiple locales and languages
|
|
308
|
+
- Wide variety of data providers for different domains
|
|
309
|
+
- Lightweight and efficient implementation
|
|
310
|
+
|
|
311
|
+
### Faker
|
|
312
|
+
Faker is a popular Python library for generating fake data. It offers:
|
|
313
|
+
- Extensive provider ecosystem with community contributions
|
|
314
|
+
- Rich set of localized providers
|
|
315
|
+
- Well-established and widely used in the Python community
|
|
316
|
+
- Comprehensive documentation and examples
|
|
317
|
+
|
|
318
|
+
You can specify the provider using the `-p` or `--provider` option:
|
|
319
|
+
```bash
|
|
320
|
+
# Use Mimesis (default)
|
|
321
|
+
testdatax -o data.csv -f csv -p mimesis
|
|
322
|
+
|
|
323
|
+
# Use Faker
|
|
324
|
+
testdatax -o data.csv -f csv -p faker
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
Both providers support the same schema format and generate compatible data types.
|
|
328
|
+
|
|
329
|
+
**Note:** For backward compatibility, the legacy `faker` field name is still supported, but `provider_field` is recommended for new schemas.
|
|
330
|
+
|
|
309
331
|
## Supported Data Types
|
|
310
332
|
|
|
311
333
|
- string
|
|
@@ -341,5 +363,4 @@ The generator supports Faker providers for generating realistic data:
|
|
|
341
363
|
## License
|
|
342
364
|
|
|
343
365
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
344
|
-
# Test change
|
|
345
366
|
|
|
@@ -1,56 +1,34 @@
|
|
|
1
1
|
# TestDataX
|
|
2
2
|
|
|
3
|
+
# TestDataX
|
|
4
|
+
|
|
3
5
|

|
|
4
6
|
[](https://codecov.io/gh/JamesPBrett/testdatax)
|
|
5
7
|

|
|
6
8
|

|
|
7
9
|
|
|
8
|
-
This command-line interface application enables quick and customizable test data generation across various formats. It
|
|
10
|
+
This command-line interface application enables quick and customizable test data generation across various formats. It supports multiple data providers (Mimesis and Faker) for realistic data generation, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
|
|
9
11
|
|
|
10
12
|
## Requirements
|
|
11
13
|
- Python 3.11+
|
|
12
|
-
- Additional dependencies are handled automatically by poetry
|
|
13
|
-
|
|
14
|
-
## Installation
|
|
15
|
-
|
|
16
|
-
### Prerequisites
|
|
17
|
-
|
|
18
|
-
```bash
|
|
19
|
-
# Install Python 3.11+ if not already installed
|
|
20
|
-
brew install python@3.11
|
|
21
|
-
|
|
22
|
-
# Install Poetry
|
|
23
|
-
curl -sSL https://install.python-poetry.org | python3 -
|
|
24
14
|
|
|
25
|
-
|
|
26
|
-
poetry --version
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
### Install
|
|
15
|
+
## Quick Start
|
|
30
16
|
|
|
31
17
|
```bash
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
cd testdatax
|
|
35
|
-
|
|
36
|
-
# Install dependencies
|
|
37
|
-
poetry install
|
|
38
|
-
```
|
|
18
|
+
# Install from PyPI
|
|
19
|
+
pip install testdatax
|
|
39
20
|
|
|
40
|
-
|
|
21
|
+
# Generate sample data
|
|
22
|
+
testdatax --rows 1000 --format json --output data.json
|
|
41
23
|
|
|
42
|
-
- If Poetry is not found in PATH:
|
|
43
|
-
```bash
|
|
44
|
-
export PATH="$HOME/.local/bin:$PATH"
|
|
45
|
-
```
|
|
46
24
|
|
|
47
25
|
## Features
|
|
48
26
|
|
|
49
|
-
- Generate realistic test data using
|
|
27
|
+
- Generate realistic test data using multiple data providers (Mimesis, Faker)
|
|
50
28
|
- Support for multiple output formats (CSV, JSON, SQL, etc.)
|
|
51
29
|
- Customizable schema definitions
|
|
52
30
|
- Configurable data generation parameters
|
|
53
|
-
- CLI tool for easy data generation
|
|
31
|
+
- CLI tool for easy test data generation
|
|
54
32
|
|
|
55
33
|
## Supported Formats
|
|
56
34
|
|
|
@@ -64,7 +42,7 @@ poetry install
|
|
|
64
42
|
|
|
65
43
|
## CLI Usage
|
|
66
44
|
```bash
|
|
67
|
-
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
|
|
45
|
+
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> -p <provider> [-d]
|
|
68
46
|
```
|
|
69
47
|
|
|
70
48
|
Options:
|
|
@@ -72,6 +50,7 @@ Options:
|
|
|
72
50
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
73
51
|
- `-r, --rows`: Number of rows to generate (default: 10)
|
|
74
52
|
- `-s, --schema`: Path to schema file
|
|
53
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
75
54
|
- `-d, --debug`: Enable debug output
|
|
76
55
|
|
|
77
56
|
## Usage Examples
|
|
@@ -81,10 +60,20 @@ Generate 10 rows of CSV data:
|
|
|
81
60
|
testdatax -o users.csv -f csv -s schema.json -r 10
|
|
82
61
|
```
|
|
83
62
|
|
|
63
|
+
Generate 10 rows of CSV data using Faker provider:
|
|
64
|
+
```bash
|
|
65
|
+
testdatax -o users.csv -f csv -s schema.json -r 10 -p faker
|
|
66
|
+
```
|
|
67
|
+
|
|
84
68
|
Generate 1000 rows of Parquet data with debug output:
|
|
85
69
|
```bash
|
|
86
70
|
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
|
|
87
71
|
```
|
|
72
|
+
|
|
73
|
+
Generate 1000 rows of Parquet data using Mimesis provider:
|
|
74
|
+
```bash
|
|
75
|
+
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -p mimesis
|
|
76
|
+
```
|
|
88
77
|
Generate JSON data with default row count (10):
|
|
89
78
|
```bash
|
|
90
79
|
testdatax -o data.json -f json -s schema.json
|
|
@@ -115,6 +104,7 @@ Each command consists of:
|
|
|
115
104
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
116
105
|
- `-s, --schema`: Path to your schema definition file
|
|
117
106
|
- `-r, --rows`: Number of rows to generate (optional, defaults to 10)
|
|
107
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
118
108
|
- `-d, --debug`: Enable debug logging (optional)
|
|
119
109
|
|
|
120
110
|
## Schema Example
|
|
@@ -123,7 +113,7 @@ Each command consists of:
|
|
|
123
113
|
{
|
|
124
114
|
"username": {
|
|
125
115
|
"type": "string",
|
|
126
|
-
"faker": "name"
|
|
116
|
+
"provider_field": "name"
|
|
127
117
|
},
|
|
128
118
|
"date_joined": {
|
|
129
119
|
"type": "datetime"
|
|
@@ -170,7 +160,7 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
170
160
|
"type": "string",
|
|
171
161
|
"min_length": 5,
|
|
172
162
|
"max_length": 20,
|
|
173
|
-
"
|
|
163
|
+
"provider_field": "user_name" // Use provider-specific field to generate realistic data
|
|
174
164
|
},
|
|
175
165
|
"description": {
|
|
176
166
|
"type": "text",
|
|
@@ -223,25 +213,25 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
223
213
|
}
|
|
224
214
|
```
|
|
225
215
|
|
|
226
|
-
#### Using
|
|
227
|
-
|
|
216
|
+
#### Using Data Providers
|
|
217
|
+
Both Mimesis and Faker providers support the same schema format. You can specify provider-specific generators using the `provider_field` field (works with both providers):
|
|
228
218
|
```json
|
|
229
219
|
{
|
|
230
220
|
"name": {
|
|
231
221
|
"type": "string",
|
|
232
|
-
"faker": "name"
|
|
222
|
+
"provider_field": "name"
|
|
233
223
|
},
|
|
234
224
|
"email": {
|
|
235
225
|
"type": "string",
|
|
236
|
-
"faker": "email"
|
|
226
|
+
"provider_field": "email"
|
|
237
227
|
},
|
|
238
228
|
"address": {
|
|
239
229
|
"type": "string",
|
|
240
|
-
"faker": "address"
|
|
230
|
+
"provider_field": "address"
|
|
241
231
|
},
|
|
242
232
|
"company": {
|
|
243
233
|
"type": "string",
|
|
244
|
-
"faker": "company"
|
|
234
|
+
"provider_field": "company"
|
|
245
235
|
}
|
|
246
236
|
}
|
|
247
237
|
```
|
|
@@ -255,12 +245,12 @@ The generator supports Faker providers for generating realistic data:
|
|
|
255
245
|
},
|
|
256
246
|
"username": {
|
|
257
247
|
"type": "string",
|
|
258
|
-
"faker": "user_name",
|
|
248
|
+
"provider_field": "user_name",
|
|
259
249
|
"unique": true
|
|
260
250
|
},
|
|
261
251
|
"email": {
|
|
262
252
|
"type": "string",
|
|
263
|
-
"faker": "email",
|
|
253
|
+
"provider_field": "email",
|
|
264
254
|
"unique": true
|
|
265
255
|
},
|
|
266
256
|
"age": {
|
|
@@ -285,6 +275,37 @@ The generator supports Faker providers for generating realistic data:
|
|
|
285
275
|
}
|
|
286
276
|
```
|
|
287
277
|
|
|
278
|
+
## Data Providers
|
|
279
|
+
|
|
280
|
+
TestDataX supports two powerful data providers for generating realistic test data:
|
|
281
|
+
|
|
282
|
+
### Mimesis (Default)
|
|
283
|
+
Mimesis is a high-performance Python library for generating synthetic data. It provides:
|
|
284
|
+
- Fast data generation with excellent performance
|
|
285
|
+
- Support for multiple locales and languages
|
|
286
|
+
- Wide variety of data providers for different domains
|
|
287
|
+
- Lightweight and efficient implementation
|
|
288
|
+
|
|
289
|
+
### Faker
|
|
290
|
+
Faker is a popular Python library for generating fake data. It offers:
|
|
291
|
+
- Extensive provider ecosystem with community contributions
|
|
292
|
+
- Rich set of localized providers
|
|
293
|
+
- Well-established and widely used in the Python community
|
|
294
|
+
- Comprehensive documentation and examples
|
|
295
|
+
|
|
296
|
+
You can specify the provider using the `-p` or `--provider` option:
|
|
297
|
+
```bash
|
|
298
|
+
# Use Mimesis (default)
|
|
299
|
+
testdatax -o data.csv -f csv -p mimesis
|
|
300
|
+
|
|
301
|
+
# Use Faker
|
|
302
|
+
testdatax -o data.csv -f csv -p faker
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
Both providers support the same schema format and generate compatible data types.
|
|
306
|
+
|
|
307
|
+
**Note:** For backward compatibility, the legacy `faker` field name is still supported, but `provider_field` is recommended for new schemas.
|
|
308
|
+
|
|
288
309
|
## Supported Data Types
|
|
289
310
|
|
|
290
311
|
- string
|
|
@@ -320,4 +341,3 @@ The generator supports Faker providers for generating realistic data:
|
|
|
320
341
|
## License
|
|
321
342
|
|
|
322
343
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
323
|
-
# Test change
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "TestDataX"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.1.3"
|
|
4
4
|
description = "A flexible test data generation toolkit"
|
|
5
5
|
authors = ["JamesPBrett"]
|
|
6
6
|
license = "MIT"
|
|
@@ -16,6 +16,7 @@ orjson = "^3.10.12"
|
|
|
16
16
|
pyarrow = "^18.1.0"
|
|
17
17
|
pandas = "^2.2.3"
|
|
18
18
|
mysql-connector-python = "^9.1.0"
|
|
19
|
+
mimesis = "^18.0.0"
|
|
19
20
|
|
|
20
21
|
[tool.poetry.group.dev.dependencies]
|
|
21
22
|
pytest = "^8.3.4"
|
|
@@ -119,6 +120,14 @@ warn_unreachable = true
|
|
|
119
120
|
strict_optional = true
|
|
120
121
|
plugins = ["pydantic.mypy"]
|
|
121
122
|
|
|
123
|
+
[[tool.mypy.overrides]]
|
|
124
|
+
module = "mimesis.*"
|
|
125
|
+
ignore_missing_imports = true
|
|
126
|
+
|
|
127
|
+
[[tool.mypy.overrides]]
|
|
128
|
+
module = "src.providers.mimesis_provider"
|
|
129
|
+
warn_return_any = false
|
|
130
|
+
|
|
122
131
|
[tool.coverage.run]
|
|
123
132
|
source = ["src"]
|
|
124
133
|
branch = true
|
|
@@ -177,7 +186,20 @@ upload_to_repository = true
|
|
|
177
186
|
repository = "pypi"
|
|
178
187
|
tag_commit = true
|
|
179
188
|
tag_format = "v{version}"
|
|
189
|
+
major_on_zero = false # Prevents 0.x.x to 1.0.0 on breaking changes
|
|
190
|
+
allow_zero_version = true
|
|
180
191
|
commit_message = "chore(release): v{version}"
|
|
192
|
+
commit_types = [
|
|
193
|
+
{ type = "feat", section = "Features", bump = "minor" },
|
|
194
|
+
{ type = "fix", section = "Bug Fixes", bump = "patch" },
|
|
195
|
+
{ type = "breaking", section = "Breaking Changes", bump = "major" },
|
|
196
|
+
{ type = "perf", section = "Performance", bump = "patch" },
|
|
197
|
+
{ type = "refactor", section = "Refactoring", bump = "patch" },
|
|
198
|
+
{ type = "docs", section = "Documentation", bump = false },
|
|
199
|
+
{ type = "chore", section = "Maintenance", bump = false },
|
|
200
|
+
{ type = "ci", section = "CI/CD", bump = false },
|
|
201
|
+
{ type = "build", section = "Build", bump = false }
|
|
202
|
+
]
|
|
181
203
|
|
|
182
204
|
[tool.semantic_release.changelog]
|
|
183
205
|
template_dir = "templates"
|
|
@@ -9,6 +9,7 @@ from .exporters.base_exporter import BaseExporter
|
|
|
9
9
|
from .exporters.utils.constants import DEFAULT_SCHEMA, EXPORT_FORMATS
|
|
10
10
|
from .exporters.utils.exporter_config import EXPORTER_CLASSES
|
|
11
11
|
from .generator import DataGenerator
|
|
12
|
+
from .providers import FakerProvider, MimesisProvider
|
|
12
13
|
from .schemas import DataType, FieldSchema, GeneratorConfig
|
|
13
14
|
|
|
14
15
|
|
|
@@ -42,6 +43,9 @@ FORMAT_OPTION = typer.Option(
|
|
|
42
43
|
ROWS_OPTION = typer.Option(10, "--rows", "-r", help="Number of rows to generate")
|
|
43
44
|
SCHEMA_PATH_OPTION = typer.Option(None, "--schema", "-s", help="Path to schema file")
|
|
44
45
|
DEBUG_OPTION = typer.Option(False, "--debug", "-d", help="Enable debug output")
|
|
46
|
+
PROVIDER_OPTION = typer.Option(
|
|
47
|
+
"mimesis", "--provider", "-p", help="Data provider (faker or mimesis)"
|
|
48
|
+
)
|
|
45
49
|
|
|
46
50
|
|
|
47
51
|
@app.command()
|
|
@@ -51,6 +55,7 @@ def generate(
|
|
|
51
55
|
rows: int = ROWS_OPTION,
|
|
52
56
|
schema_path: Path | None = SCHEMA_PATH_OPTION,
|
|
53
57
|
debug: bool = DEBUG_OPTION,
|
|
58
|
+
provider: str = PROVIDER_OPTION,
|
|
54
59
|
) -> None:
|
|
55
60
|
"""Generate synthetic data based on the provided schema."""
|
|
56
61
|
try:
|
|
@@ -104,7 +109,8 @@ def generate(
|
|
|
104
109
|
min_value=min_value,
|
|
105
110
|
max_value=max_value,
|
|
106
111
|
right_digits=field_def.get("right_digits"),
|
|
107
|
-
value_provider=field_def.get("faker"),
|
|
112
|
+
value_provider=field_def.get("provider_field")
|
|
113
|
+
or field_def.get("faker"),
|
|
108
114
|
pattern=field_def.get("pattern"),
|
|
109
115
|
)
|
|
110
116
|
fields.append(field_schema.model_dump())
|
|
@@ -117,6 +123,10 @@ def generate(
|
|
|
117
123
|
if format not in EXPORT_FORMATS:
|
|
118
124
|
raise ValueError(f"Unsupported format: {format}")
|
|
119
125
|
|
|
126
|
+
# Validate provider
|
|
127
|
+
if provider.lower() not in ["faker", "mimesis"]:
|
|
128
|
+
raise ValueError(f"Unsupported provider: {provider}")
|
|
129
|
+
|
|
120
130
|
# Create generator config
|
|
121
131
|
if debug:
|
|
122
132
|
typer.echo(f"Converted fields: {fields}", err=False)
|
|
@@ -127,7 +137,12 @@ def generate(
|
|
|
127
137
|
# Generate data
|
|
128
138
|
if debug:
|
|
129
139
|
typer.echo(f"Generator config: {config}", err=False)
|
|
130
|
-
|
|
140
|
+
|
|
141
|
+
# Select provider
|
|
142
|
+
data_provider = (
|
|
143
|
+
MimesisProvider() if provider.lower() == "mimesis" else FakerProvider()
|
|
144
|
+
)
|
|
145
|
+
generator = DataGenerator(provider=data_provider)
|
|
131
146
|
data = generator.generate_data(config.fields, config.row_count)
|
|
132
147
|
|
|
133
148
|
# Export data
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from decimal import Decimal
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
from mimesis import Address, Cryptographic, Datetime, Locale, Numeric, Person, Text
|
|
6
|
+
from mimesis.random import Random
|
|
7
|
+
|
|
8
|
+
from .base import DataProvider
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MimesisProvider(DataProvider):
    """Generate fake data using the Mimesis library.

    One Mimesis generator object is created per data domain in ``__init__``
    and reused by every ``generate_*`` call.
    """

    def __init__(self, locale: Locale = Locale.EN) -> None:
        """Initialize the MimesisProvider with a specific locale.

        Args:
            locale: Mimesis locale passed to the person, address, text and
                datetime generators. ``Numeric``, ``Cryptographic`` and
                ``Random`` are constructed without a locale.
        """
        self.locale = locale
        self.person = Person(locale=locale)
        self.address = Address(locale=locale)
        self.text = Text(locale=locale)
        self.numeric = Numeric()
        self.datetime = Datetime(locale=locale)
        self.crypto = Cryptographic()
        self.random = Random()

    def generate_string(self, **kwargs: str) -> str:
        """Generate a fake string using the specified provider.

        Args:
            **kwargs: Only ``value_provider`` is read. It names the kind of
                string to produce (see the keys of ``provider_map`` below).
                A missing or falsy value is treated as ``"name"``.

        Returns:
            The generated value converted with ``str()``. A provider name
            that is not in the map falls back to a full name.
        """
        provider = kwargs.get("value_provider") or "name"

        # Map common Faker provider names to Mimesis equivalents.
        provider_map = {
            "name": self.person.full_name,
            "first_name": self.person.first_name,
            "last_name": self.person.last_name,
            "email": self.person.email,
            "username": self.person.username,
            # Faker spells this provider "user_name", and schemas written for
            # Faker use that spelling; without the alias such fields silently
            # fell through to the full-name default.
            "user_name": self.person.username,
            "phone_number": self.person.telephone,
            "address": self.address.address,
            # NOTE(review): this yields an occupation (job title), not a
            # company name - confirm whether that is the intended stand-in.
            "company": self.person.occupation,
            "word": self.text.word,
            "sentence": self.text.sentence,
        }

        generator = provider_map.get(provider, self.person.full_name)
        return str(generator())

    def generate_text(self, **kwargs: str) -> str:
        """Generate a fake text string.

        Args:
            **kwargs: Accepted but not used by this implementation.

        Returns:
            The result of ``Text.text(quantity=1)`` converted with ``str()``.
        """
        return str(self.text.text(quantity=1))

    def generate_integer(
        self, min_value: int | None = None, max_value: int | None = None
    ) -> int:
        """Generate a fake integer within the specified range.

        Args:
            min_value: Start of the range; ``None`` means 0.
            max_value: End of the range; ``None`` means 100.

        Returns:
            The value from ``Numeric.integer_number`` converted with ``int()``.
        """
        min_val = min_value if min_value is not None else 0
        max_val = max_value if max_value is not None else 100
        return int(self.numeric.integer_number(start=min_val, end=max_val))

    def generate_decimal(self, **kwargs: Decimal) -> Decimal:
        """Generate a fake decimal number.

        Args:
            **kwargs: Accepted but not used by this implementation; the range
                is always requested as 0.0 to 999999.99.

        Returns:
            A ``Decimal`` built from the float formatted to two decimal places.
        """
        float_val = self.numeric.float_number(start=0.0, end=999999.99)
        # Go through a formatted string so the Decimal carries exactly two
        # fractional digits rather than the float's binary expansion.
        return Decimal(f"{float_val:.2f}")

    def generate_boolean(self) -> bool:
        """Generate a fake boolean value."""
        return bool(self.random.choice([True, False]))

    def generate_date(self) -> date:
        """Generate a fake date object."""
        return self.datetime.date()

    def generate_datetime(self) -> datetime:
        """Generate a fake datetime object."""
        return self.datetime.datetime()

    def generate_binary(self, length: int = 64) -> bytes:
        """Generate a fake binary string of the specified length.

        Args:
            length: Number of random bytes to request (default 64).
        """
        return bytes(self.random.randbytes(length))

    def generate_uuid(self) -> UUID:
        """Generate a fake UUID.

        The value returned by ``Cryptographic.uuid()`` is parsed into a
        ``uuid.UUID`` instance.
        """
        return UUID(self.crypto.uuid())

    def generate_enum(self, values: list[str]) -> str:
        """Generate a fake value from the given list of values.

        Args:
            values: Candidate values to choose from.

        Returns:
            One element of ``values`` converted with ``str()``.

        Raises:
            ValueError: If ``values`` is empty.
        """
        if not values:
            raise ValueError("Enum values cannot be empty")
        return str(self.random.choice(values))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|