TestDataX 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {testdatax-0.1.1 → testdatax-0.1.3}/PKG-INFO +58 -14
- {testdatax-0.1.1 → testdatax-0.1.3}/README.md +56 -13
- {testdatax-0.1.1 → testdatax-0.1.3}/pyproject.toml +10 -1
- {testdatax-0.1.1 → testdatax-0.1.3}/src/__init__.py +1 -1
- {testdatax-0.1.1 → testdatax-0.1.3}/src/cli.py +17 -2
- testdatax-0.1.3/src/providers/__init__.py +5 -0
- testdatax-0.1.3/src/providers/mimesis_provider.py +87 -0
- testdatax-0.1.1/src/providers/__init__.py +0 -4
- {testdatax-0.1.1 → testdatax-0.1.3}/LICENSE +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/__init__.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/base_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/csv_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/json_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/mssql_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/mysql_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/oracle_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/orc_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/parquet_exporter.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/utils/__init__.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/utils/chunker.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/utils/constants.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/utils/exporter_config.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/exporters/utils/formatters.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/generator.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/providers/base.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/providers/faker_provider.py +0 -0
- {testdatax-0.1.1 → testdatax-0.1.3}/src/schemas.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: TestDataX
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A flexible test data generation toolkit
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: JamesPBrett
|
|
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.13
|
|
13
13
|
Requires-Dist: faker (>=33.1.0,<34.0.0)
|
|
14
|
+
Requires-Dist: mimesis (>=18.0.0,<19.0.0)
|
|
14
15
|
Requires-Dist: mysql-connector-python (>=9.1.0,<10.0.0)
|
|
15
16
|
Requires-Dist: orjson (>=3.10.12,<4.0.0)
|
|
16
17
|
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
@@ -28,7 +29,7 @@ Description-Content-Type: text/markdown
|
|
|
28
29
|

|
|
29
30
|

|
|
30
31
|
|
|
31
|
-
This command-line interface application enables quick and customizable test data generation across various formats. It
|
|
32
|
+
This command-line interface application enables quick and customizable test data generation across various formats. It supports multiple data providers (Mimesis and Faker) for realistic data generation, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
|
|
32
33
|
|
|
33
34
|
## Requirements
|
|
34
35
|
- Python 3.11+
|
|
@@ -45,7 +46,7 @@ testdatax --rows 1000 --format json --output data.json
|
|
|
45
46
|
|
|
46
47
|
## Features
|
|
47
48
|
|
|
48
|
-
- Generate realistic test data using
|
|
49
|
+
- Generate realistic test data using multiple data providers (Mimesis, Faker)
|
|
49
50
|
- Support for multiple output formats (CSV, JSON, SQL, etc.)
|
|
50
51
|
- Customizable schema definitions
|
|
51
52
|
- Configurable data generation parameters
|
|
@@ -63,7 +64,7 @@ testdatax --rows 1000 --format json --output data.json
|
|
|
63
64
|
|
|
64
65
|
## CLI Usage
|
|
65
66
|
```bash
|
|
66
|
-
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
|
|
67
|
+
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> -p <provider> [-d]
|
|
67
68
|
```
|
|
68
69
|
|
|
69
70
|
Options:
|
|
@@ -71,6 +72,7 @@ Options:
|
|
|
71
72
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
72
73
|
- `-r, --rows`: Number of rows to generate (default: 10)
|
|
73
74
|
- `-s, --schema`: Path to schema file
|
|
75
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
74
76
|
- `-d, --debug`: Enable debug output
|
|
75
77
|
|
|
76
78
|
## Usage Examples
|
|
@@ -80,10 +82,20 @@ Generate 10 rows of CSV data:
|
|
|
80
82
|
testdatax -o users.csv -f csv -s schema.json -r 10
|
|
81
83
|
```
|
|
82
84
|
|
|
85
|
+
Generate 10 rows of CSV data using Faker provider:
|
|
86
|
+
```bash
|
|
87
|
+
testdatax -o users.csv -f csv -s schema.json -r 10 -p faker
|
|
88
|
+
```
|
|
89
|
+
|
|
83
90
|
Generate 1000 rows of Parquet data with debug output:
|
|
84
91
|
```bash
|
|
85
92
|
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
|
|
86
93
|
```
|
|
94
|
+
|
|
95
|
+
Generate 1000 rows of Parquet data using Mimesis provider:
|
|
96
|
+
```bash
|
|
97
|
+
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -p mimesis
|
|
98
|
+
```
|
|
87
99
|
Generate JSON data with default row count (10):
|
|
88
100
|
```bash
|
|
89
101
|
testdatax -o data.json -f json -s schema.json
|
|
@@ -114,6 +126,7 @@ Each command consists of:
|
|
|
114
126
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
115
127
|
- `-s, --schema`: Path to your schema definition file
|
|
116
128
|
- `-r, --rows`: Number of rows to generate (optional, defaults to 10)
|
|
129
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
117
130
|
- `-d, --debug`: Enable debug logging (optional)
|
|
118
131
|
|
|
119
132
|
## Schema Example
|
|
@@ -122,7 +135,7 @@ Each command consists of:
|
|
|
122
135
|
{
|
|
123
136
|
"username": {
|
|
124
137
|
"type": "string",
|
|
125
|
-
"faker": "name"
|
|
138
|
+
"provider_field": "name"
|
|
126
139
|
},
|
|
127
140
|
"date_joined": {
|
|
128
141
|
"type": "datetime"
|
|
@@ -169,7 +182,7 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
169
182
|
"type": "string",
|
|
170
183
|
"min_length": 5,
|
|
171
184
|
"max_length": 20,
|
|
172
|
-
"
|
|
185
|
+
"provider_field": "user_name" // Use provider-specific field to generate realistic data
|
|
173
186
|
},
|
|
174
187
|
"description": {
|
|
175
188
|
"type": "text",
|
|
@@ -222,25 +235,25 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
222
235
|
}
|
|
223
236
|
```
|
|
224
237
|
|
|
225
|
-
#### Using
|
|
226
|
-
|
|
238
|
+
#### Using Data Providers
|
|
239
|
+
Both Mimesis and Faker providers support the same schema format. You can specify provider-specific generators using the `provider_field` field (works with both providers):
|
|
227
240
|
```json
|
|
228
241
|
{
|
|
229
242
|
"name": {
|
|
230
243
|
"type": "string",
|
|
231
|
-
"faker": "name"
|
|
244
|
+
"provider_field": "name"
|
|
232
245
|
},
|
|
233
246
|
"email": {
|
|
234
247
|
"type": "string",
|
|
235
|
-
"faker": "email"
|
|
248
|
+
"provider_field": "email"
|
|
236
249
|
},
|
|
237
250
|
"address": {
|
|
238
251
|
"type": "string",
|
|
239
|
-
"faker": "address"
|
|
252
|
+
"provider_field": "address"
|
|
240
253
|
},
|
|
241
254
|
"company": {
|
|
242
255
|
"type": "string",
|
|
243
|
-
"faker": "company"
|
|
256
|
+
"provider_field": "company"
|
|
244
257
|
}
|
|
245
258
|
}
|
|
246
259
|
```
|
|
@@ -254,12 +267,12 @@ The generator supports Faker providers for generating realistic data:
|
|
|
254
267
|
},
|
|
255
268
|
"username": {
|
|
256
269
|
"type": "string",
|
|
257
|
-
"faker": "user_name",
|
|
270
|
+
"provider_field": "user_name",
|
|
258
271
|
"unique": true
|
|
259
272
|
},
|
|
260
273
|
"email": {
|
|
261
274
|
"type": "string",
|
|
262
|
-
"faker": "email",
|
|
275
|
+
"provider_field": "email",
|
|
263
276
|
"unique": true
|
|
264
277
|
},
|
|
265
278
|
"age": {
|
|
@@ -284,6 +297,37 @@ The generator supports Faker providers for generating realistic data:
|
|
|
284
297
|
}
|
|
285
298
|
```
|
|
286
299
|
|
|
300
|
+
## Data Providers
|
|
301
|
+
|
|
302
|
+
TestDataX supports two powerful data providers for generating realistic test data:
|
|
303
|
+
|
|
304
|
+
### Mimesis (Default)
|
|
305
|
+
Mimesis is a high-performance Python library for generating synthetic data. It provides:
|
|
306
|
+
- Fast data generation with excellent performance
|
|
307
|
+
- Support for multiple locales and languages
|
|
308
|
+
- Wide variety of data providers for different domains
|
|
309
|
+
- Lightweight and efficient implementation
|
|
310
|
+
|
|
311
|
+
### Faker
|
|
312
|
+
Faker is a popular Python library for generating fake data. It offers:
|
|
313
|
+
- Extensive provider ecosystem with community contributions
|
|
314
|
+
- Rich set of localized providers
|
|
315
|
+
- Well-established and widely used in the Python community
|
|
316
|
+
- Comprehensive documentation and examples
|
|
317
|
+
|
|
318
|
+
You can specify the provider using the `-p` or `--provider` option:
|
|
319
|
+
```bash
|
|
320
|
+
# Use Mimesis (default)
|
|
321
|
+
testdatax -o data.csv -f csv -p mimesis
|
|
322
|
+
|
|
323
|
+
# Use Faker
|
|
324
|
+
testdatax -o data.csv -f csv -p faker
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
Both providers support the same schema format and generate compatible data types.
|
|
328
|
+
|
|
329
|
+
**Note:** For backward compatibility, the legacy `faker` field name is still supported, but `provider_field` is recommended for new schemas.
|
|
330
|
+
|
|
287
331
|
## Supported Data Types
|
|
288
332
|
|
|
289
333
|
- string
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|
|
|
10
|
-
This command-line interface application enables quick and customizable test data generation across various formats. It
|
|
10
|
+
This command-line interface application enables quick and customizable test data generation across various formats. It supports multiple data providers (Mimesis and Faker) for realistic data generation, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
|
|
11
11
|
|
|
12
12
|
## Requirements
|
|
13
13
|
- Python 3.11+
|
|
@@ -24,7 +24,7 @@ testdatax --rows 1000 --format json --output data.json
|
|
|
24
24
|
|
|
25
25
|
## Features
|
|
26
26
|
|
|
27
|
-
- Generate realistic test data using
|
|
27
|
+
- Generate realistic test data using multiple data providers (Mimesis, Faker)
|
|
28
28
|
- Support for multiple output formats (CSV, JSON, SQL, etc.)
|
|
29
29
|
- Customizable schema definitions
|
|
30
30
|
- Configurable data generation parameters
|
|
@@ -42,7 +42,7 @@ testdatax --rows 1000 --format json --output data.json
|
|
|
42
42
|
|
|
43
43
|
## CLI Usage
|
|
44
44
|
```bash
|
|
45
|
-
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
|
|
45
|
+
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> -p <provider> [-d]
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
Options:
|
|
@@ -50,6 +50,7 @@ Options:
|
|
|
50
50
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
51
51
|
- `-r, --rows`: Number of rows to generate (default: 10)
|
|
52
52
|
- `-s, --schema`: Path to schema file
|
|
53
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
53
54
|
- `-d, --debug`: Enable debug output
|
|
54
55
|
|
|
55
56
|
## Usage Examples
|
|
@@ -59,10 +60,20 @@ Generate 10 rows of CSV data:
|
|
|
59
60
|
testdatax -o users.csv -f csv -s schema.json -r 10
|
|
60
61
|
```
|
|
61
62
|
|
|
63
|
+
Generate 10 rows of CSV data using Faker provider:
|
|
64
|
+
```bash
|
|
65
|
+
testdatax -o users.csv -f csv -s schema.json -r 10 -p faker
|
|
66
|
+
```
|
|
67
|
+
|
|
62
68
|
Generate 1000 rows of Parquet data with debug output:
|
|
63
69
|
```bash
|
|
64
70
|
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
|
|
65
71
|
```
|
|
72
|
+
|
|
73
|
+
Generate 1000 rows of Parquet data using Mimesis provider:
|
|
74
|
+
```bash
|
|
75
|
+
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -p mimesis
|
|
76
|
+
```
|
|
66
77
|
Generate JSON data with default row count (10):
|
|
67
78
|
```bash
|
|
68
79
|
testdatax -o data.json -f json -s schema.json
|
|
@@ -93,6 +104,7 @@ Each command consists of:
|
|
|
93
104
|
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
94
105
|
- `-s, --schema`: Path to your schema definition file
|
|
95
106
|
- `-r, --rows`: Number of rows to generate (optional, defaults to 10)
|
|
107
|
+
- `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
|
|
96
108
|
- `-d, --debug`: Enable debug logging (optional)
|
|
97
109
|
|
|
98
110
|
## Schema Example
|
|
@@ -101,7 +113,7 @@ Each command consists of:
|
|
|
101
113
|
{
|
|
102
114
|
"username": {
|
|
103
115
|
"type": "string",
|
|
104
|
-
"faker": "name"
|
|
116
|
+
"provider_field": "name"
|
|
105
117
|
},
|
|
106
118
|
"date_joined": {
|
|
107
119
|
"type": "datetime"
|
|
@@ -148,7 +160,7 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
148
160
|
"type": "string",
|
|
149
161
|
"min_length": 5,
|
|
150
162
|
"max_length": 20,
|
|
151
|
-
"
|
|
163
|
+
"provider_field": "user_name" // Use provider-specific field to generate realistic data
|
|
152
164
|
},
|
|
153
165
|
"description": {
|
|
154
166
|
"type": "text",
|
|
@@ -201,25 +213,25 @@ The schema file defines the structure and constraints of your generated data. Ea
|
|
|
201
213
|
}
|
|
202
214
|
```
|
|
203
215
|
|
|
204
|
-
#### Using
|
|
205
|
-
|
|
216
|
+
#### Using Data Providers
|
|
217
|
+
Both Mimesis and Faker providers support the same schema format. You can specify provider-specific generators using the `provider_field` field (works with both providers):
|
|
206
218
|
```json
|
|
207
219
|
{
|
|
208
220
|
"name": {
|
|
209
221
|
"type": "string",
|
|
210
|
-
"faker": "name"
|
|
222
|
+
"provider_field": "name"
|
|
211
223
|
},
|
|
212
224
|
"email": {
|
|
213
225
|
"type": "string",
|
|
214
|
-
"faker": "email"
|
|
226
|
+
"provider_field": "email"
|
|
215
227
|
},
|
|
216
228
|
"address": {
|
|
217
229
|
"type": "string",
|
|
218
|
-
"faker": "address"
|
|
230
|
+
"provider_field": "address"
|
|
219
231
|
},
|
|
220
232
|
"company": {
|
|
221
233
|
"type": "string",
|
|
222
|
-
"faker": "company"
|
|
234
|
+
"provider_field": "company"
|
|
223
235
|
}
|
|
224
236
|
}
|
|
225
237
|
```
|
|
@@ -233,12 +245,12 @@ The generator supports Faker providers for generating realistic data:
|
|
|
233
245
|
},
|
|
234
246
|
"username": {
|
|
235
247
|
"type": "string",
|
|
236
|
-
"faker": "user_name",
|
|
248
|
+
"provider_field": "user_name",
|
|
237
249
|
"unique": true
|
|
238
250
|
},
|
|
239
251
|
"email": {
|
|
240
252
|
"type": "string",
|
|
241
|
-
"faker": "email",
|
|
253
|
+
"provider_field": "email",
|
|
242
254
|
"unique": true
|
|
243
255
|
},
|
|
244
256
|
"age": {
|
|
@@ -263,6 +275,37 @@ The generator supports Faker providers for generating realistic data:
|
|
|
263
275
|
}
|
|
264
276
|
```
|
|
265
277
|
|
|
278
|
+
## Data Providers
|
|
279
|
+
|
|
280
|
+
TestDataX supports two powerful data providers for generating realistic test data:
|
|
281
|
+
|
|
282
|
+
### Mimesis (Default)
|
|
283
|
+
Mimesis is a high-performance Python library for generating synthetic data. It provides:
|
|
284
|
+
- Fast data generation with excellent performance
|
|
285
|
+
- Support for multiple locales and languages
|
|
286
|
+
- Wide variety of data providers for different domains
|
|
287
|
+
- Lightweight and efficient implementation
|
|
288
|
+
|
|
289
|
+
### Faker
|
|
290
|
+
Faker is a popular Python library for generating fake data. It offers:
|
|
291
|
+
- Extensive provider ecosystem with community contributions
|
|
292
|
+
- Rich set of localized providers
|
|
293
|
+
- Well-established and widely used in the Python community
|
|
294
|
+
- Comprehensive documentation and examples
|
|
295
|
+
|
|
296
|
+
You can specify the provider using the `-p` or `--provider` option:
|
|
297
|
+
```bash
|
|
298
|
+
# Use Mimesis (default)
|
|
299
|
+
testdatax -o data.csv -f csv -p mimesis
|
|
300
|
+
|
|
301
|
+
# Use Faker
|
|
302
|
+
testdatax -o data.csv -f csv -p faker
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
Both providers support the same schema format and generate compatible data types.
|
|
306
|
+
|
|
307
|
+
**Note:** For backward compatibility, the legacy `faker` field name is still supported, but `provider_field` is recommended for new schemas.
|
|
308
|
+
|
|
266
309
|
## Supported Data Types
|
|
267
310
|
|
|
268
311
|
- string
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "TestDataX"
|
|
3
|
-
version = "0.1.1"
|
|
3
|
+
version = "0.1.3"
|
|
4
4
|
description = "A flexible test data generation toolkit"
|
|
5
5
|
authors = ["JamesPBrett"]
|
|
6
6
|
license = "MIT"
|
|
@@ -16,6 +16,7 @@ orjson = "^3.10.12"
|
|
|
16
16
|
pyarrow = "^18.1.0"
|
|
17
17
|
pandas = "^2.2.3"
|
|
18
18
|
mysql-connector-python = "^9.1.0"
|
|
19
|
+
mimesis = "^18.0.0"
|
|
19
20
|
|
|
20
21
|
[tool.poetry.group.dev.dependencies]
|
|
21
22
|
pytest = "^8.3.4"
|
|
@@ -119,6 +120,14 @@ warn_unreachable = true
|
|
|
119
120
|
strict_optional = true
|
|
120
121
|
plugins = ["pydantic.mypy"]
|
|
121
122
|
|
|
123
|
+
[[tool.mypy.overrides]]
|
|
124
|
+
module = "mimesis.*"
|
|
125
|
+
ignore_missing_imports = true
|
|
126
|
+
|
|
127
|
+
[[tool.mypy.overrides]]
|
|
128
|
+
module = "src.providers.mimesis_provider"
|
|
129
|
+
warn_return_any = false
|
|
130
|
+
|
|
122
131
|
[tool.coverage.run]
|
|
123
132
|
source = ["src"]
|
|
124
133
|
branch = true
|
|
@@ -9,6 +9,7 @@ from .exporters.base_exporter import BaseExporter
|
|
|
9
9
|
from .exporters.utils.constants import DEFAULT_SCHEMA, EXPORT_FORMATS
|
|
10
10
|
from .exporters.utils.exporter_config import EXPORTER_CLASSES
|
|
11
11
|
from .generator import DataGenerator
|
|
12
|
+
from .providers import FakerProvider, MimesisProvider
|
|
12
13
|
from .schemas import DataType, FieldSchema, GeneratorConfig
|
|
13
14
|
|
|
14
15
|
|
|
@@ -42,6 +43,9 @@ FORMAT_OPTION = typer.Option(
|
|
|
42
43
|
ROWS_OPTION = typer.Option(10, "--rows", "-r", help="Number of rows to generate")
|
|
43
44
|
SCHEMA_PATH_OPTION = typer.Option(None, "--schema", "-s", help="Path to schema file")
|
|
44
45
|
DEBUG_OPTION = typer.Option(False, "--debug", "-d", help="Enable debug output")
|
|
46
|
+
PROVIDER_OPTION = typer.Option(
|
|
47
|
+
"mimesis", "--provider", "-p", help="Data provider (faker or mimesis)"
|
|
48
|
+
)
|
|
45
49
|
|
|
46
50
|
|
|
47
51
|
@app.command()
|
|
@@ -51,6 +55,7 @@ def generate(
|
|
|
51
55
|
rows: int = ROWS_OPTION,
|
|
52
56
|
schema_path: Path | None = SCHEMA_PATH_OPTION,
|
|
53
57
|
debug: bool = DEBUG_OPTION,
|
|
58
|
+
provider: str = PROVIDER_OPTION,
|
|
54
59
|
) -> None:
|
|
55
60
|
"""Generate synthetic data based on the provided schema."""
|
|
56
61
|
try:
|
|
@@ -104,7 +109,8 @@ def generate(
|
|
|
104
109
|
min_value=min_value,
|
|
105
110
|
max_value=max_value,
|
|
106
111
|
right_digits=field_def.get("right_digits"),
|
|
107
|
-
value_provider=field_def.get("faker"),
|
|
112
|
+
value_provider=field_def.get("provider_field")
|
|
113
|
+
or field_def.get("faker"),
|
|
108
114
|
pattern=field_def.get("pattern"),
|
|
109
115
|
)
|
|
110
116
|
fields.append(field_schema.model_dump())
|
|
@@ -117,6 +123,10 @@ def generate(
|
|
|
117
123
|
if format not in EXPORT_FORMATS:
|
|
118
124
|
raise ValueError(f"Unsupported format: {format}")
|
|
119
125
|
|
|
126
|
+
# Validate provider
|
|
127
|
+
if provider.lower() not in ["faker", "mimesis"]:
|
|
128
|
+
raise ValueError(f"Unsupported provider: {provider}")
|
|
129
|
+
|
|
120
130
|
# Create generator config
|
|
121
131
|
if debug:
|
|
122
132
|
typer.echo(f"Converted fields: {fields}", err=False)
|
|
@@ -127,7 +137,12 @@ def generate(
|
|
|
127
137
|
# Generate data
|
|
128
138
|
if debug:
|
|
129
139
|
typer.echo(f"Generator config: {config}", err=False)
|
|
130
|
-
|
|
140
|
+
|
|
141
|
+
# Select provider
|
|
142
|
+
data_provider = (
|
|
143
|
+
MimesisProvider() if provider.lower() == "mimesis" else FakerProvider()
|
|
144
|
+
)
|
|
145
|
+
generator = DataGenerator(provider=data_provider)
|
|
131
146
|
data = generator.generate_data(config.fields, config.row_count)
|
|
132
147
|
|
|
133
148
|
# Export data
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from decimal import Decimal
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
from mimesis import Address, Cryptographic, Datetime, Locale, Numeric, Person, Text
|
|
6
|
+
from mimesis.random import Random
|
|
7
|
+
|
|
8
|
+
from .base import DataProvider
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MimesisProvider(DataProvider):
|
|
12
|
+
"""MimesisProvider generates fake data using the Mimesis library."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, locale: Locale = Locale.EN) -> None:
|
|
15
|
+
"""Initialize the MimesisProvider with a specific locale."""
|
|
16
|
+
self.locale = locale
|
|
17
|
+
self.person = Person(locale=locale)
|
|
18
|
+
self.address = Address(locale=locale)
|
|
19
|
+
self.text = Text(locale=locale)
|
|
20
|
+
self.numeric = Numeric()
|
|
21
|
+
self.datetime = Datetime(locale=locale)
|
|
22
|
+
self.crypto = Cryptographic()
|
|
23
|
+
self.random = Random()
|
|
24
|
+
|
|
25
|
+
def generate_string(self, **kwargs: str) -> str:
|
|
26
|
+
"""Generate a fake string using the specified provider."""
|
|
27
|
+
provider = kwargs.get("value_provider") or "name"
|
|
28
|
+
|
|
29
|
+
# Map common Faker providers to Mimesis equivalents
|
|
30
|
+
provider_map = {
|
|
31
|
+
"name": lambda: self.person.full_name(),
|
|
32
|
+
"first_name": lambda: self.person.first_name(),
|
|
33
|
+
"last_name": lambda: self.person.last_name(),
|
|
34
|
+
"email": lambda: self.person.email(),
|
|
35
|
+
"username": lambda: self.person.username(),
|
|
36
|
+
"phone_number": lambda: self.person.telephone(),
|
|
37
|
+
"address": lambda: self.address.address(),
|
|
38
|
+
"company": lambda: self.person.occupation(),
|
|
39
|
+
"word": lambda: self.text.word(),
|
|
40
|
+
"sentence": lambda: self.text.sentence(),
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
generator = provider_map.get(provider, lambda: self.person.full_name())
|
|
44
|
+
return str(generator())
|
|
45
|
+
|
|
46
|
+
def generate_text(self, **kwargs: str) -> str:
|
|
47
|
+
"""Generate a fake text string."""
|
|
48
|
+
return str(self.text.text(quantity=1))
|
|
49
|
+
|
|
50
|
+
def generate_integer(
|
|
51
|
+
self, min_value: int | None = None, max_value: int | None = None
|
|
52
|
+
) -> int:
|
|
53
|
+
"""Generate a fake integer within the specified range."""
|
|
54
|
+
min_val = min_value if min_value is not None else 0
|
|
55
|
+
max_val = max_value if max_value is not None else 100
|
|
56
|
+
return int(self.numeric.integer_number(start=min_val, end=max_val))
|
|
57
|
+
|
|
58
|
+
def generate_decimal(self, **kwargs: Decimal) -> Decimal:
|
|
59
|
+
"""Generate a fake decimal number."""
|
|
60
|
+
float_val = self.numeric.float_number(start=0.0, end=999999.99)
|
|
61
|
+
return Decimal(f"{float_val:.2f}")
|
|
62
|
+
|
|
63
|
+
def generate_boolean(self) -> bool:
|
|
64
|
+
"""Generate a fake boolean value."""
|
|
65
|
+
return bool(self.random.choice([True, False]))
|
|
66
|
+
|
|
67
|
+
def generate_date(self) -> date:
|
|
68
|
+
"""Generate a fake date object."""
|
|
69
|
+
return self.datetime.date()
|
|
70
|
+
|
|
71
|
+
def generate_datetime(self) -> datetime:
|
|
72
|
+
"""Generate a fake datetime object."""
|
|
73
|
+
return self.datetime.datetime()
|
|
74
|
+
|
|
75
|
+
def generate_binary(self, length: int = 64) -> bytes:
|
|
76
|
+
"""Generate a fake binary string of the specified length."""
|
|
77
|
+
return bytes(self.random.randbytes(length))
|
|
78
|
+
|
|
79
|
+
def generate_uuid(self) -> UUID:
|
|
80
|
+
"""Generate a fake UUID."""
|
|
81
|
+
return UUID(self.crypto.uuid())
|
|
82
|
+
|
|
83
|
+
def generate_enum(self, values: list[str]) -> str:
|
|
84
|
+
"""Generate a fake value from the given list of values."""
|
|
85
|
+
if not values:
|
|
86
|
+
raise ValueError("Enum values cannot be empty")
|
|
87
|
+
return str(self.random.choice(values))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|