TestDataX 0.1.0__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {testdatax-0.1.0 → testdatax-0.1.3}/PKG-INFO +66 -45
  2. {testdatax-0.1.0 → testdatax-0.1.3}/README.md +64 -44
  3. {testdatax-0.1.0 → testdatax-0.1.3}/pyproject.toml +23 -1
  4. {testdatax-0.1.0 → testdatax-0.1.3}/src/__init__.py +1 -1
  5. {testdatax-0.1.0 → testdatax-0.1.3}/src/cli.py +17 -2
  6. testdatax-0.1.3/src/providers/__init__.py +5 -0
  7. testdatax-0.1.3/src/providers/mimesis_provider.py +87 -0
  8. testdatax-0.1.0/src/providers/__init__.py +0 -4
  9. {testdatax-0.1.0 → testdatax-0.1.3}/LICENSE +0 -0
  10. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/__init__.py +0 -0
  11. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/base_exporter.py +0 -0
  12. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/csv_exporter.py +0 -0
  13. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/json_exporter.py +0 -0
  14. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/mssql_exporter.py +0 -0
  15. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/mysql_exporter.py +0 -0
  16. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/oracle_exporter.py +0 -0
  17. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/orc_exporter.py +0 -0
  18. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/parquet_exporter.py +0 -0
  19. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/__init__.py +0 -0
  20. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/chunker.py +0 -0
  21. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/constants.py +0 -0
  22. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/exporter_config.py +0 -0
  23. {testdatax-0.1.0 → testdatax-0.1.3}/src/exporters/utils/formatters.py +0 -0
  24. {testdatax-0.1.0 → testdatax-0.1.3}/src/generator.py +0 -0
  25. {testdatax-0.1.0 → testdatax-0.1.3}/src/providers/base.py +0 -0
  26. {testdatax-0.1.0 → testdatax-0.1.3}/src/providers/faker_provider.py +0 -0
  27. {testdatax-0.1.0 → testdatax-0.1.3}/src/schemas.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: TestDataX
3
- Version: 0.1.0
3
+ Version: 0.1.3
4
4
  Summary: A flexible test data generation toolkit
5
5
  License: MIT
6
6
  Author: JamesPBrett
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.11
11
11
  Classifier: Programming Language :: Python :: 3.12
12
12
  Classifier: Programming Language :: Python :: 3.13
13
13
  Requires-Dist: faker (>=33.1.0,<34.0.0)
14
+ Requires-Dist: mimesis (>=18.0.0,<19.0.0)
14
15
  Requires-Dist: mysql-connector-python (>=9.1.0,<10.0.0)
15
16
  Requires-Dist: orjson (>=3.10.12,<4.0.0)
16
17
  Requires-Dist: pandas (>=2.2.3,<3.0.0)
@@ -21,57 +22,35 @@ Description-Content-Type: text/markdown
21
22
 
22
23
  # TestDataX
23
24
 
25
+ # TestDataX
26
+
24
27
  ![Build Status](https://github.com/JamesPBrett/testdatax/actions/workflows/publish.yml/badge.svg)
25
28
  [![codecov](https://codecov.io/gh/JamesPBrett/testdatax/branch/main/graph/badge.svg?token=6VX62CI6U9)](https://codecov.io/gh/JamesPBrett/testdatax)
26
29
  ![Python Version](https://img.shields.io/badge/python-3.11%2B-blue)
27
30
  ![License](https://img.shields.io/badge/license-MIT-blue.svg)
28
31
 
29
- This command-line interface application enables quick and customizable test data generation across various formats. It leverages Faker for realistic data fields, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
32
+ This command-line interface application enables quick and customizable test data generation across various formats. It supports multiple data providers (Mimesis and Faker) for realistic data generation, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
30
33
 
31
34
  ## Requirements
32
35
  - Python 3.11+
33
- - Additional dependencies are handled automatically by poetry
34
-
35
- ## Installation
36
-
37
- ### Prerequisites
38
-
39
- ```bash
40
- # Install Python 3.11+ if not already installed
41
- brew install python@3.11
42
-
43
- # Install Poetry
44
- curl -sSL https://install.python-poetry.org | python3 -
45
36
 
46
- # Verify Poetry installation
47
- poetry --version
48
- ```
49
-
50
- ### Install
37
+ ## Quick Start
51
38
 
52
39
  ```bash
53
- # Clone the repository
54
- git clone https://github.com/JamesPBrett/testdatax.git
55
- cd testdatax
56
-
57
- # Install dependencies
58
- poetry install
59
- ```
40
+ # Install from PyPI
41
+ pip install testdatax
60
42
 
61
- ### Common Issues
43
+ # Generate sample data
44
+ testdatax --rows 1000 --format json --output data.json
62
45
 
63
- - If Poetry is not found in PATH:
64
- ```bash
65
- export PATH="$HOME/.local/bin:$PATH"
66
- ```
67
46
 
68
47
  ## Features
69
48
 
70
- - Generate realistic test data using Data providers
49
+ - Generate realistic test data using multiple data providers (Mimesis, Faker)
71
50
  - Support for multiple output formats (CSV, JSON, SQL, etc.)
72
51
  - Customizable schema definitions
73
52
  - Configurable data generation parameters
74
- - CLI tool for easy data generation
53
+ - CLI tool for easy test data generation
75
54
 
76
55
  ## Supported Formats
77
56
 
@@ -85,7 +64,7 @@ poetry install
85
64
 
86
65
  ## CLI Usage
87
66
  ```bash
88
- testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
67
+ testdatax -o <output_file> -f <format> [-s <schema_file>] [-r <num_rows>] [-p <provider>] [-d]
89
68
  ```
90
69
 
91
70
  Options:
@@ -93,6 +72,7 @@ Options:
93
72
  - `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
94
73
  - `-r, --rows`: Number of rows to generate (default: 10)
95
74
  - `-s, --schema`: Path to schema file
75
+ - `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
96
76
  - `-d, --debug`: Enable debug output
97
77
 
98
78
  ## Usage Examples
@@ -102,10 +82,20 @@ Generate 10 rows of CSV data:
102
82
  testdatax -o users.csv -f csv -s schema.json -r 10
103
83
  ```
104
84
 
85
+ Generate 10 rows of CSV data using Faker provider:
86
+ ```bash
87
+ testdatax -o users.csv -f csv -s schema.json -r 10 -p faker
88
+ ```
89
+
105
90
  Generate 1000 rows of Parquet data with debug output:
106
91
  ```bash
107
92
  testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
108
93
  ```
94
+
95
+ Generate 1000 rows of Parquet data using Mimesis provider:
96
+ ```bash
97
+ testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -p mimesis
98
+ ```
109
99
  Generate JSON data with default row count (10):
110
100
  ```bash
111
101
  testdatax -o data.json -f json -s schema.json
@@ -136,6 +126,7 @@ Each command consists of:
136
126
  - `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
137
127
  - `-s, --schema`: Path to your schema definition file
138
128
  - `-r, --rows`: Number of rows to generate (optional, defaults to 10)
129
+ - `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
139
130
  - `-d, --debug`: Enable debug logging (optional)
140
131
 
141
132
  ## Schema Example
@@ -144,7 +135,7 @@ Each command consists of:
144
135
  {
145
136
  "username": {
146
137
  "type": "string",
147
- "faker": "name"
138
+ "provider_field": "name"
148
139
  },
149
140
  "date_joined": {
150
141
  "type": "datetime"
@@ -191,7 +182,7 @@ The schema file defines the structure and constraints of your generated data. Ea
191
182
  "type": "string",
192
183
  "min_length": 5,
193
184
  "max_length": 20,
194
- "faker": "user_name" // Use faker to generate realistic data
185
+ "provider_field": "user_name" // Use provider-specific field to generate realistic data
195
186
  },
196
187
  "description": {
197
188
  "type": "text",
@@ -244,25 +235,25 @@ The schema file defines the structure and constraints of your generated data. Ea
244
235
  }
245
236
  ```
246
237
 
247
- #### Using Faker
248
- The generator supports Faker providers for generating realistic data:
238
+ #### Using Data Providers
239
+ Both the Mimesis and Faker providers accept the same schema format. Use the `provider_field` key to select a named generator (for example `name` or `email`); the same key works with either provider:
249
240
  ```json
250
241
  {
251
242
  "name": {
252
243
  "type": "string",
253
- "faker": "name"
244
+ "provider_field": "name"
254
245
  },
255
246
  "email": {
256
247
  "type": "string",
257
- "faker": "email"
248
+ "provider_field": "email"
258
249
  },
259
250
  "address": {
260
251
  "type": "string",
261
- "faker": "address"
252
+ "provider_field": "address"
262
253
  },
263
254
  "company": {
264
255
  "type": "string",
265
- "faker": "company"
256
+ "provider_field": "company"
266
257
  }
267
258
  }
268
259
  ```
@@ -276,12 +267,12 @@ The generator supports Faker providers for generating realistic data:
276
267
  },
277
268
  "username": {
278
269
  "type": "string",
279
- "faker": "user_name",
270
+ "provider_field": "user_name",
280
271
  "unique": true
281
272
  },
282
273
  "email": {
283
274
  "type": "string",
284
- "faker": "email",
275
+ "provider_field": "email",
285
276
  "unique": true
286
277
  },
287
278
  "age": {
@@ -306,6 +297,37 @@ The generator supports Faker providers for generating realistic data:
306
297
  }
307
298
  ```
308
299
 
300
+ ## Data Providers
301
+
302
+ TestDataX supports two powerful data providers for generating realistic test data:
303
+
304
+ ### Mimesis (Default)
305
+ Mimesis is a high-performance Python library for generating synthetic data. It provides:
306
+ - Fast data generation with excellent performance
307
+ - Support for multiple locales and languages
308
+ - Wide variety of data providers for different domains
309
+ - Lightweight and efficient implementation
310
+
311
+ ### Faker
312
+ Faker is a popular Python library for generating fake data. It offers:
313
+ - Extensive provider ecosystem with community contributions
314
+ - Rich set of localized providers
315
+ - Well-established and widely used in the Python community
316
+ - Comprehensive documentation and examples
317
+
318
+ You can specify the provider using the `-p` or `--provider` option:
319
+ ```bash
320
+ # Use Mimesis (default)
321
+ testdatax -o data.csv -f csv -p mimesis
322
+
323
+ # Use Faker
324
+ testdatax -o data.csv -f csv -p faker
325
+ ```
326
+
327
+ Both providers support the same schema format and generate compatible data types.
328
+
329
+ **Note:** For backward compatibility, the legacy `faker` field name is still supported, but `provider_field` is recommended for new schemas.
330
+
309
331
  ## Supported Data Types
310
332
 
311
333
  - string
@@ -341,5 +363,4 @@ The generator supports Faker providers for generating realistic data:
341
363
  ## License
342
364
 
343
365
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
344
- # Test change
345
366
 
@@ -1,56 +1,34 @@
1
1
  # TestDataX
2
2
 
3
+ # TestDataX
4
+
3
5
  ![Build Status](https://github.com/JamesPBrett/testdatax/actions/workflows/publish.yml/badge.svg)
4
6
  [![codecov](https://codecov.io/gh/JamesPBrett/testdatax/branch/main/graph/badge.svg?token=6VX62CI6U9)](https://codecov.io/gh/JamesPBrett/testdatax)
5
7
  ![Python Version](https://img.shields.io/badge/python-3.11%2B-blue)
6
8
  ![License](https://img.shields.io/badge/license-MIT-blue.svg)
7
9
 
8
- This command-line interface application enables quick and customizable test data generation across various formats. It leverages Faker for realistic data fields, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
10
+ This command-line interface application enables quick and customizable test data generation across various formats. It supports multiple data providers (Mimesis and Faker) for realistic data generation, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
9
11
 
10
12
  ## Requirements
11
13
  - Python 3.11+
12
- - Additional dependencies are handled automatically by poetry
13
-
14
- ## Installation
15
-
16
- ### Prerequisites
17
-
18
- ```bash
19
- # Install Python 3.11+ if not already installed
20
- brew install python@3.11
21
-
22
- # Install Poetry
23
- curl -sSL https://install.python-poetry.org | python3 -
24
14
 
25
- # Verify Poetry installation
26
- poetry --version
27
- ```
28
-
29
- ### Install
15
+ ## Quick Start
30
16
 
31
17
  ```bash
32
- # Clone the repository
33
- git clone https://github.com/JamesPBrett/testdatax.git
34
- cd testdatax
35
-
36
- # Install dependencies
37
- poetry install
38
- ```
18
+ # Install from PyPI
19
+ pip install testdatax
39
20
 
40
- ### Common Issues
21
+ # Generate sample data
22
+ testdatax --rows 1000 --format json --output data.json
41
23
 
42
- - If Poetry is not found in PATH:
43
- ```bash
44
- export PATH="$HOME/.local/bin:$PATH"
45
- ```
46
24
 
47
25
  ## Features
48
26
 
49
- - Generate realistic test data using Data providers
27
+ - Generate realistic test data using multiple data providers (Mimesis, Faker)
50
28
  - Support for multiple output formats (CSV, JSON, SQL, etc.)
51
29
  - Customizable schema definitions
52
30
  - Configurable data generation parameters
53
- - CLI tool for easy data generation
31
+ - CLI tool for easy test data generation
54
32
 
55
33
  ## Supported Formats
56
34
 
@@ -64,7 +42,7 @@ poetry install
64
42
 
65
43
  ## CLI Usage
66
44
  ```bash
67
- testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
45
+ testdatax -o <output_file> -f <format> [-s <schema_file>] [-r <num_rows>] [-p <provider>] [-d]
68
46
  ```
69
47
 
70
48
  Options:
@@ -72,6 +50,7 @@ Options:
72
50
  - `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
73
51
  - `-r, --rows`: Number of rows to generate (default: 10)
74
52
  - `-s, --schema`: Path to schema file
53
+ - `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
75
54
  - `-d, --debug`: Enable debug output
76
55
 
77
56
  ## Usage Examples
@@ -81,10 +60,20 @@ Generate 10 rows of CSV data:
81
60
  testdatax -o users.csv -f csv -s schema.json -r 10
82
61
  ```
83
62
 
63
+ Generate 10 rows of CSV data using Faker provider:
64
+ ```bash
65
+ testdatax -o users.csv -f csv -s schema.json -r 10 -p faker
66
+ ```
67
+
84
68
  Generate 1000 rows of Parquet data with debug output:
85
69
  ```bash
86
70
  testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
87
71
  ```
72
+
73
+ Generate 1000 rows of Parquet data using Mimesis provider:
74
+ ```bash
75
+ testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -p mimesis
76
+ ```
88
77
  Generate JSON data with default row count (10):
89
78
  ```bash
90
79
  testdatax -o data.json -f json -s schema.json
@@ -115,6 +104,7 @@ Each command consists of:
115
104
  - `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
116
105
  - `-s, --schema`: Path to your schema definition file
117
106
  - `-r, --rows`: Number of rows to generate (optional, defaults to 10)
107
+ - `-p, --provider`: Data provider (mimesis, faker) - default: mimesis
118
108
  - `-d, --debug`: Enable debug logging (optional)
119
109
 
120
110
  ## Schema Example
@@ -123,7 +113,7 @@ Each command consists of:
123
113
  {
124
114
  "username": {
125
115
  "type": "string",
126
- "faker": "name"
116
+ "provider_field": "name"
127
117
  },
128
118
  "date_joined": {
129
119
  "type": "datetime"
@@ -170,7 +160,7 @@ The schema file defines the structure and constraints of your generated data. Ea
170
160
  "type": "string",
171
161
  "min_length": 5,
172
162
  "max_length": 20,
173
- "faker": "user_name" // Use faker to generate realistic data
163
+ "provider_field": "user_name" // Use provider-specific field to generate realistic data
174
164
  },
175
165
  "description": {
176
166
  "type": "text",
@@ -223,25 +213,25 @@ The schema file defines the structure and constraints of your generated data. Ea
223
213
  }
224
214
  ```
225
215
 
226
- #### Using Faker
227
- The generator supports Faker providers for generating realistic data:
216
+ #### Using Data Providers
217
+ Both the Mimesis and Faker providers accept the same schema format. Use the `provider_field` key to select a named generator (for example `name` or `email`); the same key works with either provider:
228
218
  ```json
229
219
  {
230
220
  "name": {
231
221
  "type": "string",
232
- "faker": "name"
222
+ "provider_field": "name"
233
223
  },
234
224
  "email": {
235
225
  "type": "string",
236
- "faker": "email"
226
+ "provider_field": "email"
237
227
  },
238
228
  "address": {
239
229
  "type": "string",
240
- "faker": "address"
230
+ "provider_field": "address"
241
231
  },
242
232
  "company": {
243
233
  "type": "string",
244
- "faker": "company"
234
+ "provider_field": "company"
245
235
  }
246
236
  }
247
237
  ```
@@ -255,12 +245,12 @@ The generator supports Faker providers for generating realistic data:
255
245
  },
256
246
  "username": {
257
247
  "type": "string",
258
- "faker": "user_name",
248
+ "provider_field": "user_name",
259
249
  "unique": true
260
250
  },
261
251
  "email": {
262
252
  "type": "string",
263
- "faker": "email",
253
+ "provider_field": "email",
264
254
  "unique": true
265
255
  },
266
256
  "age": {
@@ -285,6 +275,37 @@ The generator supports Faker providers for generating realistic data:
285
275
  }
286
276
  ```
287
277
 
278
+ ## Data Providers
279
+
280
+ TestDataX supports two powerful data providers for generating realistic test data:
281
+
282
+ ### Mimesis (Default)
283
+ Mimesis is a high-performance Python library for generating synthetic data. It provides:
284
+ - Fast data generation with excellent performance
285
+ - Support for multiple locales and languages
286
+ - Wide variety of data providers for different domains
287
+ - Lightweight and efficient implementation
288
+
289
+ ### Faker
290
+ Faker is a popular Python library for generating fake data. It offers:
291
+ - Extensive provider ecosystem with community contributions
292
+ - Rich set of localized providers
293
+ - Well-established and widely used in the Python community
294
+ - Comprehensive documentation and examples
295
+
296
+ You can specify the provider using the `-p` or `--provider` option:
297
+ ```bash
298
+ # Use Mimesis (default)
299
+ testdatax -o data.csv -f csv -p mimesis
300
+
301
+ # Use Faker
302
+ testdatax -o data.csv -f csv -p faker
303
+ ```
304
+
305
+ Both providers support the same schema format and generate compatible data types.
306
+
307
+ **Note:** For backward compatibility, the legacy `faker` field name is still supported, but `provider_field` is recommended for new schemas.
308
+
288
309
  ## Supported Data Types
289
310
 
290
311
  - string
@@ -320,4 +341,3 @@ The generator supports Faker providers for generating realistic data:
320
341
  ## License
321
342
 
322
343
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
323
- # Test change
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "TestDataX"
3
- version = "0.1.0"
3
+ version = "0.1.3"
4
4
  description = "A flexible test data generation toolkit"
5
5
  authors = ["JamesPBrett"]
6
6
  license = "MIT"
@@ -16,6 +16,7 @@ orjson = "^3.10.12"
16
16
  pyarrow = "^18.1.0"
17
17
  pandas = "^2.2.3"
18
18
  mysql-connector-python = "^9.1.0"
19
+ mimesis = "^18.0.0"
19
20
 
20
21
  [tool.poetry.group.dev.dependencies]
21
22
  pytest = "^8.3.4"
@@ -119,6 +120,14 @@ warn_unreachable = true
119
120
  strict_optional = true
120
121
  plugins = ["pydantic.mypy"]
121
122
 
123
+ [[tool.mypy.overrides]]
124
+ module = "mimesis.*"
125
+ ignore_missing_imports = true
126
+
127
+ [[tool.mypy.overrides]]
128
+ module = "src.providers.mimesis_provider"
129
+ warn_return_any = false
130
+
122
131
  [tool.coverage.run]
123
132
  source = ["src"]
124
133
  branch = true
@@ -177,7 +186,20 @@ upload_to_repository = true
177
186
  repository = "pypi"
178
187
  tag_commit = true
179
188
  tag_format = "v{version}"
189
+ major_on_zero = false # Prevents 0.x.x to 1.0.0 on breaking changes
190
+ allow_zero_version = true
180
191
  commit_message = "chore(release): v{version}"
192
+ commit_types = [
193
+ { type = "feat", section = "Features", bump = "minor" },
194
+ { type = "fix", section = "Bug Fixes", bump = "patch" },
195
+ { type = "breaking", section = "Breaking Changes", bump = "major" },
196
+ { type = "perf", section = "Performance", bump = "patch" },
197
+ { type = "refactor", section = "Refactoring", bump = "patch" },
198
+ { type = "docs", section = "Documentation", bump = false },
199
+ { type = "chore", section = "Maintenance", bump = false },
200
+ { type = "ci", section = "CI/CD", bump = false },
201
+ { type = "build", section = "Build", bump = false }
202
+ ]
181
203
 
182
204
  [tool.semantic_release.changelog]
183
205
  template_dir = "templates"
@@ -1,6 +1,6 @@
1
1
  """TestDataX package initialization."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.3"
4
4
 
5
5
  from src.cli import app # noqa
6
6
 
@@ -9,6 +9,7 @@ from .exporters.base_exporter import BaseExporter
9
9
  from .exporters.utils.constants import DEFAULT_SCHEMA, EXPORT_FORMATS
10
10
  from .exporters.utils.exporter_config import EXPORTER_CLASSES
11
11
  from .generator import DataGenerator
12
+ from .providers import FakerProvider, MimesisProvider
12
13
  from .schemas import DataType, FieldSchema, GeneratorConfig
13
14
 
14
15
 
@@ -42,6 +43,9 @@ FORMAT_OPTION = typer.Option(
42
43
  ROWS_OPTION = typer.Option(10, "--rows", "-r", help="Number of rows to generate")
43
44
  SCHEMA_PATH_OPTION = typer.Option(None, "--schema", "-s", help="Path to schema file")
44
45
  DEBUG_OPTION = typer.Option(False, "--debug", "-d", help="Enable debug output")
46
+ PROVIDER_OPTION = typer.Option(
47
+ "mimesis", "--provider", "-p", help="Data provider (faker or mimesis)"
48
+ )
45
49
 
46
50
 
47
51
  @app.command()
@@ -51,6 +55,7 @@ def generate(
51
55
  rows: int = ROWS_OPTION,
52
56
  schema_path: Path | None = SCHEMA_PATH_OPTION,
53
57
  debug: bool = DEBUG_OPTION,
58
+ provider: str = PROVIDER_OPTION,
54
59
  ) -> None:
55
60
  """Generate synthetic data based on the provided schema."""
56
61
  try:
@@ -104,7 +109,8 @@ def generate(
104
109
  min_value=min_value,
105
110
  max_value=max_value,
106
111
  right_digits=field_def.get("right_digits"),
107
- value_provider=field_def.get("faker"),
112
+ value_provider=field_def.get("provider_field")
113
+ or field_def.get("faker"),
108
114
  pattern=field_def.get("pattern"),
109
115
  )
110
116
  fields.append(field_schema.model_dump())
@@ -117,6 +123,10 @@ def generate(
117
123
  if format not in EXPORT_FORMATS:
118
124
  raise ValueError(f"Unsupported format: {format}")
119
125
 
126
+ # Validate provider
127
+ if provider.lower() not in ["faker", "mimesis"]:
128
+ raise ValueError(f"Unsupported provider: {provider}")
129
+
120
130
  # Create generator config
121
131
  if debug:
122
132
  typer.echo(f"Converted fields: {fields}", err=False)
@@ -127,7 +137,12 @@ def generate(
127
137
  # Generate data
128
138
  if debug:
129
139
  typer.echo(f"Generator config: {config}", err=False)
130
- generator = DataGenerator()
140
+
141
+ # Select provider
142
+ data_provider = (
143
+ MimesisProvider() if provider.lower() == "mimesis" else FakerProvider()
144
+ )
145
+ generator = DataGenerator(provider=data_provider)
131
146
  data = generator.generate_data(config.fields, config.row_count)
132
147
 
133
148
  # Export data
@@ -0,0 +1,5 @@
1
+ from .base import DataProvider
2
+ from .faker_provider import FakerProvider
3
+ from .mimesis_provider import MimesisProvider
4
+
5
+ __all__ = ["DataProvider", "FakerProvider", "MimesisProvider"]
@@ -0,0 +1,87 @@
1
+ from datetime import date, datetime
2
+ from decimal import Decimal
3
+ from uuid import UUID
4
+
5
+ from mimesis import Address, Cryptographic, Datetime, Locale, Numeric, Person, Text
6
+ from mimesis.random import Random
7
+
8
+ from .base import DataProvider
9
+
10
+
11
class MimesisProvider(DataProvider):
    """Data provider that generates fake values with the Mimesis library.

    One Mimesis provider object per domain (person, address, text, ...) is
    created up front for the requested locale and reused by every
    ``generate_*`` call.
    """

    # Maps a schema ``value_provider`` name (Faker-style spelling) to the
    # ``(attribute on self, method on that attribute)`` pair that produces the
    # value.  The pair is resolved with ``getattr`` at call time, so the table
    # is built once instead of on every call while still honouring provider
    # objects that are swapped out on the instance after construction.
    _STRING_GENERATORS = {
        "name": ("person", "full_name"),
        "first_name": ("person", "first_name"),
        "last_name": ("person", "last_name"),
        "email": ("person", "email"),
        "username": ("person", "username"),
        # Faker spells this provider ``user_name`` and the README schemas use
        # that spelling; without the alias it silently fell back to a full name.
        "user_name": ("person", "username"),
        "phone_number": ("person", "telephone"),
        "address": ("address", "address"),
        # A company name, not a job title (previously ``person.occupation``).
        "company": ("finance", "company"),
        "word": ("text", "word"),
        "sentence": ("text", "sentence"),
    }

    # Used when the requested provider name is missing or not in the table.
    _DEFAULT_STRING_GENERATOR = ("person", "full_name")

    def __init__(self, locale: Locale = Locale.EN) -> None:
        """Create the underlying Mimesis providers.

        Args:
            locale: Mimesis locale passed to the locale-aware providers.
        """
        # Imported here so the module-level import list stays untouched;
        # ``Finance`` ships with the ``mimesis`` package this module already uses.
        from mimesis import Finance

        self.locale = locale
        self.person = Person(locale=locale)
        self.address = Address(locale=locale)
        self.text = Text(locale=locale)
        self.finance = Finance(locale=locale)
        self.numeric = Numeric()
        self.datetime = Datetime(locale=locale)
        self.crypto = Cryptographic()
        self.random = Random()

    def generate_string(self, **kwargs: str) -> str:
        """Generate a string using the named value provider.

        Args:
            **kwargs: Only ``value_provider`` is read.  When it is missing,
                empty, or not a known name, a full person name is generated.

        Returns:
            The generated value converted to ``str``.
        """
        provider = kwargs.get("value_provider") or "name"
        attr_name, method_name = self._STRING_GENERATORS.get(
            provider, self._DEFAULT_STRING_GENERATOR
        )
        generator = getattr(getattr(self, attr_name), method_name)
        return str(generator())

    def generate_text(self, **kwargs: str) -> str:
        """Generate a block of text.

        Keyword arguments are accepted but not used.
        """
        return str(self.text.text(quantity=1))

    def generate_integer(
        self, min_value: int | None = None, max_value: int | None = None
    ) -> int:
        """Generate an integer within the given bounds.

        Args:
            min_value: Lower bound; defaults to 0 when ``None``.
            max_value: Upper bound; defaults to 100 when ``None``.
        """
        min_val = min_value if min_value is not None else 0
        max_val = max_value if max_value is not None else 100
        return int(self.numeric.integer_number(start=min_val, end=max_val))

    def generate_decimal(self, **kwargs: Decimal) -> Decimal:
        """Generate a decimal number with two fractional digits.

        The value is drawn between 0.0 and 999999.99.
        """
        # NOTE(review): keyword arguments are currently ignored, so any
        # bounds or precision passed by the caller have no effect here.
        # Confirm against the caller and the other provider implementation.
        float_val = self.numeric.float_number(start=0.0, end=999999.99)
        return Decimal(f"{float_val:.2f}")

    def generate_boolean(self) -> bool:
        """Generate a boolean by choosing between ``True`` and ``False``."""
        return bool(self.random.choice([True, False]))

    def generate_date(self) -> date:
        """Generate a date object."""
        return self.datetime.date()

    def generate_datetime(self) -> datetime:
        """Generate a datetime object."""
        return self.datetime.datetime()

    def generate_binary(self, length: int = 64) -> bytes:
        """Generate ``length`` random bytes."""
        return bytes(self.random.randbytes(length))

    def generate_uuid(self) -> UUID:
        """Generate a UUID, parsed from the value Mimesis returns."""
        return UUID(self.crypto.uuid())

    def generate_enum(self, values: list[str]) -> str:
        """Pick one entry from ``values``.

        Raises:
            ValueError: If ``values`` is empty.
        """
        if not values:
            raise ValueError("Enum values cannot be empty")
        return str(self.random.choice(values))
@@ -1,4 +0,0 @@
1
- from .base import DataProvider
2
- from .faker_provider import FakerProvider
3
-
4
- __all__ = ["DataProvider", "FakerProvider"]
File without changes
File without changes
File without changes