TestDataX 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- testdatax-0.1.0/LICENSE +21 -0
- testdatax-0.1.0/PKG-INFO +345 -0
- testdatax-0.1.0/README.md +323 -0
- testdatax-0.1.0/pyproject.toml +203 -0
- testdatax-0.1.0/src/__init__.py +7 -0
- testdatax-0.1.0/src/cli.py +166 -0
- testdatax-0.1.0/src/exporters/__init__.py +0 -0
- testdatax-0.1.0/src/exporters/base_exporter.py +23 -0
- testdatax-0.1.0/src/exporters/csv_exporter.py +115 -0
- testdatax-0.1.0/src/exporters/json_exporter.py +89 -0
- testdatax-0.1.0/src/exporters/mssql_exporter.py +198 -0
- testdatax-0.1.0/src/exporters/mysql_exporter.py +184 -0
- testdatax-0.1.0/src/exporters/oracle_exporter.py +205 -0
- testdatax-0.1.0/src/exporters/orc_exporter.py +100 -0
- testdatax-0.1.0/src/exporters/parquet_exporter.py +102 -0
- testdatax-0.1.0/src/exporters/utils/__init__.py +0 -0
- testdatax-0.1.0/src/exporters/utils/chunker.py +27 -0
- testdatax-0.1.0/src/exporters/utils/constants.py +55 -0
- testdatax-0.1.0/src/exporters/utils/exporter_config.py +17 -0
- testdatax-0.1.0/src/exporters/utils/formatters.py +165 -0
- testdatax-0.1.0/src/generator.py +117 -0
- testdatax-0.1.0/src/providers/__init__.py +4 -0
- testdatax-0.1.0/src/providers/base.py +58 -0
- testdatax-0.1.0/src/providers/faker_provider.py +65 -0
- testdatax-0.1.0/src/schemas.py +81 -0
testdatax-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 JamesPBrett
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
testdatax-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: TestDataX
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A flexible test data generation toolkit
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: JamesPBrett
|
|
7
|
+
Requires-Python: >=3.11,<4.0
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Requires-Dist: faker (>=33.1.0,<34.0.0)
|
|
14
|
+
Requires-Dist: mysql-connector-python (>=9.1.0,<10.0.0)
|
|
15
|
+
Requires-Dist: orjson (>=3.10.12,<4.0.0)
|
|
16
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
17
|
+
Requires-Dist: pyarrow (>=18.1.0,<19.0.0)
|
|
18
|
+
Requires-Dist: pydantic (>=2.10.4,<3.0.0)
|
|
19
|
+
Requires-Dist: typer (>=0.15.1,<0.16.0)
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# TestDataX
|
|
23
|
+
|
|
24
|
+

|
|
25
|
+
[](https://codecov.io/gh/JamesPBrett/testdatax)
|
|
26
|
+

|
|
27
|
+

|
|
28
|
+
|
|
29
|
+
This command-line interface application enables quick and customizable test data generation across various formats. It leverages Faker for realistic data fields, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
|
|
30
|
+
|
|
31
|
+
## Requirements
|
|
32
|
+
- Python 3.11+
|
|
33
|
+
- Additional dependencies are handled automatically by Poetry
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
### Prerequisites
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# Install Python 3.11+ if not already installed
|
|
41
|
+
brew install python@3.11
|
|
42
|
+
|
|
43
|
+
# Install Poetry
|
|
44
|
+
curl -sSL https://install.python-poetry.org | python3 -
|
|
45
|
+
|
|
46
|
+
# Verify Poetry installation
|
|
47
|
+
poetry --version
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Install
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Clone the repository
|
|
54
|
+
git clone https://github.com/JamesPBrett/testdatax.git
|
|
55
|
+
cd testdatax
|
|
56
|
+
|
|
57
|
+
# Install dependencies
|
|
58
|
+
poetry install
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Common Issues
|
|
62
|
+
|
|
63
|
+
- If Poetry is not found in PATH:
|
|
64
|
+
```bash
|
|
65
|
+
export PATH="$HOME/.local/bin:$PATH"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Features
|
|
69
|
+
|
|
70
|
+
- Generate realistic test data using data providers
|
|
71
|
+
- Support for multiple output formats (CSV, JSON, SQL, etc.)
|
|
72
|
+
- Customizable schema definitions
|
|
73
|
+
- Configurable data generation parameters
|
|
74
|
+
- CLI tool for easy data generation
|
|
75
|
+
|
|
76
|
+
## Supported Formats
|
|
77
|
+
|
|
78
|
+
- JSON
|
|
79
|
+
- CSV
|
|
80
|
+
- ORC
|
|
81
|
+
- Parquet
|
|
82
|
+
- MySQL
|
|
83
|
+
- MSSQL
|
|
84
|
+
- Oracle
|
|
85
|
+
|
|
86
|
+
## CLI Usage
|
|
87
|
+
```bash
|
|
88
|
+
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Options:
|
|
92
|
+
- `-o, --output`: Output file path (for SQL exports, the file's base name is used as the table name)
|
|
93
|
+
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
94
|
+
- `-r, --rows`: Number of rows to generate (default: 10)
|
|
95
|
+
- `-s, --schema`: Path to schema file
|
|
96
|
+
- `-d, --debug`: Enable debug output
|
|
97
|
+
|
|
98
|
+
## Usage Examples
|
|
99
|
+
|
|
100
|
+
Generate 10 rows of CSV data:
|
|
101
|
+
```bash
|
|
102
|
+
testdatax -o users.csv -f csv -s schema.json -r 10
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Generate 1000 rows of Parquet data with debug output:
|
|
106
|
+
```bash
|
|
107
|
+
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
|
|
108
|
+
```
|
|
109
|
+
Generate JSON data with default row count (10):
|
|
110
|
+
```bash
|
|
111
|
+
testdatax -o data.json -f json -s schema.json
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Generate ORC file with specific schema:
|
|
115
|
+
```bash
|
|
116
|
+
testdatax -o analytics.orc -f orc -s analytics_schema.json -r 100
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Generate 1000 rows of MySQL data, with table_name as 'default':
|
|
120
|
+
```bash
|
|
121
|
+
testdatax -o default.sql -f mysql -r 1000
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Generate 1000 rows of MSSQL data, with table_name as 'mstest':
|
|
125
|
+
```bash
|
|
126
|
+
testdatax -o mstest.sql -f mssql -r 1000
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Generate 1000 rows of Oracle data, with table_name as 'oracle':
|
|
130
|
+
```bash
|
|
131
|
+
testdatax -o oracle.sql -f oracle -r 1000
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Each command consists of:
|
|
135
|
+
- `-o, --output`: Specify the output file path and name
|
|
136
|
+
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
137
|
+
- `-s, --schema`: Path to your schema definition file
|
|
138
|
+
- `-r, --rows`: Number of rows to generate (optional, defaults to 10)
|
|
139
|
+
- `-d, --debug`: Enable debug logging (optional)
|
|
140
|
+
|
|
141
|
+
## Schema Example
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
"username": {
|
|
146
|
+
"type": "string",
|
|
147
|
+
"faker": "name"
|
|
148
|
+
},
|
|
149
|
+
"date_joined": {
|
|
150
|
+
"type": "datetime"
|
|
151
|
+
},
|
|
152
|
+
"date": {
|
|
153
|
+
"type": "date"
|
|
154
|
+
},
|
|
155
|
+
"age": {
|
|
156
|
+
"type": "integer",
|
|
157
|
+
"min": 18,
|
|
158
|
+
"max": 99
|
|
159
|
+
},
|
|
160
|
+
"is_active": {
|
|
161
|
+
"type": "boolean"
|
|
162
|
+
},
|
|
163
|
+
"float": {
|
|
164
|
+
"type": "float"
|
|
165
|
+
},
|
|
166
|
+
"uuid": {
|
|
167
|
+
"type": "uuid"
|
|
168
|
+
},
|
|
169
|
+
"status": {
|
|
170
|
+
"type": "enum",
|
|
171
|
+
"values": ["active", "inactive", "pending"]
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Schema Configuration
|
|
177
|
+
|
|
178
|
+
The schema file defines the structure and constraints of your generated data. Each field in the schema can have the following properties:
|
|
179
|
+
|
|
180
|
+
### Basic Field Properties
|
|
181
|
+
- `type`: (required) The data type of the field
|
|
182
|
+
- `nullable`: (optional) Boolean to allow null values (default: false)
|
|
183
|
+
- `unique`: (optional) Boolean to ensure unique values (default: false)
|
|
184
|
+
|
|
185
|
+
### Type-Specific Properties
|
|
186
|
+
|
|
187
|
+
#### String Fields
|
|
188
|
+
```json
|
|
189
|
+
{
|
|
190
|
+
"username": {
|
|
191
|
+
"type": "string",
|
|
192
|
+
"min_length": 5,
|
|
193
|
+
"max_length": 20,
|
|
194
|
+
"faker": "user_name" // Use faker to generate realistic data
|
|
195
|
+
},
|
|
196
|
+
"description": {
|
|
197
|
+
"type": "text",
|
|
198
|
+
"min_length": 100,
|
|
199
|
+
"max_length": 500
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
#### Numeric Fields
|
|
205
|
+
```json
|
|
206
|
+
{
|
|
207
|
+
"age": {
|
|
208
|
+
"type": "integer",
|
|
209
|
+
"min": 18,
|
|
210
|
+
"max": 99
|
|
211
|
+
},
|
|
212
|
+
"score": {
|
|
213
|
+
"type": "float",
|
|
214
|
+
"min": 0.0,
|
|
215
|
+
"max": 100.0,
|
|
216
|
+
"precision": 2
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
#### Date and Time Fields
|
|
222
|
+
```json
|
|
223
|
+
{
|
|
224
|
+
"created_at": {
|
|
225
|
+
"type": "datetime",
|
|
226
|
+
"start_date": "2020-01-01",
|
|
227
|
+
"end_date": "2023-12-31"
|
|
228
|
+
},
|
|
229
|
+
"birth_date": {
|
|
230
|
+
"type": "date",
|
|
231
|
+
"format": "%Y-%m-%d"
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
#### Enum Fields
|
|
237
|
+
```json
|
|
238
|
+
{
|
|
239
|
+
"status": {
|
|
240
|
+
"type": "enum",
|
|
241
|
+
"values": ["pending", "active", "suspended"],
|
|
242
|
+
"weights": [0.2, 0.7, 0.1] // Optional probability weights
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
#### Using Faker
|
|
248
|
+
The generator supports Faker providers for generating realistic data:
|
|
249
|
+
```json
|
|
250
|
+
{
|
|
251
|
+
"name": {
|
|
252
|
+
"type": "string",
|
|
253
|
+
"faker": "name"
|
|
254
|
+
},
|
|
255
|
+
"email": {
|
|
256
|
+
"type": "string",
|
|
257
|
+
"faker": "email"
|
|
258
|
+
},
|
|
259
|
+
"address": {
|
|
260
|
+
"type": "string",
|
|
261
|
+
"faker": "address"
|
|
262
|
+
},
|
|
263
|
+
"company": {
|
|
264
|
+
"type": "string",
|
|
265
|
+
"faker": "company"
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Complete Example
|
|
271
|
+
```json
|
|
272
|
+
{
|
|
273
|
+
"user_id": {
|
|
274
|
+
"type": "uuid",
|
|
275
|
+
"unique": true
|
|
276
|
+
},
|
|
277
|
+
"username": {
|
|
278
|
+
"type": "string",
|
|
279
|
+
"faker": "user_name",
|
|
280
|
+
"unique": true
|
|
281
|
+
},
|
|
282
|
+
"email": {
|
|
283
|
+
"type": "string",
|
|
284
|
+
"faker": "email",
|
|
285
|
+
"unique": true
|
|
286
|
+
},
|
|
287
|
+
"age": {
|
|
288
|
+
"type": "integer",
|
|
289
|
+
"min": 18,
|
|
290
|
+
"max": 99
|
|
291
|
+
},
|
|
292
|
+
"status": {
|
|
293
|
+
"type": "enum",
|
|
294
|
+
"values": ["active", "inactive"],
|
|
295
|
+
"weights": [0.8, 0.2]
|
|
296
|
+
},
|
|
297
|
+
"created_at": {
|
|
298
|
+
"type": "datetime",
|
|
299
|
+
"start_date": "2020-01-01",
|
|
300
|
+
"end_date": "2023-12-31"
|
|
301
|
+
},
|
|
302
|
+
"is_verified": {
|
|
303
|
+
"type": "boolean",
|
|
304
|
+
"nullable": true
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
## Supported Data Types
|
|
310
|
+
|
|
311
|
+
- string
|
|
312
|
+
- text
|
|
313
|
+
- integer
|
|
314
|
+
- bigint
|
|
315
|
+
- float
|
|
316
|
+
- decimal
|
|
317
|
+
- boolean
|
|
318
|
+
- date
|
|
319
|
+
- datetime
|
|
320
|
+
- blob
|
|
321
|
+
- uuid
|
|
322
|
+
- enum
|
|
323
|
+
|
|
324
|
+
## Database Type Mappings
|
|
325
|
+
|
|
326
|
+
| Generic Type | MySQL | MSSQL | Oracle |
|
|
327
|
+
|--------------|---------------|-------------------|---------------|
|
|
328
|
+
| string | VARCHAR(255) | NVARCHAR(255) | VARCHAR2(255) |
|
|
329
|
+
| text | TEXT | NVARCHAR(MAX) | CLOB |
|
|
330
|
+
| integer | INT | INT | NUMBER(10) |
|
|
331
|
+
| bigint | BIGINT | BIGINT | NUMBER(19) |
|
|
332
|
+
| float | FLOAT | FLOAT | FLOAT |
|
|
333
|
+
| decimal | DECIMAL(18,2) | DECIMAL(18,2) | NUMBER(18,2) |
|
|
334
|
+
| boolean | TINYINT(1) | BIT | NUMBER(1) |
|
|
335
|
+
| date | DATE | DATE | DATE |
|
|
336
|
+
| datetime | DATETIME | DATETIME2 | TIMESTAMP |
|
|
337
|
+
| blob | LONGBLOB | VARBINARY(MAX) | BLOB |
|
|
338
|
+
| uuid | VARCHAR(36) | UNIQUEIDENTIFIER | VARCHAR2(36) |
|
|
339
|
+
| enum | ENUM | NVARCHAR(255) | VARCHAR2(255) |
|
|
340
|
+
|
|
341
|
+
## License
|
|
342
|
+
|
|
343
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
344
|
+
# Test change
|
|
345
|
+
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# TestDataX
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
[](https://codecov.io/gh/JamesPBrett/testdatax)
|
|
5
|
+

|
|
6
|
+

|
|
7
|
+
|
|
8
|
+
This command-line interface application enables quick and customizable test data generation across various formats. It leverages Faker for realistic data fields, offers flexible schema configurations, and simplifies output to multiple database dialects or file types. Users can define precise parameters for data volume, types, and constraints for each target data set.
|
|
9
|
+
|
|
10
|
+
## Requirements
|
|
11
|
+
- Python 3.11+
|
|
12
|
+
- Additional dependencies are handled automatically by Poetry
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
### Prerequisites
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Install Python 3.11+ if not already installed
|
|
20
|
+
brew install python@3.11
|
|
21
|
+
|
|
22
|
+
# Install Poetry
|
|
23
|
+
curl -sSL https://install.python-poetry.org | python3 -
|
|
24
|
+
|
|
25
|
+
# Verify Poetry installation
|
|
26
|
+
poetry --version
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Install
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Clone the repository
|
|
33
|
+
git clone https://github.com/JamesPBrett/testdatax.git
|
|
34
|
+
cd testdatax
|
|
35
|
+
|
|
36
|
+
# Install dependencies
|
|
37
|
+
poetry install
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Common Issues
|
|
41
|
+
|
|
42
|
+
- If Poetry is not found in PATH:
|
|
43
|
+
```bash
|
|
44
|
+
export PATH="$HOME/.local/bin:$PATH"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Features
|
|
48
|
+
|
|
49
|
+
- Generate realistic test data using data providers
|
|
50
|
+
- Support for multiple output formats (CSV, JSON, SQL, etc.)
|
|
51
|
+
- Customizable schema definitions
|
|
52
|
+
- Configurable data generation parameters
|
|
53
|
+
- CLI tool for easy data generation
|
|
54
|
+
|
|
55
|
+
## Supported Formats
|
|
56
|
+
|
|
57
|
+
- JSON
|
|
58
|
+
- CSV
|
|
59
|
+
- ORC
|
|
60
|
+
- Parquet
|
|
61
|
+
- MySQL
|
|
62
|
+
- MSSQL
|
|
63
|
+
- Oracle
|
|
64
|
+
|
|
65
|
+
## CLI Usage
|
|
66
|
+
```bash
|
|
67
|
+
testdatax -o <output_file> -f <format> -s <schema_file> -r <num_rows> [-d]
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Options:
|
|
71
|
+
- `-o, --output`: Output file path (for SQL exports, the file's base name is used as the table name)
|
|
72
|
+
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
73
|
+
- `-r, --rows`: Number of rows to generate (default: 10)
|
|
74
|
+
- `-s, --schema`: Path to schema file
|
|
75
|
+
- `-d, --debug`: Enable debug output
|
|
76
|
+
|
|
77
|
+
## Usage Examples
|
|
78
|
+
|
|
79
|
+
Generate 10 rows of CSV data:
|
|
80
|
+
```bash
|
|
81
|
+
testdatax -o users.csv -f csv -s schema.json -r 10
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Generate 1000 rows of Parquet data with debug output:
|
|
85
|
+
```bash
|
|
86
|
+
testdatax -o large_dataset.parquet -f parquet -s users_schema.json -r 1000 -d
|
|
87
|
+
```
|
|
88
|
+
Generate JSON data with default row count (10):
|
|
89
|
+
```bash
|
|
90
|
+
testdatax -o data.json -f json -s schema.json
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Generate ORC file with specific schema:
|
|
94
|
+
```bash
|
|
95
|
+
testdatax -o analytics.orc -f orc -s analytics_schema.json -r 100
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Generate 1000 rows of MySQL data, with table_name as 'default':
|
|
99
|
+
```bash
|
|
100
|
+
testdatax -o default.sql -f mysql -r 1000
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Generate 1000 rows of MSSQL data, with table_name as 'mstest':
|
|
104
|
+
```bash
|
|
105
|
+
testdatax -o mstest.sql -f mssql -r 1000
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Generate 1000 rows of Oracle data, with table_name as 'oracle':
|
|
109
|
+
```bash
|
|
110
|
+
testdatax -o oracle.sql -f oracle -r 1000
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Each command consists of:
|
|
114
|
+
- `-o, --output`: Specify the output file path and name
|
|
115
|
+
- `-f, --format`: Output format (csv, json, orc, parquet, mysql, mssql, oracle)
|
|
116
|
+
- `-s, --schema`: Path to your schema definition file
|
|
117
|
+
- `-r, --rows`: Number of rows to generate (optional, defaults to 10)
|
|
118
|
+
- `-d, --debug`: Enable debug logging (optional)
|
|
119
|
+
|
|
120
|
+
## Schema Example
|
|
121
|
+
|
|
122
|
+
```json
|
|
123
|
+
{
|
|
124
|
+
"username": {
|
|
125
|
+
"type": "string",
|
|
126
|
+
"faker": "name"
|
|
127
|
+
},
|
|
128
|
+
"date_joined": {
|
|
129
|
+
"type": "datetime"
|
|
130
|
+
},
|
|
131
|
+
"date": {
|
|
132
|
+
"type": "date"
|
|
133
|
+
},
|
|
134
|
+
"age": {
|
|
135
|
+
"type": "integer",
|
|
136
|
+
"min": 18,
|
|
137
|
+
"max": 99
|
|
138
|
+
},
|
|
139
|
+
"is_active": {
|
|
140
|
+
"type": "boolean"
|
|
141
|
+
},
|
|
142
|
+
"float": {
|
|
143
|
+
"type": "float"
|
|
144
|
+
},
|
|
145
|
+
"uuid": {
|
|
146
|
+
"type": "uuid"
|
|
147
|
+
},
|
|
148
|
+
"status": {
|
|
149
|
+
"type": "enum",
|
|
150
|
+
"values": ["active", "inactive", "pending"]
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Schema Configuration
|
|
156
|
+
|
|
157
|
+
The schema file defines the structure and constraints of your generated data. Each field in the schema can have the following properties:
|
|
158
|
+
|
|
159
|
+
### Basic Field Properties
|
|
160
|
+
- `type`: (required) The data type of the field
|
|
161
|
+
- `nullable`: (optional) Boolean to allow null values (default: false)
|
|
162
|
+
- `unique`: (optional) Boolean to ensure unique values (default: false)
|
|
163
|
+
|
|
164
|
+
### Type-Specific Properties
|
|
165
|
+
|
|
166
|
+
#### String Fields
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"username": {
|
|
170
|
+
"type": "string",
|
|
171
|
+
"min_length": 5,
|
|
172
|
+
"max_length": 20,
|
|
173
|
+
"faker": "user_name" // Use faker to generate realistic data
|
|
174
|
+
},
|
|
175
|
+
"description": {
|
|
176
|
+
"type": "text",
|
|
177
|
+
"min_length": 100,
|
|
178
|
+
"max_length": 500
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
#### Numeric Fields
|
|
184
|
+
```json
|
|
185
|
+
{
|
|
186
|
+
"age": {
|
|
187
|
+
"type": "integer",
|
|
188
|
+
"min": 18,
|
|
189
|
+
"max": 99
|
|
190
|
+
},
|
|
191
|
+
"score": {
|
|
192
|
+
"type": "float",
|
|
193
|
+
"min": 0.0,
|
|
194
|
+
"max": 100.0,
|
|
195
|
+
"precision": 2
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
#### Date and Time Fields
|
|
201
|
+
```json
|
|
202
|
+
{
|
|
203
|
+
"created_at": {
|
|
204
|
+
"type": "datetime",
|
|
205
|
+
"start_date": "2020-01-01",
|
|
206
|
+
"end_date": "2023-12-31"
|
|
207
|
+
},
|
|
208
|
+
"birth_date": {
|
|
209
|
+
"type": "date",
|
|
210
|
+
"format": "%Y-%m-%d"
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
#### Enum Fields
|
|
216
|
+
```json
|
|
217
|
+
{
|
|
218
|
+
"status": {
|
|
219
|
+
"type": "enum",
|
|
220
|
+
"values": ["pending", "active", "suspended"],
|
|
221
|
+
"weights": [0.2, 0.7, 0.1] // Optional probability weights
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
#### Using Faker
|
|
227
|
+
The generator supports Faker providers for generating realistic data:
|
|
228
|
+
```json
|
|
229
|
+
{
|
|
230
|
+
"name": {
|
|
231
|
+
"type": "string",
|
|
232
|
+
"faker": "name"
|
|
233
|
+
},
|
|
234
|
+
"email": {
|
|
235
|
+
"type": "string",
|
|
236
|
+
"faker": "email"
|
|
237
|
+
},
|
|
238
|
+
"address": {
|
|
239
|
+
"type": "string",
|
|
240
|
+
"faker": "address"
|
|
241
|
+
},
|
|
242
|
+
"company": {
|
|
243
|
+
"type": "string",
|
|
244
|
+
"faker": "company"
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Complete Example
|
|
250
|
+
```json
|
|
251
|
+
{
|
|
252
|
+
"user_id": {
|
|
253
|
+
"type": "uuid",
|
|
254
|
+
"unique": true
|
|
255
|
+
},
|
|
256
|
+
"username": {
|
|
257
|
+
"type": "string",
|
|
258
|
+
"faker": "user_name",
|
|
259
|
+
"unique": true
|
|
260
|
+
},
|
|
261
|
+
"email": {
|
|
262
|
+
"type": "string",
|
|
263
|
+
"faker": "email",
|
|
264
|
+
"unique": true
|
|
265
|
+
},
|
|
266
|
+
"age": {
|
|
267
|
+
"type": "integer",
|
|
268
|
+
"min": 18,
|
|
269
|
+
"max": 99
|
|
270
|
+
},
|
|
271
|
+
"status": {
|
|
272
|
+
"type": "enum",
|
|
273
|
+
"values": ["active", "inactive"],
|
|
274
|
+
"weights": [0.8, 0.2]
|
|
275
|
+
},
|
|
276
|
+
"created_at": {
|
|
277
|
+
"type": "datetime",
|
|
278
|
+
"start_date": "2020-01-01",
|
|
279
|
+
"end_date": "2023-12-31"
|
|
280
|
+
},
|
|
281
|
+
"is_verified": {
|
|
282
|
+
"type": "boolean",
|
|
283
|
+
"nullable": true
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## Supported Data Types
|
|
289
|
+
|
|
290
|
+
- string
|
|
291
|
+
- text
|
|
292
|
+
- integer
|
|
293
|
+
- bigint
|
|
294
|
+
- float
|
|
295
|
+
- decimal
|
|
296
|
+
- boolean
|
|
297
|
+
- date
|
|
298
|
+
- datetime
|
|
299
|
+
- blob
|
|
300
|
+
- uuid
|
|
301
|
+
- enum
|
|
302
|
+
|
|
303
|
+
## Database Type Mappings
|
|
304
|
+
|
|
305
|
+
| Generic Type | MySQL | MSSQL | Oracle |
|
|
306
|
+
|--------------|---------------|-------------------|---------------|
|
|
307
|
+
| string | VARCHAR(255) | NVARCHAR(255) | VARCHAR2(255) |
|
|
308
|
+
| text | TEXT | NVARCHAR(MAX) | CLOB |
|
|
309
|
+
| integer | INT | INT | NUMBER(10) |
|
|
310
|
+
| bigint | BIGINT | BIGINT | NUMBER(19) |
|
|
311
|
+
| float | FLOAT | FLOAT | FLOAT |
|
|
312
|
+
| decimal | DECIMAL(18,2) | DECIMAL(18,2) | NUMBER(18,2) |
|
|
313
|
+
| boolean | TINYINT(1) | BIT | NUMBER(1) |
|
|
314
|
+
| date | DATE | DATE | DATE |
|
|
315
|
+
| datetime | DATETIME | DATETIME2 | TIMESTAMP |
|
|
316
|
+
| blob | LONGBLOB | VARBINARY(MAX) | BLOB |
|
|
317
|
+
| uuid | VARCHAR(36) | UNIQUEIDENTIFIER | VARCHAR2(36) |
|
|
318
|
+
| enum | ENUM | NVARCHAR(255) | VARCHAR2(255) |
|
|
319
|
+
|
|
320
|
+
## License
|
|
321
|
+
|
|
322
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
323
|
+
# Test change
|