TestDataX 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- src/__init__.py +7 -0
- src/cli.py +166 -0
- src/exporters/__init__.py +0 -0
- src/exporters/base_exporter.py +23 -0
- src/exporters/csv_exporter.py +115 -0
- src/exporters/json_exporter.py +89 -0
- src/exporters/mssql_exporter.py +198 -0
- src/exporters/mysql_exporter.py +184 -0
- src/exporters/oracle_exporter.py +205 -0
- src/exporters/orc_exporter.py +100 -0
- src/exporters/parquet_exporter.py +102 -0
- src/exporters/utils/__init__.py +0 -0
- src/exporters/utils/chunker.py +27 -0
- src/exporters/utils/constants.py +55 -0
- src/exporters/utils/exporter_config.py +17 -0
- src/exporters/utils/formatters.py +165 -0
- src/generator.py +117 -0
- src/providers/__init__.py +4 -0
- src/providers/base.py +58 -0
- src/providers/faker_provider.py +65 -0
- src/schemas.py +81 -0
- testdatax-0.1.0.dist-info/LICENSE +21 -0
- testdatax-0.1.0.dist-info/METADATA +345 -0
- testdatax-0.1.0.dist-info/RECORD +26 -0
- testdatax-0.1.0.dist-info/WHEEL +4 -0
- testdatax-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from ..csv_exporter import CsvExporter
|
|
2
|
+
from ..json_exporter import JsonExporter
|
|
3
|
+
from ..mssql_exporter import MssqlExporter
|
|
4
|
+
from ..mysql_exporter import MysqlExporter
|
|
5
|
+
from ..oracle_exporter import OracleExporter
|
|
6
|
+
from ..orc_exporter import OrcExporter
|
|
7
|
+
from ..parquet_exporter import ParquetExporter
|
|
8
|
+
|
|
9
|
+
# Registry keyed by export-format name.
# NOTE(review): despite the "_CLASSES" suffix, every value is an exporter
# *instance* that is constructed once, when this module is imported.
EXPORTER_CLASSES = dict(
    csv=CsvExporter(),
    json=JsonExporter(),
    parquet=ParquetExporter(),
    orc=OrcExporter(),
    mysql=MysqlExporter(),
    mssql=MssqlExporter(),
    oracle=OracleExporter(),
)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
from datetime import date, datetime
|
|
5
|
+
from decimal import Decimal
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BaseFormatter:
|
|
10
|
+
"""Base class for handling data type formatting across exporters."""
|
|
11
|
+
|
|
12
|
+
@staticmethod
|
|
13
|
+
def format_none() -> str | None:
|
|
14
|
+
"""Format None values."""
|
|
15
|
+
return None
|
|
16
|
+
|
|
17
|
+
@staticmethod
|
|
18
|
+
def format_datetime(value: datetime | date) -> str:
|
|
19
|
+
"""Format datetime and date values."""
|
|
20
|
+
return value.isoformat()
|
|
21
|
+
|
|
22
|
+
@staticmethod
|
|
23
|
+
def format_uuid(value: uuid.UUID) -> str:
|
|
24
|
+
"""Format UUID values."""
|
|
25
|
+
return str(value)
|
|
26
|
+
|
|
27
|
+
@staticmethod
|
|
28
|
+
def format_bytes(value: bytes) -> str:
|
|
29
|
+
"""Format bytes values."""
|
|
30
|
+
return value.hex()
|
|
31
|
+
|
|
32
|
+
@staticmethod
|
|
33
|
+
def format_decimal(value: Decimal) -> float:
|
|
34
|
+
"""Format Decimal values."""
|
|
35
|
+
return float(value)
|
|
36
|
+
|
|
37
|
+
@staticmethod
|
|
38
|
+
def format_string(value: str) -> str:
|
|
39
|
+
"""Format string values, removing null bytes."""
|
|
40
|
+
return str(value).replace("\x00", "")
|
|
41
|
+
|
|
42
|
+
@abstractmethod
|
|
43
|
+
def format_value(
|
|
44
|
+
self,
|
|
45
|
+
value: (
|
|
46
|
+
None
|
|
47
|
+
| datetime
|
|
48
|
+
| date
|
|
49
|
+
| uuid.UUID
|
|
50
|
+
| bytes
|
|
51
|
+
| Decimal
|
|
52
|
+
| dict
|
|
53
|
+
| list
|
|
54
|
+
| int
|
|
55
|
+
| float
|
|
56
|
+
| str
|
|
57
|
+
),
|
|
58
|
+
) -> None | str | float | dict | list | int:
|
|
59
|
+
"""Format a single value according to export format requirements.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
value: The value to format
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
Formatted value suitable for the target format
|
|
66
|
+
|
|
67
|
+
"""
|
|
68
|
+
pass
|
|
69
|
+
|
|
70
|
+
def format_row(
|
|
71
|
+
self, row: dict[str, Any], **kwargs: dict[str, str | int | float]
|
|
72
|
+
) -> dict[str, Any]:
|
|
73
|
+
"""Format the provided rows with the correct format_value.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
row: Dictionary containing row data
|
|
77
|
+
**kwargs: Additional format-specific parameters
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Formatted row dictionary
|
|
81
|
+
|
|
82
|
+
"""
|
|
83
|
+
formatted_row: dict[str, Any] = {}
|
|
84
|
+
for key, value in row.items():
|
|
85
|
+
try:
|
|
86
|
+
formatted_row[key] = self.format_value(value)
|
|
87
|
+
except Exception as e:
|
|
88
|
+
formatted_row[key] = f"ERROR: {str(e)}"
|
|
89
|
+
return formatted_row
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class JSONFormatter(BaseFormatter):
    """Formatter that prepares values for JSON exports."""

    @classmethod
    def format_value(
        cls,
        value: (
            datetime
            | date
            | uuid.UUID
            | bytes
            | Decimal
            | dict
            | list
            | int
            | float
            | str
            | None
        ),
    ) -> str | float | dict | list | int | None:
        """Convert one value into its JSON export representation.

        ``None`` goes through ``format_none``; dates, UUIDs, bytes and
        Decimals go through the matching ``format_*`` helper; dicts, lists,
        ints and floats are returned unchanged; anything else is stringified
        and passed through ``format_string``.
        """
        if value is None:
            return cls.format_none()
        if isinstance(value, (datetime, date)):
            return cls.format_datetime(value)
        if isinstance(value, uuid.UUID):
            return cls.format_uuid(value)
        if isinstance(value, bytes):
            return cls.format_bytes(value)
        if isinstance(value, Decimal):
            return cls.format_decimal(value)
        if isinstance(value, (dict, list, int, float)):
            # Returned as-is; nested values are not inspected here.
            return value
        return cls.format_string(str(value))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class CSVFormatter(BaseFormatter):
    """Formatter that prepares values for CSV exports."""

    @classmethod
    def format_value(
        cls,
        value: (
            datetime
            | date
            | uuid.UUID
            | bytes
            | Decimal
            | dict
            | list
            | int
            | float
            | str
            | None
        ),
    ) -> str | float | dict | list | int | None:
        """Convert one value into its CSV export representation.

        ``None`` goes through ``format_none``; dates, UUIDs, bytes and
        Decimals go through the matching ``format_*`` helper; dicts and lists
        are serialised to a JSON string; ints and floats are returned
        unchanged; anything else is stringified and passed through
        ``format_string``.

        Args:
            value: The value to format.

        Returns:
            The formatted value.

        """
        if value is None:
            return cls.format_none()
        if isinstance(value, (datetime, date)):
            return cls.format_datetime(value)
        if isinstance(value, uuid.UUID):
            return cls.format_uuid(value)
        if isinstance(value, bytes):
            return cls.format_bytes(value)
        if isinstance(value, Decimal):
            return cls.format_decimal(value)
        if isinstance(value, (dict, list)):
            # ``default`` is only consulted for objects json cannot encode by
            # itself (e.g. a datetime or Decimal nested in the container), so
            # containers that already serialised keep the exact same output
            # while nested dates/UUIDs/bytes/Decimals no longer raise
            # TypeError.
            return json.dumps(value, default=cls.format_value)
        if isinstance(value, (int, float)):
            return value
        return cls.format_string(str(value))
|
src/generator.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from decimal import Decimal
|
|
3
|
+
from typing import Any
|
|
4
|
+
from uuid import UUID
|
|
5
|
+
|
|
6
|
+
from faker import Faker
|
|
7
|
+
|
|
8
|
+
from .providers import DataProvider, FakerProvider
|
|
9
|
+
from .schemas import DataType, FieldSchema
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataGenerator:
    """Generate synthetic rows from a list of field schemas.

    Every ``DataType`` is routed to a private ``_generate_*`` method:
    - STRING: string from the field's ``value_provider`` ("name" when unset)
    - TEXT: text from the provider
    - INTEGER: integer between ``min_value`` and ``max_value`` (0 and 100
      when the field leaves them unset)
    - BIGINT: integer between 0 and 9999999999
    - FLOAT: float with 2 digits to the right of the decimal point
    - DECIMAL: Decimal from the provider
    - BOOLEAN: bool from the provider
    - DATE: date object from the provider
    - DATETIME: datetime object from the provider
    - BLOB: binary data from the provider
    - UUID: UUID object from the provider
    - ENUM: one of the field's ``enum_values``
    """

    def __init__(self, provider: DataProvider | None = None) -> None:
        """Store the provider and build the DataType -> generator mapping.

        Args:
            provider: Data provider to use; a ``FakerProvider`` is created
                when the argument is falsy.
        """
        self.provider = provider if provider else FakerProvider()
        # NOTE(review): BIGINT and FLOAT use this Faker instance directly and
        # therefore bypass ``self.provider``.
        self.faker = Faker()
        self.type_generators = {
            DataType.STRING: self._generate_string,
            DataType.TEXT: self._generate_text,
            DataType.INTEGER: self._generate_integer,
            DataType.BIGINT: self._generate_bigint,
            DataType.FLOAT: self._generate_float,
            DataType.DECIMAL: self._generate_decimal,
            DataType.BOOLEAN: self._generate_boolean,
            DataType.DATE: self._generate_date,
            DataType.DATETIME: self._generate_datetime,
            DataType.BLOB: self._generate_blob,
            DataType.UUID: self._generate_uuid,
            DataType.ENUM: self._generate_enum,
        }

    def generate_data(
        self, fields: list[FieldSchema], count: int
    ) -> list[dict[str, Any]]:
        """Return ``count`` rows, each mapping field name to a generated value."""
        return [
            {spec.name: self.type_generators[spec.type](spec) for spec in fields}
            for _ in range(count)
        ]

    def _generate_string(self, field: FieldSchema) -> str:
        if field.value_provider:
            provider_name = str(field.value_provider)
        else:
            provider_name = "name"
        return self.provider.generate_string(value_provider=provider_name)

    def _generate_text(self, field: FieldSchema) -> str:
        return self.provider.generate_text()

    def _generate_integer(self, field: FieldSchema) -> int:
        # A missing attribute and an explicit None both select the default.
        lower = getattr(field, "min_value", None)
        upper = getattr(field, "max_value", None)
        return self.provider.generate_integer(
            min_value=0 if lower is None else int(lower),
            max_value=100 if upper is None else int(upper),
        )

    def _generate_bigint(self, field: FieldSchema) -> int:
        return self.faker.random_int(min=0, max=9999999999)

    def _generate_float(self, field: FieldSchema) -> float:
        return self.faker.pyfloat(right_digits=2)

    def _generate_decimal(self, field: FieldSchema) -> Decimal:
        return self.provider.generate_decimal()

    def _generate_boolean(self, field: FieldSchema) -> bool:
        return self.provider.generate_boolean()

    def _generate_date(self, field: FieldSchema) -> date:
        return self.provider.generate_date()

    def _generate_datetime(self, field: FieldSchema) -> datetime:
        return self.provider.generate_datetime()

    def _generate_blob(self, field: FieldSchema) -> bytes:
        return self.provider.generate_binary()

    def _generate_uuid(self, field: FieldSchema) -> UUID:
        return self.provider.generate_uuid()

    def _generate_enum(self, field: FieldSchema) -> str:
        choices = field.enum_values
        if not choices:
            raise ValueError(f"Enum field {field.name} must have values defined")
        return self.provider.generate_enum(choices)
|
src/providers/base.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from datetime import date, datetime
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
from uuid import UUID
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DataProvider(ABC):
    """Interface every test-data value provider has to implement.

    All methods are abstract; a concrete provider must override each of them
    before it can be instantiated.
    """

    @abstractmethod
    def generate_string(self, **kwargs: str) -> str:
        """Return a generated string."""

    @abstractmethod
    def generate_text(self, **kwargs: str) -> str:
        """Return a generated block of text."""

    @abstractmethod
    def generate_integer(self, min_value: int = 0, max_value: int = 100) -> int:
        """Return a generated integer for the given bounds."""

    @abstractmethod
    def generate_decimal(self, **kwargs: Decimal) -> Decimal:
        """Return a generated Decimal."""

    @abstractmethod
    def generate_boolean(self) -> bool:
        """Return a generated boolean."""

    @abstractmethod
    def generate_date(self) -> date:
        """Return a generated date."""

    @abstractmethod
    def generate_datetime(self) -> datetime:
        """Return a generated datetime."""

    @abstractmethod
    def generate_binary(self, length: int = 64) -> bytes:
        """Return generated binary data."""

    @abstractmethod
    def generate_uuid(self) -> UUID:
        """Return a generated UUID."""

    @abstractmethod
    def generate_enum(self, values: list[str]) -> str:
        """Return one value chosen from ``values``."""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from decimal import Decimal
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
from faker import Faker
|
|
6
|
+
|
|
7
|
+
from .base import DataProvider
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FakerProvider(DataProvider):
    """``DataProvider`` implementation backed by a ``Faker`` instance."""

    def __init__(self) -> None:
        """Create the ``Faker`` instance used by every generator method."""
        self.faker = Faker()

    def generate_string(self, **kwargs: str) -> str:
        """Call the Faker method named by ``value_provider`` and stringify it.

        Falls back to the ``name`` method when ``value_provider`` is missing
        or falsy.
        """
        method_name = kwargs.get("value_provider") or "name"
        factory = getattr(self.faker, method_name)
        return str(factory())

    def generate_text(self, **kwargs: str) -> str:
        """Return text produced by ``Faker.text``."""
        return self.faker.text()

    def generate_integer(
        self, min_value: int | None = None, max_value: int | None = None
    ) -> int:
        """Return an integer from ``Faker.pyint``; bounds default to 0 and 100."""
        lower = 0 if min_value is None else min_value
        upper = 100 if max_value is None else max_value
        return self.faker.pyint(min_value=lower, max_value=upper)

    def generate_decimal(self, **kwargs: Decimal) -> Decimal:
        """Return a Decimal built from a float with 2 right-hand digits."""
        as_float = self.faker.pyfloat(right_digits=2)
        return Decimal(str(as_float))

    def generate_boolean(self) -> bool:
        """Return a boolean from ``Faker.boolean``."""
        return self.faker.boolean()

    def generate_date(self) -> date:
        """Return a date from ``Faker.date_object``."""
        return self.faker.date_object()

    def generate_datetime(self) -> datetime:
        """Return a datetime from ``Faker.date_time``."""
        return self.faker.date_time()

    def generate_binary(self, length: int = 64) -> bytes:
        """Return binary data from ``Faker.binary`` with the given length."""
        return self.faker.binary(length=length)

    def generate_uuid(self) -> UUID:
        """Return a ``UUID``, converting Faker's result when it is not one."""
        raw = self.faker.uuid4()
        return raw if isinstance(raw, UUID) else UUID(str(raw))

    def generate_enum(self, values: list[str]) -> str:
        """Return a random element of ``values``.

        Raises:
            ValueError: If ``values`` is empty.
        """
        if values:
            return self.faker.random_element(values)
        raise ValueError("Enum values cannot be empty")
|
src/schemas.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from .exporters.utils.constants import EXPORT_PATTERNS
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DataType(str, Enum):
    """Closed set of field data types understood by the generator.

    Each member is also a ``str`` whose value is the lower-case type name, so
    a member compares equal to that plain string.

    Attributes:
        STRING: Short character sequence
        TEXT: Longer character sequence
        INTEGER: Standard integer
        BIGINT: Large integer
        FLOAT: Floating-point number
        DECIMAL: Precise decimal number
        BOOLEAN: True/False value
        DATE: Calendar date
        DATETIME: Date and time
        BLOB: Binary large object
        UUID: Universally unique identifier
        ENUM: Enumerated value

    """

    # Character data
    STRING = "string"
    TEXT = "text"

    # Numeric data
    INTEGER = "integer"
    BIGINT = "bigint"
    FLOAT = "float"
    DECIMAL = "decimal"

    # Logical and temporal data
    BOOLEAN = "boolean"
    DATE = "date"
    DATETIME = "datetime"

    # Binary, identifier and enumerated data
    BLOB = "blob"
    UUID = "uuid"
    ENUM = "enum"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class FieldSchema(BaseModel):
    """Schema of one field in a data generation configuration.

    Only ``name`` and ``type`` are required; every other attribute is an
    optional constraint that defaults to ``None``.
    """

    # Field name.
    name: str
    # Data type of the field.
    type: DataType
    # Allowed values for an enumerated field.
    enum_values: list[str] | None = None
    # Optional lower and upper bounds for generated values.
    min_value: Any | None = None
    max_value: Any | None = None
    # NOTE(review): presumably the number of digits after the decimal point;
    # confirm against the code that reads it.
    right_digits: int | None = None
    # Name of the value provider to use for this field.
    value_provider: str | None = None
    # NOTE(review): meaning of the pattern is not visible here; confirm
    # against the code that reads it.
    pattern: str | None = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class GeneratorConfig(BaseModel):
    """Top-level configuration for one data generation run.

    Attributes:
        fields (list[FieldSchema]): Schemas describing the fields of the data
            to generate.
        row_count (int): Number of rows/records to generate.
        export_format (str): Export format; required, and validated against
            the ``EXPORT_PATTERNS`` pattern.
        output_path (str): File path where the generated data will be saved.

    """

    fields: list[FieldSchema]
    row_count: int
    # ``...`` marks the field as required; the value must match the pattern.
    export_format: str = Field(..., pattern=EXPORT_PATTERNS)
    output_path: str
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 JamesPBrett
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|