misata 0.2.0b0__py3-none-any.whl → 0.3.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- misata/__init__.py +77 -2
- misata/cache.py +258 -0
- misata/constraints.py +307 -0
- misata/context.py +259 -0
- misata/exceptions.py +277 -0
- misata/generators/__init__.py +29 -0
- misata/generators/base.py +586 -0
- misata/profiles.py +332 -0
- misata/smart_values.py +171 -2
- misata/streaming.py +228 -0
- {misata-0.2.0b0.dist-info → misata-0.3.0b0.dist-info}/METADATA +1 -1
- {misata-0.2.0b0.dist-info → misata-0.3.0b0.dist-info}/RECORD +16 -8
- {misata-0.2.0b0.dist-info → misata-0.3.0b0.dist-info}/WHEEL +0 -0
- {misata-0.2.0b0.dist-info → misata-0.3.0b0.dist-info}/entry_points.txt +0 -0
- {misata-0.2.0b0.dist-info → misata-0.3.0b0.dist-info}/licenses/LICENSE +0 -0
- {misata-0.2.0b0.dist-info → misata-0.3.0b0.dist-info}/top_level.txt +0 -0
misata/context.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context management for Misata data generation.
|
|
3
|
+
|
|
4
|
+
Provides stateful context tracking during multi-table generation,
|
|
5
|
+
including parent ID tracking for foreign keys and cross-table references.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Dict, List, Optional, Set
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class TableContext:
    """Bookkeeping for one generated table.

    Stores the table name, a row count, the set of known column names, the
    primary-key array (when one has been recorded) and a cache of column
    arrays that other tables can look up.
    """

    name: str
    row_count: int = 0
    columns: Set[str] = field(default_factory=set)
    primary_key: Optional[np.ndarray] = None
    foreign_keys: Dict[str, np.ndarray] = field(default_factory=dict)
    cached_columns: Dict[str, np.ndarray] = field(default_factory=dict)

    def set_primary_key(self, values: np.ndarray) -> None:
        """Record ``values`` as the primary key and sync ``row_count`` to its length."""
        self.primary_key = values
        self.row_count = len(self.primary_key)

    def set_column(self, column_name: str, values: np.ndarray) -> None:
        """Remember ``values`` under ``column_name`` and note the column as known."""
        self.columns.add(column_name)
        self.cached_columns[column_name] = values

    def get_column(self, column_name: str) -> Optional[np.ndarray]:
        """Return the cached array for ``column_name``, or ``None`` if absent."""
        cache = self.cached_columns
        if column_name in cache:
            return cache[column_name]
        return None

    def get_ids(self) -> Optional[np.ndarray]:
        """Return the stored primary-key array (``None`` when never set)."""
        return self.primary_key
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class GenerationContext:
    """Manages state across multi-table data generation.

    This context tracks:
    - Generated table data for foreign key references
    - Columns needed for cross-table lookups
    - Progress tracking for callbacks

    Example:
        context = GenerationContext()

        # After generating users table
        context.register_table("users", users_df)

        # When generating orders (which references users)
        user_ids = context.get_parent_ids("users", "id")
        orders_df["user_id"] = np.random.choice(user_ids, size=1000)
    """

    def __init__(self):
        self._tables: Dict[str, TableContext] = {}
        self._generation_order: List[str] = []
        self._progress_callbacks: List[callable] = []
        self._current_table: Optional[str] = None
        self._current_progress: float = 0.0

    def register_table(
        self,
        table_name: str,
        df: pd.DataFrame,
        id_column: str = "id"
    ) -> None:
        """Register a generated table in the context.

        Registering a name that already exists replaces its stored data and
        keeps its original position in the generation order.

        Args:
            table_name: Name of the table
            df: Generated DataFrame
            id_column: Primary key column name
        """
        ctx = TableContext(name=table_name)

        # Count rows from the frame itself so that a table without an id
        # column is not reported as empty.
        ctx.row_count = len(df)

        if id_column in df.columns:
            ctx.set_primary_key(df[id_column].values)

        # Cache all columns for potential cross-references
        for col in df.columns:
            ctx.set_column(col, df[col].values)

        self._tables[table_name] = ctx
        # Avoid listing the same table twice when it is re-registered.
        if table_name not in self._generation_order:
            self._generation_order.append(table_name)

    def register_batch(
        self,
        table_name: str,
        df: pd.DataFrame,
        id_column: str = "id"
    ) -> None:
        """Register a batch of generated data (appends to existing).

        The first batch for an unknown table is handled by ``register_table``.

        Args:
            table_name: Name of the table
            df: Generated batch DataFrame
            id_column: Primary key column name
        """
        if table_name not in self._tables:
            self.register_table(table_name, df, id_column)
            return

        ctx = self._tables[table_name]
        # Captured up front because set_primary_key() overwrites row_count.
        rows_before = ctx.row_count

        # Append to existing
        if id_column in df.columns:
            batch_ids = df[id_column].values
            if ctx.primary_key is not None:
                ctx.primary_key = np.concatenate([ctx.primary_key, batch_ids])
            else:
                ctx.set_primary_key(batch_ids)

        for col in df.columns:
            batch_values = df[col].values
            if col in ctx.cached_columns:
                ctx.cached_columns[col] = np.concatenate([
                    ctx.cached_columns[col],
                    batch_values
                ])
            else:
                ctx.set_column(col, batch_values)

        # Row count is the number of rows registered so far, independent of
        # whether each batch carried the id column.
        ctx.row_count = rows_before + len(df)

    def get_parent_ids(
        self,
        table_name: str,
        column: str = "id"
    ) -> Optional[np.ndarray]:
        """Get column values from a parent table for foreign key generation.

        Args:
            table_name: Parent table name
            column: Column to get values from

        Returns:
            Array of values, or None if the table or column is not found
        """
        if table_name not in self._tables:
            return None

        ctx = self._tables[table_name]

        # "id" is answered from the stored primary key when there is one,
        # even if the key column has a different name.
        if column == "id" and ctx.primary_key is not None:
            return ctx.primary_key

        return ctx.get_column(column)

    def get_filtered_parent_ids(
        self,
        table_name: str,
        id_column: str = "id",
        filters: Optional[Dict[str, Any]] = None
    ) -> Optional[np.ndarray]:
        """Get filtered parent IDs based on conditions.

        Args:
            table_name: Parent table name
            id_column: ID column to return
            filters: Dict of column -> value conditions (equality match)

        Returns:
            Filtered array of IDs, or None when the table is unknown, no ID
            values are available, or no row satisfies every filter
        """
        if table_name not in self._tables:
            return None

        ctx = self._tables[table_name]

        if not filters:
            return self.get_parent_ids(table_name, id_column)

        # Get base IDs
        ids = ctx.get_column(id_column)
        if ids is None:
            ids = ctx.primary_key

        if ids is None:
            return None

        # Apply filters
        mask = np.ones(len(ids), dtype=bool)

        for filter_col, filter_val in filters.items():
            col_values = ctx.get_column(filter_col)
            # NOTE(review): a filter on a column that was never cached is
            # skipped rather than rejected - confirm this is intended.
            if col_values is not None:
                mask &= (col_values == filter_val)

        return ids[mask] if mask.any() else None

    def get_table_context(self, table_name: str) -> Optional[TableContext]:
        """Get full context for a table."""
        return self._tables.get(table_name)

    def has_table(self, table_name: str) -> bool:
        """Check if a table has been generated."""
        return table_name in self._tables

    def get_generated_tables(self) -> List[str]:
        """Get list of generated tables in order (returned as a copy)."""
        return self._generation_order.copy()

    def clear(self) -> None:
        """Clear all table data and progress state; callbacks stay registered."""
        self._tables.clear()
        self._generation_order.clear()
        self._current_table = None
        self._current_progress = 0.0

    # ============ Progress Tracking ============

    def add_progress_callback(self, callback: callable) -> None:
        """Add a progress callback function.

        Callback signature: callback(table_name: str, progress: float, message: str)
        """
        self._progress_callbacks.append(callback)

    def set_current_table(self, table_name: str) -> None:
        """Set the currently generating table and announce 0.0 progress."""
        self._current_table = table_name
        # Keep the stored progress in step with the 0.0 that is announced.
        self._current_progress = 0.0
        self._notify_progress(0.0, f"Starting {table_name}")

    def update_progress(self, progress: float, message: str = "") -> None:
        """Update generation progress (0.0 to 1.0)."""
        self._current_progress = progress
        self._notify_progress(progress, message)

    def _notify_progress(self, progress: float, message: str) -> None:
        """Notify all progress callbacks.

        A failing callback is logged and skipped; it never aborts generation
        or prevents the remaining callbacks from running.
        """
        import logging  # local import: the module header does not import it

        for callback in self._progress_callbacks:
            try:
                callback(self._current_table, progress, message)
            except Exception:
                # Best effort: don't let callback errors break generation,
                # but leave a trace instead of hiding the failure.
                logging.getLogger(__name__).warning(
                    "Progress callback %r failed", callback, exc_info=True
                )

    # ============ Statistics ============

    def get_summary(self) -> Dict[str, Any]:
        """Get summary of all generated data.

        Returns:
            Dict with per-table ``row_count`` and ``columns``, the
            ``generation_order`` (a copy) and the ``total_rows`` across tables
        """
        return {
            "tables": {
                name: {
                    "row_count": ctx.row_count,
                    "columns": list(ctx.columns),
                }
                for name, ctx in self._tables.items()
            },
            # Copy so callers cannot mutate the internal ordering.
            "generation_order": self._generation_order.copy(),
            "total_rows": sum(ctx.row_count for ctx in self._tables.values()),
        }
|
misata/exceptions.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for Misata.
|
|
3
|
+
|
|
4
|
+
Provides a hierarchy of exception classes for better error handling
|
|
5
|
+
and more informative error messages.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MisataError(Exception):
    """Root of the Misata exception hierarchy.

    Carries a human-readable ``message`` and a ``details`` mapping; the
    mapping is appended to the string form whenever it is non-empty.
    """

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        self.message = message
        # A missing or empty mapping is normalised to a fresh empty dict.
        self.details = details if details else {}
        super().__init__(message)

    def __str__(self) -> str:
        if not self.details:
            return self.message
        return f"{self.message} | Details: {self.details}"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ============ Schema Errors ============
|
|
26
|
+
|
|
27
|
+
class SchemaError(MisataError):
    """Common ancestor of all schema-related Misata errors."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SchemaValidationError(SchemaError):
    """Signals that a schema failed validation.

    Optionally records the offending ``field``, the rejected ``value`` and a
    ``suggestion`` for fixing it.
    """

    def __init__(
        self,
        message: str,
        field: Optional[str] = None,
        value: Optional[Any] = None,
        suggestion: Optional[str] = None,
    ):
        self.field = field
        self.value = value
        self.suggestion = suggestion

        details = {}
        if field:
            details["field"] = field
        if value is not None:
            # Only a 100-character preview of the value goes into details.
            rendered = str(value)
            details["value"] = rendered[:100]
        if suggestion:
            details["suggestion"] = suggestion

        super().__init__(message, details)

    def __str__(self) -> str:
        prefix = f"[{self.field}] " if self.field else ""
        hint = f"\n → Suggestion: {self.suggestion}" if self.suggestion else ""
        return f"{prefix}{self.message}{hint}"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class SchemaParseError(SchemaError):
    """Raised when schema parsing fails (YAML, JSON, etc.).

    Args:
        message: Description of the parse failure
        source: Optional name of the input that failed to parse
        line: Optional line number of the failure; 0 is accepted so that
            zero-based parser positions are preserved
    """

    def __init__(self, message: str, source: Optional[str] = None, line: Optional[int] = None):
        self.source = source
        self.line = line
        details = {}
        if source:
            details["source"] = source
        # Compare against None: a plain truthiness test would drop line 0.
        if line is not None:
            details["line"] = line
        super().__init__(message, details)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class RelationshipError(SchemaError):
    """Signals an invalid relationship definition between two tables."""

    def __init__(
        self,
        message: str,
        parent_table: Optional[str] = None,
        child_table: Optional[str] = None,
    ):
        self.parent_table = parent_table
        self.child_table = child_table

        # Only the table names that were actually supplied end up in details.
        named_tables = (
            ("parent_table", parent_table),
            ("child_table", child_table),
        )
        details = {key: table for key, table in named_tables if table}
        super().__init__(message, details)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ============ Generation Errors ============
|
|
97
|
+
|
|
98
|
+
class GenerationError(MisataError):
    """Common ancestor of all data-generation errors."""
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class ColumnGenerationError(GenerationError):
    """Raised when column generation fails.

    Args:
        message: Description of the failure
        table: Optional name of the table being generated
        column: Optional name of the column being generated
        column_type: Optional type of the column
        suggestion: Optional hint on how to fix the problem
    """

    def __init__(
        self,
        message: str,
        table: Optional[str] = None,
        column: Optional[str] = None,
        column_type: Optional[str] = None,
        suggestion: Optional[str] = None,
    ):
        self.table = table
        self.column = column
        self.column_type = column_type
        self.suggestion = suggestion
        details = {}
        if table:
            details["table"] = table
        if column:
            details["column"] = column
        if column_type:
            details["column_type"] = column_type
        # Mirror SchemaValidationError, which also exposes its suggestion
        # through the details mapping.
        if suggestion:
            details["suggestion"] = suggestion
        super().__init__(message, details)

    def __str__(self) -> str:
        """Format as ``[table.column] message`` plus an optional suggestion line."""
        location = ""
        if self.table and self.column:
            location = f"[{self.table}.{self.column}] "
        elif self.table:
            location = f"[{self.table}] "
        elif self.column:
            # Previously a column given without a table was dropped entirely.
            location = f"[{self.column}] "
        msg = f"{location}{self.message}"
        if self.suggestion:
            msg += f"\n → Suggestion: {self.suggestion}"
        return msg
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ConstraintError(GenerationError):
    """Signals that applying a constraint failed.

    ``affected_columns`` is always a list on the instance (empty when none
    were given).
    """

    def __init__(
        self,
        message: str,
        constraint_type: Optional[str] = None,
        affected_columns: Optional[List[str]] = None,
    ):
        self.constraint_type = constraint_type
        self.affected_columns = affected_columns if affected_columns else []

        # Falsy entries (None, empty string, empty list) are left out.
        supplied = (
            ("constraint_type", constraint_type),
            ("affected_columns", affected_columns),
        )
        details = {key: item for key, item in supplied if item}
        super().__init__(message, details)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class CircularDependencyError(GenerationError):
    """Signals a dependency cycle among table relationships."""

    def __init__(self, message: str, tables: Optional[List[str]] = None):
        if tables:
            self.tables = tables
            details = {"tables": tables}
        else:
            # No tables supplied: expose an empty list and no details.
            self.tables = []
            details = {}
        super().__init__(message, details)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class ReferentialIntegrityError(GenerationError):
    """Raised when foreign key references cannot be satisfied.

    Args:
        message: Description of the failure
        parent_table: Optional name of the referenced table
        child_table: Optional name of the referencing table
        missing_ids: Optional integer reported alongside the failure; any
            explicitly supplied value, including 0, is kept in details
    """

    def __init__(
        self,
        message: str,
        parent_table: Optional[str] = None,
        child_table: Optional[str] = None,
        missing_ids: Optional[int] = None,
    ):
        self.parent_table = parent_table
        self.child_table = child_table
        self.missing_ids = missing_ids
        details = {}
        if parent_table:
            details["parent_table"] = parent_table
        if child_table:
            details["child_table"] = child_table
        # Compare against None so an explicit 0 is not silently discarded.
        if missing_ids is not None:
            details["missing_ids"] = missing_ids
        super().__init__(message, details)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ============ LLM Errors ============
|
|
191
|
+
|
|
192
|
+
class LLMError(MisataError):
    """Common ancestor of all LLM-related errors."""
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class LLMConnectionError(LLMError):
    """Signals a failed connection to an LLM API."""

    def __init__(self, message: str, provider: Optional[str] = None):
        self.provider = provider
        details = {}
        if provider:
            # The provider is only reported when one was named.
            details["provider"] = provider
        super().__init__(message, details)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class LLMParseError(LLMError):
    """Signals that an LLM response could not be parsed.

    At most the first 500 characters of the raw response are retained.
    """

    def __init__(self, message: str, raw_response: Optional[str] = None):
        # An empty or missing response is stored as None.
        preview = raw_response[:500] if raw_response else None
        self.raw_response = preview
        details = {"raw_response_preview": preview} if preview else {}
        super().__init__(message, details)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class LLMQuotaError(LLMError):
    """Raised when LLM API quota is exceeded.

    Args:
        message: Description of the failure
        provider: Optional name of the LLM provider
        retry_after: Optional number of seconds to wait before retrying;
            0 is a valid value and is kept in details
    """

    def __init__(self, message: str, provider: Optional[str] = None, retry_after: Optional[int] = None):
        self.provider = provider
        self.retry_after = retry_after
        details = {}
        if provider:
            details["provider"] = provider
        # Compare against None: a truthiness test would drop retry_after=0.
        if retry_after is not None:
            details["retry_after_seconds"] = retry_after
        super().__init__(message, details)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# ============ Configuration Errors ============
|
|
232
|
+
|
|
233
|
+
class ConfigurationError(MisataError):
    """Signals an invalid configuration."""
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
class MissingAPIKeyError(ConfigurationError):
    """Signals that a required API key was not found.

    The message and a fix-it suggestion are derived from the provider name
    and the environment variable that was expected to hold the key.
    """

    def __init__(self, provider: str, env_var: str):
        self.provider = provider
        self.env_var = env_var
        super().__init__(
            f"API key for {provider} not found",
            {
                "provider": provider,
                "env_var": env_var,
                "suggestion": f"Set {env_var} environment variable or pass api_key parameter",
            },
        )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# ============ Export Errors ============
|
|
251
|
+
|
|
252
|
+
class ExportError(MisataError):
    """Common ancestor of all export-related errors."""
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class FileWriteError(ExportError):
    """Signals that writing an output file failed."""

    def __init__(self, message: str, path: Optional[str] = None):
        self.path = path
        details = {}
        if path:
            # The path is only reported when one was supplied.
            details["path"] = path
        super().__init__(message, details)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class InvalidOutputFormatError(ExportError):
    """Signals a request for an export format that is not supported.

    When no list of supported formats is supplied, csv, parquet and json
    are reported.
    """

    def __init__(self, format: str, supported_formats: Optional[List[str]] = None):
        if not supported_formats:
            supported_formats = ["csv", "parquet", "json"]
        self.format = format
        self.supported_formats = supported_formats
        super().__init__(
            f"Unsupported export format: {format}",
            {"format": format, "supported_formats": supported_formats},
        )
|
|
misata/generators/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generators package for Misata.
|
|
3
|
+
|
|
4
|
+
Provides type-safe data generators for all supported column types.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from misata.generators.base import (
|
|
8
|
+
BaseGenerator,
|
|
9
|
+
BooleanGenerator,
|
|
10
|
+
CategoricalGenerator,
|
|
11
|
+
DateGenerator,
|
|
12
|
+
FloatGenerator,
|
|
13
|
+
ForeignKeyGenerator,
|
|
14
|
+
GeneratorFactory,
|
|
15
|
+
IntegerGenerator,
|
|
16
|
+
TextGenerator,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Names re-exported by the generators package: the base class and factory
# first, then the concrete generators imported from misata.generators.base.
__all__ = [
    "BaseGenerator", "GeneratorFactory",
    "IntegerGenerator", "FloatGenerator", "BooleanGenerator",
    "CategoricalGenerator", "DateGenerator", "TextGenerator",
    "ForeignKeyGenerator",
]
|