misata 0.1.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ """
2
+ Industry templates for quick-start synthetic data generation.
3
+
4
+ Each template provides:
5
+ - Reference tables with realistic inline data
6
+ - Transactional tables with proper relationships
7
+ - Industry-specific column definitions
8
+ """
9
+
10
+ from typing import Dict, List, Any
11
+
12
+ from misata.schema import SchemaConfig, Table, Column, Relationship
13
+
14
+
15
+ # ============================================================================
16
+ # SAAS TEMPLATE
17
+ # ============================================================================
18
+
# Template definition for a SaaS business: one inline reference table
# ("plans") plus four generated tables whose columns are described under
# "columns" and whose foreign keys are wired up under "relationships".
SAAS_TEMPLATE = {
    "name": "SaaS Company Dataset",
    "description": "Complete SaaS company data with users, plans, subscriptions, and payments",
    "seed": 42,
    "tables": [
        # Reference table: rows are given verbatim, so no row_count is set.
        {
            "name": "plans",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "Free", "price": 0.0, "billing_period": "monthly",
                 "features": "Basic features, 1 user"},
                {"id": 2, "name": "Starter", "price": 9.99, "billing_period": "monthly",
                 "features": "All free + 5 users, analytics"},
                {"id": 3, "name": "Professional", "price": 29.99, "billing_period": "monthly",
                 "features": "All starter + 25 users, API access"},
                {"id": 4, "name": "Enterprise", "price": 99.99, "billing_period": "monthly",
                 "features": "Unlimited users, custom integrations, SLA"},
            ],
        },
        # Generated tables: only a name and a target row count.
        {"name": "users", "row_count": 10000, "is_reference": False},
        {"name": "subscriptions", "row_count": 8000, "is_reference": False},
        {"name": "payments", "row_count": 50000, "is_reference": False},
        {"name": "usage_events", "row_count": 100000, "is_reference": False},
    ],
    "columns": {
        "users": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 10000},
             "unique": True},
            {"name": "name", "type": "text", "distribution_params": {"text_type": "name"}},
            {"name": "email", "type": "text", "distribution_params": {"text_type": "email"}},
            {"name": "company", "type": "text", "distribution_params": {"text_type": "company"}},
            {"name": "created_at", "type": "date",
             "distribution_params": {"start": "2022-01-01", "end": "2024-12-31"}},
        ],
        "subscriptions": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 8000}},
            {"name": "user_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "plan_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "status", "type": "categorical",
             "distribution_params": {
                 "choices": ["active", "cancelled", "paused", "trial"],
                 "probabilities": [0.7, 0.15, 0.1, 0.05],
             }},
            {"name": "started_at", "type": "date",
             "distribution_params": {"start": "2022-01-01", "end": "2024-12-31"}},
        ],
        "payments": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 50000}},
            {"name": "subscription_id", "type": "foreign_key", "distribution_params": {}},
            # The amounts are the three non-zero plan prices listed in "plans" above.
            {"name": "amount", "type": "categorical",
             "distribution_params": {
                 "choices": [9.99, 29.99, 99.99],
                 "probabilities": [0.5, 0.35, 0.15],
             }},
            {"name": "status", "type": "categorical",
             "distribution_params": {
                 "choices": ["completed", "pending", "failed", "refunded"],
                 "probabilities": [0.9, 0.05, 0.03, 0.02],
             }},
            {"name": "paid_at", "type": "date",
             "distribution_params": {"start": "2022-01-01", "end": "2024-12-31"}},
        ],
        "usage_events": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 100000}},
            {"name": "user_id", "type": "foreign_key", "distribution_params": {}},
            # No "probabilities" given for this categorical column.
            {"name": "event_type", "type": "categorical",
             "distribution_params": {
                 "choices": ["login", "api_call", "export", "invite_user", "report_view"],
             }},
            {"name": "created_at", "type": "date",
             "distribution_params": {"start": "2023-01-01", "end": "2024-12-31"}},
        ],
    },
    # Each entry links child_table.child_key to parent_table.parent_key.
    "relationships": [
        {"parent_table": "users", "child_table": "subscriptions",
         "parent_key": "id", "child_key": "user_id"},
        {"parent_table": "plans", "child_table": "subscriptions",
         "parent_key": "id", "child_key": "plan_id"},
        {"parent_table": "subscriptions", "child_table": "payments",
         "parent_key": "id", "child_key": "subscription_id"},
        {"parent_table": "users", "child_table": "usage_events",
         "parent_key": "id", "child_key": "user_id"},
    ],
    "events": [],
}
76
+
77
+
78
+ # ============================================================================
79
+ # E-COMMERCE TEMPLATE
80
+ # ============================================================================
81
+
# Template definition for an online store: two inline reference tables
# ("categories", "products") plus four generated tables.
ECOMMERCE_TEMPLATE = {
    "name": "E-Commerce Store Dataset",
    "description": "Complete e-commerce data with products, orders, and reviews",
    "seed": 42,
    "tables": [
        {
            "name": "categories",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "Electronics", "description": "Phones, computers, accessories"},
                {"id": 2, "name": "Clothing", "description": "Apparel and fashion"},
                {"id": 3, "name": "Home & Garden", "description": "Furniture and decor"},
                {"id": 4, "name": "Sports", "description": "Sports equipment and apparel"},
                {"id": 5, "name": "Books", "description": "Books and media"},
            ],
        },
        # NOTE(review): rows carry a category_id, but "relationships" below has
        # no categories -> products entry; confirm whether that is intentional.
        {
            "name": "products",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "iPhone 15 Pro", "category_id": 1,
                 "price": 999.99, "stock": 150},
                {"id": 2, "name": "MacBook Air M3", "category_id": 1,
                 "price": 1299.99, "stock": 80},
                {"id": 3, "name": "AirPods Pro", "category_id": 1,
                 "price": 249.99, "stock": 500},
                {"id": 4, "name": "Classic T-Shirt", "category_id": 2,
                 "price": 29.99, "stock": 1000},
                {"id": 5, "name": "Running Shoes", "category_id": 4,
                 "price": 89.99, "stock": 300},
                {"id": 6, "name": "Yoga Mat", "category_id": 4,
                 "price": 39.99, "stock": 450},
                {"id": 7, "name": "Coffee Table", "category_id": 3,
                 "price": 199.99, "stock": 75},
                {"id": 8, "name": "Desk Lamp", "category_id": 3,
                 "price": 49.99, "stock": 200},
                {"id": 9, "name": "Python Cookbook", "category_id": 5,
                 "price": 49.99, "stock": 120},
                {"id": 10, "name": "Data Science Handbook", "category_id": 5,
                 "price": 59.99, "stock": 100},
            ],
        },
        # Generated tables: only a name and a target row count.
        {"name": "customers", "row_count": 10000, "is_reference": False},
        {"name": "orders", "row_count": 25000, "is_reference": False},
        {"name": "order_items", "row_count": 50000, "is_reference": False},
        {"name": "reviews", "row_count": 15000, "is_reference": False},
    ],
    "columns": {
        "customers": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 10000},
             "unique": True},
            {"name": "name", "type": "text", "distribution_params": {"text_type": "name"}},
            {"name": "email", "type": "text", "distribution_params": {"text_type": "email"}},
            {"name": "address", "type": "text", "distribution_params": {"text_type": "address"}},
            {"name": "created_at", "type": "date",
             "distribution_params": {"start": "2020-01-01", "end": "2024-12-31"}},
        ],
        "orders": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 25000}},
            {"name": "customer_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "status", "type": "categorical",
             "distribution_params": {
                 "choices": ["pending", "shipped", "delivered", "cancelled", "returned"],
                 "probabilities": [0.1, 0.15, 0.65, 0.05, 0.05],
             }},
            {"name": "total", "type": "float",
             "distribution_params": {
                 "distribution": "exponential", "scale": 150, "min": 10, "max": 5000,
             }},
            {"name": "ordered_at", "type": "date",
             "distribution_params": {"start": "2022-01-01", "end": "2024-12-31"}},
        ],
        "order_items": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 50000}},
            {"name": "order_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "product_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "quantity", "type": "int",
             "distribution_params": {"distribution": "poisson", "lambda": 2, "min": 1, "max": 10}},
        ],
        "reviews": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 15000}},
            {"name": "product_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "customer_id", "type": "foreign_key", "distribution_params": {}},
            # NOTE(review): typed "int" but parameterised with choices/probabilities
            # like the categorical columns; confirm the generator supports this.
            {"name": "rating", "type": "int",
             "distribution_params": {
                 "choices": [1, 2, 3, 4, 5],
                 "probabilities": [0.05, 0.05, 0.15, 0.35, 0.40],
             }},
            {"name": "created_at", "type": "date",
             "distribution_params": {"start": "2022-01-01", "end": "2024-12-31"}},
        ],
    },
    # Each entry links child_table.child_key to parent_table.parent_key.
    "relationships": [
        {"parent_table": "customers", "child_table": "orders",
         "parent_key": "id", "child_key": "customer_id"},
        {"parent_table": "orders", "child_table": "order_items",
         "parent_key": "id", "child_key": "order_id"},
        {"parent_table": "products", "child_table": "order_items",
         "parent_key": "id", "child_key": "product_id"},
        {"parent_table": "products", "child_table": "reviews",
         "parent_key": "id", "child_key": "product_id"},
        {"parent_table": "customers", "child_table": "reviews",
         "parent_key": "id", "child_key": "customer_id"},
    ],
    "events": [],
}
157
+
158
+
159
+ # ============================================================================
160
+ # FITNESS TEMPLATE
161
+ # ============================================================================
162
+
# Template definition for a fitness app: three inline reference tables
# ("plans", "exercises", "meal_types") plus four generated tables.
FITNESS_TEMPLATE = {
    "name": "Fitness App Dataset",
    "description": "Fitness app data with exercises, workouts, and nutrition",
    "seed": 42,
    "tables": [
        {
            "name": "plans",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "Free", "price": 0.0,
                 "features": "Basic workout tracking"},
                {"id": 2, "name": "Premium", "price": 9.99,
                 "features": "All workouts + nutrition tracking"},
                {"id": 3, "name": "Pro", "price": 19.99,
                 "features": "Everything + personal coaching"},
            ],
        },
        {
            "name": "exercises",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "Running", "category": "Cardio",
                 "calories_per_minute": 10, "difficulty": "medium"},
                {"id": 2, "name": "Cycling", "category": "Cardio",
                 "calories_per_minute": 8, "difficulty": "easy"},
                {"id": 3, "name": "Swimming", "category": "Cardio",
                 "calories_per_minute": 9, "difficulty": "medium"},
                {"id": 4, "name": "Yoga", "category": "Flexibility",
                 "calories_per_minute": 3, "difficulty": "easy"},
                {"id": 5, "name": "Pilates", "category": "Flexibility",
                 "calories_per_minute": 4, "difficulty": "medium"},
                {"id": 6, "name": "Weightlifting", "category": "Strength",
                 "calories_per_minute": 6, "difficulty": "hard"},
                {"id": 7, "name": "HIIT", "category": "Cardio",
                 "calories_per_minute": 12, "difficulty": "hard"},
                {"id": 8, "name": "Boxing", "category": "Cardio",
                 "calories_per_minute": 11, "difficulty": "hard"},
                {"id": 9, "name": "Stretching", "category": "Flexibility",
                 "calories_per_minute": 2, "difficulty": "easy"},
                {"id": 10, "name": "Walking", "category": "Cardio",
                 "calories_per_minute": 4, "difficulty": "easy"},
            ],
        },
        {
            "name": "meal_types",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "Breakfast", "typical_calories": 400},
                {"id": 2, "name": "Lunch", "typical_calories": 600},
                {"id": 3, "name": "Dinner", "typical_calories": 700},
                {"id": 4, "name": "Snack", "typical_calories": 200},
            ],
        },
        # Generated tables: only a name and a target row count.
        {"name": "users", "row_count": 10000, "is_reference": False},
        {"name": "subscriptions", "row_count": 8000, "is_reference": False},
        {"name": "workouts", "row_count": 100000, "is_reference": False},
        {"name": "meals", "row_count": 50000, "is_reference": False},
    ],
    "columns": {
        "users": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 10000},
             "unique": True},
            {"name": "name", "type": "text", "distribution_params": {"text_type": "name"}},
            {"name": "email", "type": "text", "distribution_params": {"text_type": "email"}},
            {"name": "age", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 18, "max": 65}},
            {"name": "weight_kg", "type": "float",
             "distribution_params": {
                 "distribution": "normal", "mean": 75, "std": 15, "min": 40, "max": 150,
             }},
            {"name": "height_cm", "type": "float",
             "distribution_params": {
                 "distribution": "normal", "mean": 170, "std": 10, "min": 140, "max": 210,
             }},
            # No "probabilities" given for this categorical column.
            {"name": "goal", "type": "categorical",
             "distribution_params": {
                 "choices": ["lose_weight", "build_muscle", "maintain", "improve_endurance"],
             }},
        ],
        "subscriptions": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 8000}},
            {"name": "user_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "plan_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "status", "type": "categorical",
             "distribution_params": {
                 "choices": ["active", "cancelled", "paused"],
                 "probabilities": [0.75, 0.15, 0.10],
             }},
            {"name": "started_at", "type": "date",
             "distribution_params": {"start": "2022-01-01", "end": "2024-12-31"}},
        ],
        "workouts": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 100000}},
            {"name": "user_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "exercise_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "duration_minutes", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 15, "max": 90}},
            {"name": "calories_burned", "type": "int",
             "distribution_params": {
                 "distribution": "normal", "mean": 300, "std": 150, "min": 50, "max": 1500,
             }},
            {"name": "date", "type": "date",
             "distribution_params": {"start": "2023-01-01", "end": "2024-12-31"}},
        ],
        "meals": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 50000}},
            {"name": "user_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "meal_type_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "calories", "type": "int",
             "distribution_params": {
                 "distribution": "normal", "mean": 500, "std": 200, "min": 100, "max": 1500,
             }},
            {"name": "date", "type": "date",
             "distribution_params": {"start": "2023-01-01", "end": "2024-12-31"}},
        ],
    },
    # Each entry links child_table.child_key to parent_table.parent_key.
    "relationships": [
        {"parent_table": "users", "child_table": "subscriptions",
         "parent_key": "id", "child_key": "user_id"},
        {"parent_table": "plans", "child_table": "subscriptions",
         "parent_key": "id", "child_key": "plan_id"},
        {"parent_table": "users", "child_table": "workouts",
         "parent_key": "id", "child_key": "user_id"},
        {"parent_table": "exercises", "child_table": "workouts",
         "parent_key": "id", "child_key": "exercise_id"},
        {"parent_table": "users", "child_table": "meals",
         "parent_key": "id", "child_key": "user_id"},
        {"parent_table": "meal_types", "child_table": "meals",
         "parent_key": "id", "child_key": "meal_type_id"},
    ],
    "events": [],
}
251
+
252
+
253
+ # ============================================================================
254
+ # HEALTHCARE TEMPLATE
255
+ # ============================================================================
256
+
# Template definition for a healthcare system: two inline reference tables
# ("departments", "diagnoses_catalog") plus four generated tables.
HEALTHCARE_TEMPLATE = {
    "name": "Healthcare System Dataset",
    "description": "Healthcare data with patients, doctors, appointments, and diagnoses",
    "seed": 42,
    "tables": [
        {
            "name": "departments",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "name": "Cardiology", "floor": 3},
                {"id": 2, "name": "Orthopedics", "floor": 4},
                {"id": 3, "name": "Pediatrics", "floor": 2},
                {"id": 4, "name": "Neurology", "floor": 5},
                {"id": 5, "name": "General Medicine", "floor": 1},
                {"id": 6, "name": "Emergency", "floor": 1},
            ],
        },
        {
            "name": "diagnoses_catalog",
            "is_reference": True,
            "inline_data": [
                {"id": 1, "code": "J06.9", "name": "Acute upper respiratory infection",
                 "category": "Respiratory"},
                {"id": 2, "code": "I10", "name": "Essential hypertension",
                 "category": "Cardiovascular"},
                {"id": 3, "code": "E11.9", "name": "Type 2 diabetes",
                 "category": "Endocrine"},
                {"id": 4, "code": "M54.5", "name": "Low back pain",
                 "category": "Musculoskeletal"},
                {"id": 5, "code": "J18.9", "name": "Pneumonia",
                 "category": "Respiratory"},
                {"id": 6, "code": "K21.0", "name": "GERD",
                 "category": "Digestive"},
                {"id": 7, "code": "F32.9", "name": "Major depressive disorder",
                 "category": "Mental Health"},
                {"id": 8, "code": "G43.909", "name": "Migraine",
                 "category": "Neurological"},
            ],
        },
        # Generated tables: only a name and a target row count.
        {"name": "doctors", "row_count": 100, "is_reference": False},
        {"name": "patients", "row_count": 10000, "is_reference": False},
        {"name": "appointments", "row_count": 50000, "is_reference": False},
        {"name": "patient_diagnoses", "row_count": 30000, "is_reference": False},
    ],
    "columns": {
        "doctors": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 100},
             "unique": True},
            {"name": "name", "type": "text", "distribution_params": {"text_type": "name"}},
            {"name": "department_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "specialization", "type": "categorical",
             "distribution_params": {"choices": ["MD", "DO", "Specialist", "Surgeon"]}},
            {"name": "years_experience", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 35}},
        ],
        "patients": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 10000},
             "unique": True},
            {"name": "name", "type": "text", "distribution_params": {"text_type": "name"}},
            {"name": "date_of_birth", "type": "date",
             "distribution_params": {"start": "1940-01-01", "end": "2010-12-31"}},
            {"name": "gender", "type": "categorical",
             "distribution_params": {
                 "choices": ["Male", "Female", "Other"],
                 "probabilities": [0.48, 0.48, 0.04],
             }},
            {"name": "phone", "type": "text", "distribution_params": {"text_type": "phone"}},
            {"name": "blood_type", "type": "categorical",
             "distribution_params": {
                 "choices": ["A+", "A-", "B+", "B-", "AB+", "AB-", "O+", "O-"],
             }},
        ],
        "appointments": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 50000}},
            {"name": "patient_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "doctor_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "scheduled_at", "type": "date",
             "distribution_params": {"start": "2023-01-01", "end": "2025-12-31"}},
            {"name": "status", "type": "categorical",
             "distribution_params": {
                 "choices": ["scheduled", "completed", "cancelled", "no_show"],
                 "probabilities": [0.2, 0.65, 0.10, 0.05],
             }},
            # NOTE(review): typed "int" but parameterised with choices/probabilities
            # like the categorical columns; confirm the generator supports this.
            {"name": "duration_minutes", "type": "int",
             "distribution_params": {
                 "choices": [15, 30, 45, 60],
                 "probabilities": [0.3, 0.4, 0.2, 0.1],
             }},
        ],
        "patient_diagnoses": [
            {"name": "id", "type": "int",
             "distribution_params": {"distribution": "uniform", "min": 1, "max": 30000}},
            {"name": "patient_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "diagnosis_id", "type": "foreign_key", "distribution_params": {}},
            {"name": "diagnosed_at", "type": "date",
             "distribution_params": {"start": "2020-01-01", "end": "2024-12-31"}},
            {"name": "severity", "type": "categorical",
             "distribution_params": {
                 "choices": ["mild", "moderate", "severe"],
                 "probabilities": [0.5, 0.35, 0.15],
             }},
        ],
    },
    # Each entry links child_table.child_key to parent_table.parent_key.
    "relationships": [
        {"parent_table": "departments", "child_table": "doctors",
         "parent_key": "id", "child_key": "department_id"},
        {"parent_table": "patients", "child_table": "appointments",
         "parent_key": "id", "child_key": "patient_id"},
        {"parent_table": "doctors", "child_table": "appointments",
         "parent_key": "id", "child_key": "doctor_id"},
        {"parent_table": "patients", "child_table": "patient_diagnoses",
         "parent_key": "id", "child_key": "patient_id"},
        {"parent_table": "diagnoses_catalog", "child_table": "patient_diagnoses",
         "parent_key": "id", "child_key": "diagnosis_id"},
    ],
    "events": [],
}
334
+
335
+
336
+ # ============================================================================
337
+ # TEMPLATE REGISTRY
338
+ # ============================================================================
339
+
# Lookup table from public template name to its definition dict. The values
# are the module-level template objects themselves, not copies.
TEMPLATES: Dict[str, Dict[str, Any]] = {
    "saas": SAAS_TEMPLATE,
    "ecommerce": ECOMMERCE_TEMPLATE,
    "fitness": FITNESS_TEMPLATE,
    "healthcare": HEALTHCARE_TEMPLATE,
}
346
+
347
+
def get_template(name: str) -> Dict[str, Any]:
    """
    Look up a registered template.

    Args:
        name: Template name (saas, ecommerce, fitness, healthcare)

    Returns:
        The template dictionary stored in the registry (the registered
        object itself, not a copy)

    Raises:
        ValueError: If no template is registered under ``name``
    """
    if name in TEMPLATES:
        return TEMPLATES[name]

    # Unknown name: list every registered name in the error to help the caller.
    known_names = ", ".join(TEMPLATES)
    raise ValueError(f"Template '{name}' not found. Available: {known_names}")
365
+
366
+
def list_templates() -> List[str]:
    """Return the names of all registered templates, in registry order."""
    return list(TEMPLATES)
370
+
371
+
def template_to_schema(template_name: str, row_multiplier: float = 1.0) -> SchemaConfig:
    """
    Convert a template to a SchemaConfig.

    The registered template is only read, never modified: scaled row counts
    are computed per call, so repeated calls always scale from the template's
    original ``row_count`` values instead of compounding.

    Args:
        template_name: Name of template
        row_multiplier: Multiply the row counts of non-reference tables by
            this factor (the result is truncated with ``int``)

    Returns:
        SchemaConfig ready for generation

    Raises:
        ValueError: If template not found
    """
    template = get_template(template_name)

    # Parse tables
    tables = []
    for t in template["tables"]:
        is_reference = t.get("is_reference", False)
        inline_data = t.get("inline_data")

        if "row_count" in t:
            row_count = t["row_count"]
            # Scale into a local only; writing back into `t` would permanently
            # alter the shared module-level template for every later caller.
            if row_multiplier != 1.0 and not is_reference:
                row_count = int(row_count * row_multiplier)
        else:
            # No explicit count: one row per inline record, or 100 if there are none.
            row_count = len(inline_data or []) or 100

        tables.append(Table(
            name=t["name"],
            row_count=row_count,
            is_reference=is_reference,
            inline_data=inline_data,
        ))

    # Parse columns
    columns = {}
    for table_name, cols in template["columns"].items():
        columns[table_name] = [
            Column(
                name=c["name"],
                type=c["type"],
                distribution_params=c.get("distribution_params", {}),
                nullable=c.get("nullable", False),
                unique=c.get("unique", False),
            )
            for c in cols
        ]

    # Add inferred columns for reference tables that have inline rows but no
    # explicit column list; the column names come from the first inline row.
    for table in tables:
        if table.is_reference and table.inline_data and table.name not in columns:
            columns[table.name] = [
                Column(
                    name=col_name,
                    type="text",  # placeholder; original note: "Will be inferred"
                    distribution_params={},
                )
                for col_name in table.inline_data[0]
            ]

    # Parse relationships
    relationships = [
        Relationship(
            parent_table=r["parent_table"],
            child_table=r["child_table"],
            parent_key=r["parent_key"],
            child_key=r["child_key"],
        )
        for r in template["relationships"]
    ]

    # The template's own "events" list is not read; an empty list is always passed.
    return SchemaConfig(
        name=template["name"],
        description=template.get("description"),
        tables=tables,
        columns=columns,
        relationships=relationships,
        events=[],
        seed=template.get("seed", 42),
    )