cudag 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudag/__init__.py +334 -0
- cudag/annotation/__init__.py +77 -0
- cudag/annotation/codegen.py +648 -0
- cudag/annotation/config.py +545 -0
- cudag/annotation/loader.py +342 -0
- cudag/annotation/scaffold.py +121 -0
- cudag/annotation/transcription.py +296 -0
- cudag/cli/__init__.py +5 -0
- cudag/cli/main.py +315 -0
- cudag/cli/new.py +873 -0
- cudag/core/__init__.py +364 -0
- cudag/core/button.py +137 -0
- cudag/core/canvas.py +222 -0
- cudag/core/config.py +70 -0
- cudag/core/coords.py +233 -0
- cudag/core/data_grid.py +804 -0
- cudag/core/dataset.py +678 -0
- cudag/core/distribution.py +136 -0
- cudag/core/drawing.py +75 -0
- cudag/core/fonts.py +156 -0
- cudag/core/generator.py +163 -0
- cudag/core/grid.py +367 -0
- cudag/core/grounding_task.py +247 -0
- cudag/core/icon.py +207 -0
- cudag/core/iconlist_task.py +301 -0
- cudag/core/models.py +1251 -0
- cudag/core/random.py +130 -0
- cudag/core/renderer.py +190 -0
- cudag/core/screen.py +402 -0
- cudag/core/scroll_task.py +254 -0
- cudag/core/scrollable_grid.py +447 -0
- cudag/core/state.py +110 -0
- cudag/core/task.py +293 -0
- cudag/core/taskbar.py +350 -0
- cudag/core/text.py +212 -0
- cudag/core/utils.py +82 -0
- cudag/data/surnames.txt +5000 -0
- cudag/modal_apps/__init__.py +4 -0
- cudag/modal_apps/archive.py +103 -0
- cudag/modal_apps/extract.py +138 -0
- cudag/modal_apps/preprocess.py +529 -0
- cudag/modal_apps/upload.py +317 -0
- cudag/prompts/SYSTEM_PROMPT.txt +104 -0
- cudag/prompts/__init__.py +33 -0
- cudag/prompts/system.py +43 -0
- cudag/prompts/tools.py +382 -0
- cudag/py.typed +0 -0
- cudag/schemas/filesystem.json +90 -0
- cudag/schemas/test_record.schema.json +113 -0
- cudag/schemas/train_record.schema.json +90 -0
- cudag/server/__init__.py +21 -0
- cudag/server/app.py +232 -0
- cudag/server/services/__init__.py +9 -0
- cudag/server/services/generator.py +128 -0
- cudag/templates/scripts/archive.sh +35 -0
- cudag/templates/scripts/build.sh +13 -0
- cudag/templates/scripts/extract.sh +54 -0
- cudag/templates/scripts/generate.sh +116 -0
- cudag/templates/scripts/pre-commit.sh +44 -0
- cudag/templates/scripts/preprocess.sh +46 -0
- cudag/templates/scripts/upload.sh +63 -0
- cudag/templates/scripts/verify.py +428 -0
- cudag/validation/__init__.py +35 -0
- cudag/validation/validate.py +508 -0
- cudag-0.3.10.dist-info/METADATA +570 -0
- cudag-0.3.10.dist-info/RECORD +69 -0
- cudag-0.3.10.dist-info/WHEEL +4 -0
- cudag-0.3.10.dist-info/entry_points.txt +2 -0
- cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/core/models.py
ADDED
|
@@ -0,0 +1,1251 @@
|
|
|
1
|
+
# Copyright (c) 2025 Tylt LLC. All rights reserved.
|
|
2
|
+
# CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
|
|
3
|
+
# is strictly prohibited. For licensing inquiries: hello@claimhawk.app
|
|
4
|
+
|
|
5
|
+
"""Rails-like DSL for model definitions.
|
|
6
|
+
|
|
7
|
+
Simple, readable model definitions with convention over configuration.
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
class Patient(Model):
|
|
11
|
+
first_name = string(faker="first_name")
|
|
12
|
+
last_name = string(faker="last_name")
|
|
13
|
+
    dob = date_field(min_year=1940, max_year=2010)
|
|
14
|
+
|
|
15
|
+
full_name = computed("first_name", "last_name")
|
|
16
|
+
age = years_since("dob")
|
|
17
|
+
|
|
18
|
+
appointments = has_many("Appointment")
|
|
19
|
+
primary_provider = belongs_to("Provider")
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import random
|
|
25
|
+
import string as string_module
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from dataclasses import field as dataclass_field
|
|
28
|
+
from datetime import date, datetime, timedelta
|
|
29
|
+
from typing import Any, ClassVar, Generic, TypeVar
|
|
30
|
+
|
|
31
|
+
# =============================================================================
|
|
32
|
+
# Built-in Data (used by fakers)
|
|
33
|
+
# =============================================================================
|
|
34
|
+
|
|
35
|
+
# Given names available to the first-name generator (32 entries).
FIRST_NAMES = [
    "James", "Mary", "John", "Patricia", "Robert", "Jennifer", "Michael", "Linda",
    "William", "Elizabeth", "David", "Barbara", "Richard", "Susan", "Joseph", "Jessica",
    "Thomas", "Sarah", "Charles", "Karen", "Christopher", "Nancy", "Daniel", "Lisa",
    "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra", "Donald", "Ashley",
]
|
|
69
|
+
|
|
70
|
+
# Surname pool. Order matters: the leading entries form the training split and
# the trailing entries are held out for evaluation (see _LAST_NAME_TEST_START).
LAST_NAMES = [
    # Train surnames (first ~90%)
    "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
    "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas",
    "Taylor", "Moore", "Jackson", "Martin", "Lee", "Perez", "Thompson", "White",
    "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson", "Walker", "Young",
    "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
    "Green", "Adams", "Nelson", "Baker", "Hall", "Rivera",
    # Test surnames (last ~10% - held out for evaluation)
    "Campbell", "Mitchell", "Roberts", "Carter", "Phillips",
]

# Index of the first held-out surname: LAST_NAMES[:46] is train, LAST_NAMES[46:] is test.
_LAST_NAME_TEST_START = 46
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_last_name(
    rng: random.Random,
    augment: bool = False,
    split: str = "train",
) -> str:
    """Pick a surname from the train or held-out pool, optionally suffixed.

    Args:
        rng: Random number generator used for every draw.
        augment: When True, roughly one name in ten gets a generational
            suffix ("Jr.", "Sr.", "II", "III" or "IV") appended.
        split: "test" selects the held-out surnames; any other value
            selects the training surnames.

    Returns:
        The chosen surname, with a suffix appended when augmentation fires.
    """
    pool = (
        LAST_NAMES[_LAST_NAME_TEST_START:]
        if split == "test"
        else LAST_NAMES[:_LAST_NAME_TEST_START]
    )
    surname = rng.choice(pool)

    # rng.random() is only consumed when augmentation is enabled, which keeps
    # the random stream identical for non-augmented callers.
    if not augment or rng.random() >= 0.1:
        return surname

    suffix = rng.choice(["Jr.", "Sr.", "II", "III", "IV"])
    return f"{surname} {suffix}"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def get_first_name(rng: random.Random) -> str:
    """Pick one entry from FIRST_NAMES.

    Args:
        rng: Random number generator used for the draw.

    Returns:
        The selected first name.
    """
    picked = rng.choice(FIRST_NAMES)
    return picked
|
|
169
|
+
|
|
170
|
+
# Two-letter postal abbreviations for the 50 US states.
US_STATES = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
]

# City names available for generated addresses.
CITIES = [
    "New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
    "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
]

# Abbreviated street-type suffixes.
STREET_SUFFIXES = [
    "St", "Ave", "Blvd", "Dr", "Ln", "Rd", "Way", "Ct", "Pl",
]
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# =============================================================================
|
|
240
|
+
# Field DSL - lowercase functions that return Field instances
|
|
241
|
+
# =============================================================================
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@dataclass
class Field:
    """Common base for every field kind.

    Model authors normally create fields through the lowercase DSL helpers
    rather than instantiating this class directly.
    """

    # Short tag naming the field kind; each subclass overrides it.
    _type: str = "base"
    # Whether a value must be supplied when no default is set.
    required: bool = True
    # Value used when the caller provides none.
    default: Any = None

    def generate(self, rng: random.Random) -> Any:
        """Produce a value for this field.

        The base implementation ignores ``rng`` and hands back ``default``;
        subclasses override it.
        """
        fallback = self.default
        return fallback
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# --- String Fields ---
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@dataclass
class StringField(Field):
    """Field producing string values.

    Generation strategy, in priority order: pick from ``choices``, delegate
    to the named ``faker``, expand ``pattern``, or fall back to random ASCII
    letters with a length between ``min_length`` and ``max_length``.
    """

    _type: str = "string"
    faker: str | None = None
    pattern: str | None = None
    choices: list[str] | None = None
    min_length: int = 5
    max_length: int = 20

    def generate(self, rng: random.Random) -> str:
        """Return a string using the first configured strategy."""
        if self.choices:
            produced = rng.choice(self.choices)
        elif self.faker:
            produced = _generate_faker(self.faker, rng)
        elif self.pattern:
            produced = _generate_pattern(self.pattern, rng)
        else:
            # Nothing configured: random letters of a random length (inclusive bounds).
            size = rng.randint(self.min_length, self.max_length)
            letters = rng.choices(string_module.ascii_letters, k=size)
            produced = "".join(letters)
        return produced
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def string(
    faker: str | None = None,
    pattern: str | None = None,
    choices: list[str] | None = None,
    default: str | None = None,
    required: bool = True,
) -> StringField:
    """Declare a string attribute.

    Examples:
        first_name = string(faker="first_name")
        member_id = string(pattern=r"[A-Z]{3}[0-9]{6}")
        status = string(choices=["active", "inactive"])
    """
    declared = StringField(
        faker=faker,
        pattern=pattern,
        choices=choices,
        default=default,
        required=required,
    )
    return declared
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# --- Numeric Fields ---
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
@dataclass
class IntField(Field):
    """Field producing integers within an inclusive range."""

    _type: str = "int"
    min_value: int = 0
    max_value: int = 100

    def generate(self, rng: random.Random) -> int:
        """Return a random integer N with min_value <= N <= max_value."""
        low, high = self.min_value, self.max_value
        return rng.randint(low, high)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def integer(
    min_value: int = 0,
    max_value: int = 100,
    default: int | None = None,
    required: bool = True,
) -> IntField:
    """Declare an integer attribute.

    Example:
        age = integer(min_value=18, max_value=100)
    """
    declared = IntField(
        min_value=min_value,
        max_value=max_value,
        default=default,
        required=required,
    )
    return declared
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
@dataclass
class FloatField(Field):
    """Field producing floats rounded to a fixed number of decimals."""

    _type: str = "float"
    min_value: float = 0.0
    max_value: float = 100.0
    precision: int = 2

    def generate(self, rng: random.Random) -> float:
        """Return a uniform draw from [min_value, max_value], rounded to ``precision``."""
        return round(rng.uniform(self.min_value, self.max_value), self.precision)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def decimal(
    min_value: float = 0.0,
    max_value: float = 100.0,
    precision: int = 2,
    default: float | None = None,
    required: bool = True,
) -> FloatField:
    """Declare a decimal (rounded float) attribute.

    Example:
        price = decimal(min_value=0.01, max_value=999.99, precision=2)
    """
    declared = FloatField(
        min_value=min_value,
        max_value=max_value,
        precision=precision,
        default=default,
        required=required,
    )
    return declared
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def money(
    min_value: float = 0.0,
    max_value: float = 1000.0,
    default: float | None = None,
    required: bool = True,
) -> MoneyField:
    """Define a money field (formatted as $X.XX).

    Args:
        min_value: Lower bound of the generated amount.
        max_value: Upper bound of the generated amount.
        default: Value used when none is supplied to the model.
        required: Whether the model must receive a value when no default
            is set. Accepted here for parity with the other field helpers,
            so ``attribute("fee", "money", required=False)`` works.

    Example:
        fee = money(min_value=50.0, max_value=2500.0)
    """
    return MoneyField(
        min_value=min_value,
        max_value=max_value,
        default=default,
        required=required,
    )
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
@dataclass
class MoneyField(Field):
    """Field producing dollar amounts rendered as ``$X.XX`` strings."""

    _type: str = "money"
    min_value: float = 0.0
    max_value: float = 1000.0

    def generate(self, rng: random.Random) -> str:
        """Return a uniform draw from [min_value, max_value] formatted with two decimals."""
        amount = rng.uniform(self.min_value, self.max_value)
        return f"${amount:.2f}"
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
# --- Date/Time Fields ---
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@dataclass
class DateField(Field):
    """Field producing formatted calendar dates between two years."""

    _type: str = "date"
    min_year: int = 2000
    max_year: int = 2025
    format: str = "%Y-%m-%d"

    def generate(self, rng: random.Random) -> str:
        """Return a random date from Jan 1 of min_year to Dec 31 of max_year.

        The date is rendered with ``strftime`` using ``format``.
        """
        first_day = date(self.min_year, 1, 1)
        last_day = date(self.max_year, 12, 31)
        # Both endpoints are reachable: randint is inclusive of the full span.
        offset = rng.randint(0, (last_day - first_day).days)
        picked = first_day + timedelta(days=offset)
        return picked.strftime(self.format)
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def date_field(
    min_year: int = 2000,
    max_year: int = 2025,
    format: str = "%Y-%m-%d",
    default: str | None = None,
    required: bool = True,
) -> DateField:
    """Declare a date attribute.

    Example:
        dob = date_field(min_year=1940, max_year=2010, format="%m/%d/%Y")
    """
    declared = DateField(
        min_year=min_year,
        max_year=max_year,
        format=format,
        default=default,
        required=required,
    )
    return declared
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
@dataclass
class TimeField(Field):
    """Field producing formatted times of day on quarter-hour marks."""

    _type: str = "time"
    min_hour: int = 0
    max_hour: int = 23
    format: str = "%I:%M %p"

    def generate(self, rng: random.Random) -> str:
        """Return a random time with hour in [min_hour, max_hour] and minute in {0, 15, 30, 45}."""
        picked_hour = rng.randint(self.min_hour, self.max_hour)
        picked_minute = rng.choice([0, 15, 30, 45])
        # The calendar date is a fixed placeholder so strftime can be used.
        moment = datetime(2000, 1, 1, picked_hour, picked_minute)
        return moment.strftime(self.format)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def time_field(
    min_hour: int = 0,
    max_hour: int = 23,
    format: str = "%I:%M %p",
    default: str | None = None,
    required: bool = True,
) -> TimeField:
    """Declare a time-of-day attribute.

    Example:
        appointment_time = time_field(min_hour=8, max_hour=17)
    """
    declared = TimeField(
        min_hour=min_hour,
        max_hour=max_hour,
        format=format,
        default=default,
        required=required,
    )
    return declared
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
# --- Boolean Field ---
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
@dataclass
class BoolField(Field):
    """Field producing booleans with a configurable chance of True."""

    _type: str = "bool"
    probability: float = 0.5

    def generate(self, rng: random.Random) -> bool:
        """Return True when a uniform draw from [0, 1) falls below ``probability``."""
        draw = rng.random()
        return draw < self.probability
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def boolean(
    probability: float = 0.5,
    default: bool | None = None,
    required: bool = True,
) -> BoolField:
    """Define a boolean field.

    Args:
        probability: Chance that a generated value is True.
        default: Value used when none is supplied to the model.
        required: Whether the model must receive a value when no default
            is set. Accepted here for parity with the other field helpers,
            so ``attribute("flag", "boolean", required=False)`` works.

    Example:
        is_active = boolean(probability=0.8)  # 80% chance True
    """
    return BoolField(probability=probability, default=default, required=required)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
# --- Choice Field ---
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
@dataclass
class ChoiceField(Field):
    """Field that picks one value from a fixed set of options."""

    _type: str = "choice"
    choices: list[Any] = dataclass_field(default_factory=list)
    weights: list[float] | None = None

    def generate(self, rng: random.Random) -> Any:
        """Return one option, honouring ``weights`` when they are provided.

        Raises:
            ValueError: If no options were configured.
        """
        if not self.choices:
            raise ValueError("choice() requires at least one option")
        if not self.weights:
            return rng.choice(self.choices)
        # rng.choices returns a list even for k=1; unpack the single pick.
        (picked,) = rng.choices(self.choices, weights=self.weights, k=1)
        return picked
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def choice(
    *options: Any,
    weights: list[float] | None = None,
    default: Any = None,
    required: bool = True,
) -> ChoiceField:
    """Define a choice field.

    Args:
        *options: The values to pick from.
        weights: Optional relative weights, one per option.
        default: Value used when none is supplied to the model.
        required: Whether the model must receive a value when no default
            is set. Keyword-only, added for parity with the other field
            helpers so ``attribute("s", "choice", "a", "b", required=False)``
            works.

    Example:
        status = choice("pending", "approved", "denied")
        status = choice("pending", "approved", weights=[0.7, 0.3])
    """
    return ChoiceField(
        choices=list(options),
        weights=weights,
        default=default,
        required=required,
    )
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
# --- List Field ---
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
@dataclass
class ListField(Field):
    """Field producing a list of values generated by another field."""

    _type: str = "list"
    # Each instance gets its own fresh StringField when none is supplied.
    item_field: Field = dataclass_field(default_factory=lambda: StringField())
    min_items: int = 1
    max_items: int = 5

    def generate(self, rng: random.Random) -> list[Any]:
        """Return between min_items and max_items (inclusive) generated items."""
        size = rng.randint(self.min_items, self.max_items)
        items: list[Any] = []
        for _ in range(size):
            items.append(self.item_field.generate(rng))
        return items
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def list_of(
    item_field: Field,
    min_items: int = 1,
    max_items: int = 5,
) -> ListField:
    """Declare a list attribute whose elements come from ``item_field``.

    Example:
        phones = list_of(string(faker="phone"), min_items=1, max_items=3)
    """
    declared = ListField(
        item_field=item_field,
        min_items=min_items,
        max_items=max_items,
    )
    return declared
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
# =============================================================================
|
|
549
|
+
# Computed Fields - derive values from other fields
|
|
550
|
+
# =============================================================================
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
@dataclass
class ComputedField(Field):
    """Computed field that derives its value from other fields.

    Attributes:
        sources: Names of the instance attributes the value is derived from.
        formula: "concat" joins the sources; "years_since" computes the
            number of years since the single source value.
        separator: Glue string used by the "concat" formula.
    """

    _type: str = "computed"
    sources: tuple[str, ...] = ()
    formula: str = "concat"
    separator: str = " "

    def generate(self, rng: random.Random) -> Any:
        """Return None; the real value is produced later by ``compute``."""
        return None  # Computed after generation

    def compute(self, instance: Model) -> Any:
        """Compute the value from the attributes of ``instance``.

        Args:
            instance: Object whose attributes named in ``sources`` are read.

        Returns:
            For "concat", the source values joined with ``separator``.
            Missing attributes and ``None`` values contribute an empty
            string. For "years_since", the result of
            ``_compute_years_since`` on the single source (0 when the
            number of sources is not exactly one). ``None`` for any other
            formula.
        """
        if self.formula == "concat":
            parts: list[str] = []
            for source in self.sources:
                value = getattr(instance, source, "")
                # Optional fields are stored as None; render them as empty
                # text instead of the literal string "None".
                parts.append("" if value is None else str(value))
            return self.separator.join(parts)
        if self.formula == "years_since":
            if len(self.sources) != 1:
                return 0
            source_val = getattr(instance, self.sources[0], None)
            return _compute_years_since(source_val)
        return None
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def computed(*sources: str, separator: str = " ") -> ComputedField:
    """Declare an attribute built by joining other attributes.

    Example:
        full_name = computed("first_name", "last_name")
        full_address = computed("street", "city", "state", separator=", ")
    """
    declared = ComputedField(
        sources=sources,
        formula="concat",
        separator=separator,
    )
    return declared
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def years_since(source: str) -> ComputedField:
    """Declare an attribute holding the number of years since a date attribute.

    Example:
        age = years_since("dob")
    """
    only_source = (source,)
    return ComputedField(sources=only_source, formula="years_since")
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
# =============================================================================
|
|
598
|
+
# Relationships - Rails-style with convention over configuration
|
|
599
|
+
# =============================================================================
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
@dataclass
class Relationship:
    """Base relationship class.

    Attributes:
        model: Name of the related model class.
        foreign_key: Explicit foreign-key name; when omitted it is inferred
            from ``model`` (convention: ``model_name_id``).
    """

    model: str
    foreign_key: str | None = None  # Convention: model_name_id

    def resolve(self, registry: dict[str, type[Model]]) -> type[Model] | None:
        """Look up the related model class by name; None when it is not registered."""
        return registry.get(self.model)

    def inferred_foreign_key(self) -> str:
        """Infer foreign key from model name (Rails convention).

        An explicit ``foreign_key`` wins. Otherwise the CamelCase model name
        is converted to snake_case and suffixed with ``_id``. Runs of
        capitals are treated as one word, so ``"HTTPServer"`` becomes
        ``"http_server_id"`` rather than ``"h_t_t_p_server_id"``.
        """
        if self.foreign_key:
            return self.foreign_key

        name = self.model
        result: list[str] = []
        for i, char in enumerate(name):
            if char.isupper() and i > 0:
                prev_char = name[i - 1]
                next_char = name[i + 1] if i + 1 < len(name) else ""
                # A word starts at a capital that follows a non-capital
                # ("tP" in "PatientPlan") or that ends an acronym by
                # preceding a lowercase letter ("PS" + "e" in "HTTPServer").
                if not prev_char.isupper() or next_char.islower():
                    result.append("_")
            result.append(char.lower())
        return "".join(result) + "_id"
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
@dataclass
class HasManyRel(Relationship):
    """One-to-many relationship, with bounds on how many related records exist."""

    min_count: int = 1  # fewest related records
    max_count: int = 10  # most related records
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
@dataclass
class BelongsToRel(Relationship):
    """Many-to-one relationship; adds nothing beyond the base fields."""
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
@dataclass
class HasOneRel(Relationship):
    """One-to-one relationship; adds nothing beyond the base fields."""
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def has_many(
    model: str,
    foreign_key: str | None = None,
    min_count: int = 1,
    max_count: int = 10,
) -> HasManyRel:
    """Declare a has-many relationship to the named model.

    Example:
        appointments = has_many("Appointment")
        appointments = has_many("Appointment", min_count=0, max_count=20)
    """
    relation = HasManyRel(
        model=model,
        foreign_key=foreign_key,
        min_count=min_count,
        max_count=max_count,
    )
    return relation
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def belongs_to(model: str, foreign_key: str | None = None) -> BelongsToRel:
    """Declare a belongs-to relationship to the named model.

    Example:
        patient = belongs_to("Patient")
        treating_provider = belongs_to("Provider", foreign_key="treating_provider_id")
    """
    relation = BelongsToRel(model=model, foreign_key=foreign_key)
    return relation
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def has_one(model: str, foreign_key: str | None = None) -> HasOneRel:
    """Declare a has-one relationship to the named model.

    Example:
        primary_provider = has_one("Provider")
    """
    relation = HasOneRel(model=model, foreign_key=foreign_key)
    return relation
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
# =============================================================================
|
|
685
|
+
# Rails-like attribute() DSL
|
|
686
|
+
# =============================================================================
|
|
687
|
+
|
|
688
|
+
# Pending attributes registered by attribute() calls during class body execution
|
|
689
|
+
_pending_attributes: list[tuple[str, Field | Relationship]] = []
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def attribute(name: str, field_type: str, *args: Any, **kwargs: Any) -> None:
    """Queue an attribute for the model class currently being defined.

    Intended to be called inside a class body, Rails-style:

        class User(Model):
            attribute("name", "string")
            attribute("age", "integer")
            attribute("email", "email")
            attribute("status", "choice", "active", "inactive", "pending")

    The field is appended to the module-level pending list, which the model
    metaclass drains when the class object is created.

    Args:
        name: Attribute name
        field_type: Type of field (string, integer, choice, email, npi, etc.)
        *args: Positional args forwarded to the field factory
        **kwargs: Keyword args forwarded to the field factory
    """
    built = _make_field(field_type, *args, **kwargs)
    _pending_attributes.append((name, built))
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
def _make_field(field_type: str, *args: Any, **kwargs: Any) -> Field:
    """Build a field from a case-insensitive type name.

    Semantic type names are consulted before base type names. The matching
    factory is called with ``*args`` and ``**kwargs``.

    Raises:
        ValueError: If ``field_type`` matches neither table.
    """
    # Semantic types (no args needed)
    semantic_types: dict[str, Any] = {
        "first_name": FirstName,
        "last_name": LastName,
        "full_name": FullName,
        "dob": DOB,
        "npi": NPI,
        "ssn": SSN,
        "phone": Phone,
        "email": Email,
        "street": Street,
        "city": City,
        "state": State,
        "zip": ZipCode,
        "zip_code": ZipCode,
        "member_id": MemberID,
        "claim_number": ClaimNumber,
        "procedure_code": ProcedureCode,
        "license_number": LicenseNumber,
        "specialty": Specialty,
        "claim_status": ClaimStatus,
        "fee": Fee,
    }

    # Base types (aliases such as "int"/"integer" share one factory)
    base_types: dict[str, Any] = {
        "string": string,
        "integer": integer,
        "int": integer,
        "decimal": decimal,
        "float": decimal,
        "money": money,
        "date": date_field,
        "time": time_field,
        "boolean": boolean,
        "bool": boolean,
        "choice": choice,
        "list": list_of,
        "computed": computed,
    }

    key = field_type.lower()

    # Tables are searched in priority order; the first hit wins.
    for table in (semantic_types, base_types):
        if key in table:
            factory = table[key]
            return factory(*args, **kwargs)

    raise ValueError(f"Unknown field type: {field_type}")
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
# =============================================================================
|
|
770
|
+
# Model Base Class
|
|
771
|
+
# =============================================================================
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
class ModelMeta(type):
    """Metaclass that gathers Field and Relationship declarations.

    The collected mappings are stored on the new class as ``_fields`` and
    ``_relationships``.
    """

    def __new__(mcs, name: str, bases: tuple[type, ...], namespace: dict[str, Any]) -> ModelMeta:
        """Create the class after collecting its fields and relationships.

        Sources, in increasing precedence: base classes, attributes queued
        by ``attribute()`` calls, then Field/Relationship objects assigned
        directly in the class body.
        """
        global _pending_attributes

        fields: dict[str, Field] = {}
        relationships: dict[str, Relationship] = {}

        # Start from whatever the parents already declared.
        for parent in bases:
            fields.update(getattr(parent, "_fields", {}))
            relationships.update(getattr(parent, "_relationships", {}))

        # Drain the queue so the next class definition starts empty.
        queued, _pending_attributes = _pending_attributes, []

        # Queued attributes come first so class-body assignments override them.
        declared = list(queued) + list(namespace.items())
        for attr_name, attr_value in declared:
            if isinstance(attr_value, Field):
                fields[attr_name] = attr_value
            elif isinstance(attr_value, Relationship):
                relationships[attr_name] = attr_value

        namespace["_fields"] = fields
        namespace["_relationships"] = relationships
        return super().__new__(mcs, name, bases, namespace)
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
T = TypeVar("T", bound="Model")
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
class Model(metaclass=ModelMeta):
|
|
814
|
+
"""Base class for data models.
|
|
815
|
+
|
|
816
|
+
Example:
|
|
817
|
+
class Patient(Model):
|
|
818
|
+
first_name = string(faker="first_name")
|
|
819
|
+
last_name = string(faker="last_name")
|
|
820
|
+
dob = date_field(min_year=1940, max_year=2010)
|
|
821
|
+
|
|
822
|
+
full_name = computed("first_name", "last_name")
|
|
823
|
+
age = years_since("dob")
|
|
824
|
+
|
|
825
|
+
appointments = has_many("Appointment")
|
|
826
|
+
|
|
827
|
+
patient = Patient.generate()
|
|
828
|
+
"""
|
|
829
|
+
|
|
830
|
+
_fields: ClassVar[dict[str, Field]] = {}
|
|
831
|
+
_relationships: ClassVar[dict[str, Relationship]] = {}
|
|
832
|
+
|
|
833
|
+
def __init__(self, **kwargs: Any) -> None:
|
|
834
|
+
for name, field_def in self._fields.items():
|
|
835
|
+
# Skip computed fields - they're computed after init
|
|
836
|
+
if isinstance(field_def, ComputedField):
|
|
837
|
+
continue
|
|
838
|
+
if name in kwargs:
|
|
839
|
+
setattr(self, name, kwargs[name])
|
|
840
|
+
elif field_def.default is not None:
|
|
841
|
+
setattr(self, name, field_def.default)
|
|
842
|
+
elif not field_def.required:
|
|
843
|
+
setattr(self, name, None)
|
|
844
|
+
else:
|
|
845
|
+
raise ValueError(f"Missing required field: {name}")
|
|
846
|
+
|
|
847
|
+
# Compute derived fields
|
|
848
|
+
for name, field_def in self._fields.items():
|
|
849
|
+
if isinstance(field_def, ComputedField):
|
|
850
|
+
setattr(self, name, field_def.compute(self))
|
|
851
|
+
|
|
852
|
+
@classmethod
|
|
853
|
+
def generate(cls: type[T], rng: random.Random | None = None) -> T:
|
|
854
|
+
"""Generate a single model instance with random data.
|
|
855
|
+
|
|
856
|
+
Args:
|
|
857
|
+
rng: Optional seeded random generator for reproducibility
|
|
858
|
+
|
|
859
|
+
Returns:
|
|
860
|
+
Single model instance
|
|
861
|
+
"""
|
|
862
|
+
if rng is None:
|
|
863
|
+
rng = random.Random()
|
|
864
|
+
|
|
865
|
+
kwargs = {}
|
|
866
|
+
for name, field_def in cls._fields.items():
|
|
867
|
+
if not isinstance(field_def, ComputedField):
|
|
868
|
+
kwargs[name] = field_def.generate(rng)
|
|
869
|
+
|
|
870
|
+
return cls(**kwargs)
|
|
871
|
+
|
|
872
|
+
@classmethod
def generate_many(
    cls: type[T],
    count: int,
    rng: random.Random | None = None,
) -> list[T]:
    """Build ``count`` instances, all drawing from the same random source.

    Args:
        count: How many instances to create.
        rng: Random source shared by every instance; a fresh unseeded
            ``random.Random`` is created when omitted.

    Returns:
        The generated instances, in creation order.
    """
    source = rng if rng is not None else random.Random()
    instances = []
    for _ in range(count):
        instances.append(cls.generate(source))
    return instances
|
|
890
|
+
|
|
891
|
+
@classmethod
def generator(cls: type[T], rng: random.Random | None = None) -> "ModelGenerator[T]":
    """Create a ``ModelGenerator`` bound to this model class.

    Args:
        rng: Optional random source handed to the generator.

    Returns:
        A ``ModelGenerator`` that produces instances of ``cls``.
    """
    bound = ModelGenerator(cls, rng)
    return bound
|
|
902
|
+
|
|
903
|
+
def to_dict(self) -> dict[str, Any]:
    """Return a mapping of every declared field name to its current value."""
    snapshot = {}
    for field_name in self._fields:
        snapshot[field_name] = getattr(self, field_name)
    return snapshot
|
|
906
|
+
|
|
907
|
+
def __repr__(self) -> str:
    """Render as ``ClassName(field=value, ...)`` over the declared fields."""
    parts = [f"{name}={getattr(self, name)!r}" for name in self._fields]
    joined = ", ".join(parts)
    return f"{self.__class__.__name__}({joined})"
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
class ModelGenerator(Generic[T]):
    """Factory bound to one model class and one random source.

    Offers single, batch and iterator-style creation. Iteration never ends
    on its own: ``__next__`` always produces a new instance, so bound any
    ``for`` loop yourself or use ``take``.

    Example:
        gen = Patient.generator(rng)
        patient = gen.one()
        patients = gen.many(10)

        # Or iterate over a fixed-size batch
        for patient in gen.take(5):
            print(patient)
    """

    def __init__(self, model_class: type[T], rng: random.Random | None = None) -> None:
        self.model_class = model_class
        # A missing (or falsy) rng is replaced by a fresh unseeded generator.
        self.rng = rng if rng else random.Random()

    def one(self) -> T:
        """Create one instance using the stored random source."""
        instance = self.model_class.generate(self.rng)
        return instance

    def many(self, count: int) -> list[T]:
        """Create ``count`` instances using the stored random source."""
        batch = self.model_class.generate_many(count, self.rng)
        return batch

    def take(self, count: int) -> list[T]:
        """Same as ``many``; provided as a more iterator-flavoured name."""
        return self.many(count)

    def __iter__(self) -> "ModelGenerator[T]":
        return self

    def __next__(self) -> T:
        return self.one()
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
# =============================================================================
|
|
951
|
+
# Helper Functions
|
|
952
|
+
# =============================================================================
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
def _generate_faker(faker_type: str, rng: random.Random) -> str:
    """Produce a fake value of the requested kind from ``rng``.

    Args:
        faker_type: One of ``first_name``, ``last_name``, ``full_name``,
            ``city``, ``state``, ``street``, ``phone``, ``email``, ``ssn``,
            ``npi`` or ``zip``.
        rng: Random source; every draw goes through it.

    Returns:
        The generated value as a string.

    Raises:
        ValueError: If ``faker_type`` is not one of the kinds above.
    """
    if faker_type == "first_name":
        return rng.choice(FIRST_NAMES)
    if faker_type == "last_name":
        return rng.choice(LAST_NAMES)
    if faker_type == "full_name":
        given = rng.choice(FIRST_NAMES)
        family = rng.choice(LAST_NAMES)
        return f"{given} {family}"
    if faker_type == "city":
        return rng.choice(CITIES)
    if faker_type == "state":
        return rng.choice(US_STATES)
    if faker_type == "street":
        house_number = rng.randint(100, 9999)
        street_name = rng.choice(LAST_NAMES)
        street_kind = rng.choice(STREET_SUFFIXES)
        return f"{house_number} {street_name} {street_kind}"
    if faker_type == "phone":
        area_code = rng.randint(200, 999)
        exchange = rng.randint(200, 999)
        subscriber = rng.randint(1000, 9999)
        return f"({area_code}) {exchange}-{subscriber}"
    if faker_type == "email":
        given = rng.choice(FIRST_NAMES).lower()
        family = rng.choice(LAST_NAMES).lower()
        host = rng.choice(["gmail.com", "yahoo.com", "outlook.com"])
        return f"{given}.{family}@{host}"
    if faker_type == "ssn":
        # Three independent draws in left-to-right order: 3, 2 and 4 digits.
        head = rng.randint(100, 999)
        middle = rng.randint(10, 99)
        tail = rng.randint(1000, 9999)
        return f"{head}-{middle}-{tail}"
    if faker_type == "npi":
        digits = [str(rng.randint(0, 9)) for _ in range(10)]
        return "".join(digits)
    if faker_type == "zip":
        return str(rng.randint(10000, 99999))
    raise ValueError(f"Unknown faker: {faker_type}")
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
def _generate_pattern(pattern: str, rng: random.Random) -> str:
    """Expand a simplified regex-like template into a random string.

    Supported syntax is a bracketed character class (``[A-Z0-9]``),
    optionally followed by an exact repeat count (``{n}``). Every other
    character, including ``.``, is copied through literally.

    Args:
        pattern: The template to expand.
        rng: Random source used to pick each generated character.

    Returns:
        The generated string.

    Raises:
        ValueError: If a ``[`` or a quantifier ``{`` is never closed, or
            the repeat count is not an integer.
    """
    pieces: list[str] = []
    pos = 0
    size = len(pattern)

    while pos < size:
        current = pattern[pos]
        if current != "[":
            pieces.append(current)
            pos += 1
            continue

        close = pattern.index("]", pos)
        alphabet = _parse_char_class(pattern[pos + 1 : close])
        pos = close + 1

        # A quantifier only counts when it directly follows the class.
        repeat = 1
        if pattern[pos : pos + 1] == "{":
            brace_end = pattern.index("}", pos)
            repeat = int(pattern[pos + 1 : brace_end])
            pos = brace_end + 1

        for _ in range(repeat):
            pieces.append(rng.choice(alphabet))

    return "".join(pieces)
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def _parse_char_class(char_class: str) -> list[str]:
    """Expand the inside of a bracket expression into its member characters.

    ``x-y`` triples become every character from ``x`` to ``y`` inclusive
    (by code point); anything else is taken literally, including a ``-``
    that has no character on both sides.

    Args:
        char_class: Text between ``[`` and ``]``, e.g. ``A-Z`` or ``0-9a-f``.

    Returns:
        The expanded characters in order of appearance (duplicates kept).
    """
    expanded: list[str] = []
    pos = 0
    length = len(char_class)

    while pos < length:
        is_range = pos + 2 < length and char_class[pos + 1] == "-"
        if not is_range:
            expanded.append(char_class[pos])
            pos += 1
            continue

        low = ord(char_class[pos])
        high = ord(char_class[pos + 2])
        for code in range(low, high + 1):
            expanded.append(chr(code))
        pos += 3

    return expanded
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
def _compute_years_since(date_value: Any) -> int:
    """Compute the number of whole years between a date value and today.

    Args:
        date_value: A ``date``/``datetime`` instance, or a string in one of
            the formats ``%m/%d/%Y``, ``%Y-%m-%d`` or ``%d/%m/%Y``. Formats
            are tried in that order, so an ambiguous string such as
            ``01/02/2000`` is read month-first.

    Returns:
        Completed years since the date. Returns 0 when the value is empty,
        of an unsupported type, or a string matching none of the formats.
    """
    if not date_value:
        return 0

    if isinstance(date_value, date):
        # ``datetime`` subclasses ``date``, so both are handled here.
        start = date_value
    elif isinstance(date_value, str):
        start = None
        for fmt in ("%m/%d/%Y", "%Y-%m-%d", "%d/%m/%Y"):
            try:
                start = datetime.strptime(date_value, fmt)
            except ValueError:
                continue
            break
        if start is None:
            return 0
    else:
        return 0

    today = date.today()
    # Subtract one when this year's anniversary has not been reached yet.
    anniversary_pending = (today.month, today.day) < (start.month, start.day)
    return today.year - start.year - int(anniversary_pending)
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
# =============================================================================
|
|
1057
|
+
# Semantic Field Types - Django-like convenience classes
|
|
1058
|
+
# =============================================================================
|
|
1059
|
+
|
|
1060
|
+
|
|
1061
|
+
def FirstName() -> StringField:
    """Return a string field configured with the ``first_name`` faker."""
    field = string(faker="first_name")
    return field
|
|
1064
|
+
|
|
1065
|
+
|
|
1066
|
+
def LastName() -> StringField:
    """Return a string field configured with the ``last_name`` faker."""
    field = string(faker="last_name")
    return field
|
|
1069
|
+
|
|
1070
|
+
|
|
1071
|
+
def FullName() -> StringField:
    """Return a string field configured with the ``full_name`` faker."""
    field = string(faker="full_name")
    return field
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
def DOB(min_year: int = 1940, max_year: int = 2010) -> DateField:
    """Return a date-of-birth field formatted as ``%m/%d/%Y``.

    Args:
        min_year: Lower year bound passed to ``date_field``.
        max_year: Upper year bound passed to ``date_field``.
    """
    return date_field(
        format="%m/%d/%Y",
        min_year=min_year,
        max_year=max_year,
    )
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
def NPI() -> StringField:
    """Return a National Provider Identifier field (``npi`` faker)."""
    field = string(faker="npi")
    return field
|
|
1084
|
+
|
|
1085
|
+
|
|
1086
|
+
def SSN() -> StringField:
    """Return a Social Security Number field (``ssn`` faker)."""
    field = string(faker="ssn")
    return field
|
|
1089
|
+
|
|
1090
|
+
|
|
1091
|
+
def Phone() -> StringField:
    """Return a phone-number field (``phone`` faker)."""
    field = string(faker="phone")
    return field
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
def Email() -> StringField:
    """Return an email-address field (``email`` faker)."""
    field = string(faker="email")
    return field
|
|
1099
|
+
|
|
1100
|
+
|
|
1101
|
+
def Street() -> StringField:
    """Return a street-address field (``street`` faker)."""
    field = string(faker="street")
    return field
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
def City() -> StringField:
    """Return a city-name field (``city`` faker)."""
    field = string(faker="city")
    return field
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
def State() -> StringField:
    """Return a US state abbreviation field (``state`` faker)."""
    field = string(faker="state")
    return field
|
|
1114
|
+
|
|
1115
|
+
|
|
1116
|
+
def ZipCode() -> StringField:
    """Return a ZIP code field (``zip`` faker)."""
    field = string(faker="zip")
    return field
|
|
1119
|
+
|
|
1120
|
+
|
|
1121
|
+
def MemberID(prefix: str = "", digits: int = 6) -> StringField:
    """Return a member/account ID field built from a generation pattern.

    Args:
        prefix: Text placed before the digits. It is embedded in the pattern
            as-is; when empty, a ``[A-Z]{3}`` class is used instead.
        digits: Repeat count for the trailing ``[0-9]`` class.
    """
    lead = prefix if prefix else "[A-Z]{3}"
    return string(pattern=f"{lead}[0-9]{{{digits}}}")
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
def ClaimNumber() -> StringField:
    """Return a claim-number field: ``CLM`` followed by eight digits."""
    template = r"CLM[0-9]{8}"
    return string(pattern=template)
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
def ProcedureCode() -> StringField:
    """Return a dental procedure code field: ``D`` followed by four digits."""
    template = r"D[0-9]{4}"
    return string(pattern=template)
|
|
1136
|
+
|
|
1137
|
+
|
|
1138
|
+
def LicenseNumber() -> StringField:
    """Return a license-number field: two capitals followed by six digits."""
    template = r"[A-Z]{2}[0-9]{6}"
    return string(pattern=template)
|
|
1141
|
+
|
|
1142
|
+
|
|
1143
|
+
def Specialty(*options: str) -> ChoiceField:
    """Return a provider-specialty choice field.

    Args:
        *options: Specialties to choose from; a built-in list of dental
            specialties is used when none are given.
    """
    default_specialties = (
        "General Dentistry",
        "Orthodontics",
        "Periodontics",
        "Endodontics",
        "Oral Surgery",
    )
    selected = options or default_specialties
    return choice(*selected)
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
def ClaimStatus() -> ChoiceField:
    """Return a choice field over the five claim statuses."""
    statuses = ("Pending", "Approved", "Denied", "In Review", "Paid")
    return choice(*statuses)
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def Fee(min_value: float = 50.0, max_value: float = 2500.0) -> MoneyField:
    """Return a fee/charge amount field.

    Args:
        min_value: Lower bound passed to ``money``.
        max_value: Upper bound passed to ``money``.
    """
    bounds = {"min_value": min_value, "max_value": max_value}
    return money(**bounds)
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
# =============================================================================
|
|
1167
|
+
# Common Healthcare Models
|
|
1168
|
+
# =============================================================================
|
|
1169
|
+
|
|
1170
|
+
|
|
1171
|
+
class Patient(Model):
    """Patient with common healthcare fields.

    Declares identity, contact and address fields plus two derived values:
    ``full_name`` (from ``first_name`` and ``last_name``) and ``age``
    (from ``dob``).
    """

    # Identity
    first_name = FirstName()
    last_name = LastName()
    dob = DOB()
    member_id = MemberID()
    ssn = SSN()
    # Contact
    # NOTE(review): the email faker draws its own names, so the address
    # presumably will not match first_name/last_name - confirm if that matters.
    phone = Phone()
    email = Email()
    # Address
    street = Street()
    city = City()
    state = State()
    zip_code = ZipCode()

    # Derived fields
    full_name = computed("first_name", "last_name")
    age = years_since("dob")
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
class Provider(Model):
    """Provider (treating or billing).

    Declares name, credential and contact fields plus a derived
    ``full_name`` built from ``first_name`` and ``last_name``.
    """

    first_name = FirstName()
    last_name = LastName()
    # Credentials
    npi = NPI()
    license_number = LicenseNumber()
    specialty = Specialty()  # no options given, so the default list applies
    phone = Phone()

    # Derived field
    full_name = computed("first_name", "last_name")
|
|
1201
|
+
|
|
1202
|
+
|
|
1203
|
+
class Procedure(Model):
    """Dental procedure.

    Every field is declared independently, so the generated ``code``,
    ``description``, ``tooth`` and ``surface`` are not correlated with
    one another.
    """

    code = ProcedureCode()
    description = choice(
        "Periodic oral evaluation",
        "Comprehensive oral evaluation",
        "Prophylaxis - adult",
        "Topical fluoride",
        "Bitewing - single film",
        "Panoramic film",
        "Amalgam - one surface",
        "Resin composite - one surface",
        "Crown - porcelain/ceramic",
        "Root canal - anterior",
        "Extraction - single tooth",
    )
    # Tooth numbers "1" through "32", stored as strings.
    tooth = choice(*[str(i) for i in range(1, 33)])
    # Surface codes, single or combined.
    surface = choice("M", "O", "D", "B", "L", "I", "MO", "DO", "MOD")
    fee = Fee()
|
|
1223
|
+
|
|
1224
|
+
|
|
1225
|
+
class Claim(Model):
    """Insurance claim.

    NOTE(review): the two dates and the three amounts are separate field
    declarations, so nothing here makes ``date_submitted`` follow
    ``date_of_service`` or makes the amounts add up - confirm whether
    consumers rely on either relationship.
    """

    claim_number = ClaimNumber()
    # Both dates share the 2023-2025 range and the MM/DD/YYYY format.
    date_of_service = date_field(min_year=2023, max_year=2025, format="%m/%d/%Y")
    date_submitted = date_field(min_year=2023, max_year=2025, format="%m/%d/%Y")
    status = ClaimStatus()
    # Monetary amounts, each with its own range.
    total_charge = Fee(min_value=100.0, max_value=5000.0)
    insurance_paid = Fee(min_value=50.0, max_value=4000.0)
    patient_responsibility = Fee(min_value=0.0, max_value=1000.0)
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
class Attachment(Model):
    """Document attachment.

    NOTE(review): ``filename`` ends in three random lowercase letters and
    ``file_type`` is chosen separately, so the extension presumably does
    not correspond to the file type - confirm whether that is intended.
    """

    # Eight lowercase letters, a dot, then three lowercase letters.
    # Assumes the pattern generator copies "." through literally - TODO confirm.
    filename = string(pattern=r"[a-z]{8}.[a-z]{3}")
    file_type = choice("PDF", "JPG", "PNG", "TIFF", "DOC")
    date_uploaded = date_field(min_year=2023, max_year=2025, format="%m/%d/%Y")
    description = choice(
        "X-Ray",
        "Periodontal Chart",
        "Treatment Plan",
        "Insurance Card",
        "EOB",
        "Referral",
        "Clinical Notes",
    )
|