cudag 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. cudag/__init__.py +334 -0
  2. cudag/annotation/__init__.py +77 -0
  3. cudag/annotation/codegen.py +648 -0
  4. cudag/annotation/config.py +545 -0
  5. cudag/annotation/loader.py +342 -0
  6. cudag/annotation/scaffold.py +121 -0
  7. cudag/annotation/transcription.py +296 -0
  8. cudag/cli/__init__.py +5 -0
  9. cudag/cli/main.py +315 -0
  10. cudag/cli/new.py +873 -0
  11. cudag/core/__init__.py +364 -0
  12. cudag/core/button.py +137 -0
  13. cudag/core/canvas.py +222 -0
  14. cudag/core/config.py +70 -0
  15. cudag/core/coords.py +233 -0
  16. cudag/core/data_grid.py +804 -0
  17. cudag/core/dataset.py +678 -0
  18. cudag/core/distribution.py +136 -0
  19. cudag/core/drawing.py +75 -0
  20. cudag/core/fonts.py +156 -0
  21. cudag/core/generator.py +163 -0
  22. cudag/core/grid.py +367 -0
  23. cudag/core/grounding_task.py +247 -0
  24. cudag/core/icon.py +207 -0
  25. cudag/core/iconlist_task.py +301 -0
  26. cudag/core/models.py +1251 -0
  27. cudag/core/random.py +130 -0
  28. cudag/core/renderer.py +190 -0
  29. cudag/core/screen.py +402 -0
  30. cudag/core/scroll_task.py +254 -0
  31. cudag/core/scrollable_grid.py +447 -0
  32. cudag/core/state.py +110 -0
  33. cudag/core/task.py +293 -0
  34. cudag/core/taskbar.py +350 -0
  35. cudag/core/text.py +212 -0
  36. cudag/core/utils.py +82 -0
  37. cudag/data/surnames.txt +5000 -0
  38. cudag/modal_apps/__init__.py +4 -0
  39. cudag/modal_apps/archive.py +103 -0
  40. cudag/modal_apps/extract.py +138 -0
  41. cudag/modal_apps/preprocess.py +529 -0
  42. cudag/modal_apps/upload.py +317 -0
  43. cudag/prompts/SYSTEM_PROMPT.txt +104 -0
  44. cudag/prompts/__init__.py +33 -0
  45. cudag/prompts/system.py +43 -0
  46. cudag/prompts/tools.py +382 -0
  47. cudag/py.typed +0 -0
  48. cudag/schemas/filesystem.json +90 -0
  49. cudag/schemas/test_record.schema.json +113 -0
  50. cudag/schemas/train_record.schema.json +90 -0
  51. cudag/server/__init__.py +21 -0
  52. cudag/server/app.py +232 -0
  53. cudag/server/services/__init__.py +9 -0
  54. cudag/server/services/generator.py +128 -0
  55. cudag/templates/scripts/archive.sh +35 -0
  56. cudag/templates/scripts/build.sh +13 -0
  57. cudag/templates/scripts/extract.sh +54 -0
  58. cudag/templates/scripts/generate.sh +116 -0
  59. cudag/templates/scripts/pre-commit.sh +44 -0
  60. cudag/templates/scripts/preprocess.sh +46 -0
  61. cudag/templates/scripts/upload.sh +63 -0
  62. cudag/templates/scripts/verify.py +428 -0
  63. cudag/validation/__init__.py +35 -0
  64. cudag/validation/validate.py +508 -0
  65. cudag-0.3.10.dist-info/METADATA +570 -0
  66. cudag-0.3.10.dist-info/RECORD +69 -0
  67. cudag-0.3.10.dist-info/WHEEL +4 -0
  68. cudag-0.3.10.dist-info/entry_points.txt +2 -0
  69. cudag-0.3.10.dist-info/licenses/LICENSE +66 -0
cudag/core/models.py ADDED
@@ -0,0 +1,1251 @@
1
+ # Copyright (c) 2025 Tylt LLC. All rights reserved.
2
+ # CONFIDENTIAL AND PROPRIETARY. Unauthorized use, copying, or distribution
3
+ # is strictly prohibited. For licensing inquiries: hello@claimhawk.app
4
+
5
+ """Rails-like DSL for model definitions.
6
+
7
+ Simple, readable model definitions with convention over configuration.
8
+
9
+ Example:
10
+ class Patient(Model):
11
+ first_name = string(faker="first_name")
12
+ last_name = string(faker="last_name")
13
+ dob = date_field(min_year=1940, max_year=2010)
14
+
15
+ full_name = computed("first_name", "last_name")
16
+ age = years_since("dob")
17
+
18
+ appointments = has_many("Appointment")
19
+ primary_provider = belongs_to("Provider")
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import random
25
+ import string as string_module
26
+ from dataclasses import dataclass
27
+ from dataclasses import field as dataclass_field
28
+ from datetime import date, datetime, timedelta
29
+ from typing import Any, ClassVar, Generic, TypeVar
30
+
31
+ # =============================================================================
32
+ # Built-in Data (used by fakers)
33
+ # =============================================================================
34
+
35
# Given names sampled by the "first_name", "full_name" and "email" fakers
# and by get_first_name().
FIRST_NAMES = [
    "James", "Mary", "John", "Patricia", "Robert", "Jennifer", "Michael", "Linda",
    "William", "Elizabeth", "David", "Barbara", "Richard", "Susan", "Joseph", "Jessica",
    "Thomas", "Sarah", "Charles", "Karen", "Christopher", "Nancy", "Daniel", "Lisa",
    "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra", "Donald", "Ashley",
]
69
+
70
# Surname pool. Order matters: get_last_name() slices this list at
# _LAST_NAME_TEST_START to separate training names from held-out test names.
LAST_NAMES = [
    # Train surnames (first ~90%)
    "Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis",
    "Rodriguez", "Martinez", "Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas",
    "Taylor", "Moore", "Jackson", "Martin", "Lee", "Perez", "Thompson", "White",
    "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson", "Walker", "Young",
    "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
    "Green", "Adams", "Nelson", "Baker", "Hall", "Rivera",
    # Test surnames (last ~10% - held out for evaluation)
    "Campbell", "Mitchell", "Roberts", "Carter", "Phillips",
]

# Index of the first held-out test surname ("Campbell"); everything before
# it belongs to the train split.
_LAST_NAME_TEST_START = 46
128
+
129
+
130
def get_last_name(
    rng: random.Random,
    augment: bool = False,
    split: str = "train",
) -> str:
    """Pick a surname from the train or test portion of ``LAST_NAMES``.

    Args:
        rng: Random number generator used for every draw.
        augment: When True, roughly one name in ten gets a generational
            suffix ("Jr.", "Sr.", "II", "III" or "IV") appended.
        split: ``"test"`` samples the held-out tail of the list; any other
            value samples the training head.

    Returns:
        The chosen surname, possibly followed by a suffix.
    """
    boundary = _LAST_NAME_TEST_START
    pool = LAST_NAMES[boundary:] if split == "test" else LAST_NAMES[:boundary]
    surname = rng.choice(pool)

    # The probability draw only happens when augmentation is requested, so
    # non-augmented calls consume exactly one value from the RNG.
    if not augment:
        return surname
    if rng.random() >= 0.1:
        return surname

    generational = rng.choice(["Jr.", "Sr.", "II", "III", "IV"])
    return f"{surname} {generational}"
157
+
158
+
159
def get_first_name(rng: random.Random) -> str:
    """Pick one entry of ``FIRST_NAMES`` at random.

    Args:
        rng: Random number generator used for the draw.

    Returns:
        The selected first name.
    """
    given_name = rng.choice(FIRST_NAMES)
    return given_name
169
+
170
# Two-letter abbreviations of the 50 US states, used by the "state" faker.
US_STATES = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
]

# City names used by the "city" faker.
CITIES = [
    "New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
    "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose",
]

# Street-type abbreviations appended by the "street" faker.
STREET_SUFFIXES = [
    "St", "Ave", "Blvd",
    "Dr", "Ln", "Rd",
    "Way", "Ct", "Pl",
]
237
+
238
+
239
+ # =============================================================================
240
+ # Field DSL - lowercase functions that return Field instances
241
+ # =============================================================================
242
+
243
+
244
@dataclass
class Field:
    """Common base for all field definitions.

    Prefer the lowercase DSL helpers (``string()``, ``integer()``, ...) to
    instantiating this class directly.
    """

    _type: str = "base"  # short tag naming the kind of field
    required: bool = True  # Model.__init__ raises when a required value is absent
    default: Any = None  # fallback value; None means "no default"

    def generate(self, rng: random.Random) -> Any:
        """Produce a value for this field.

        The base implementation ignores ``rng`` and hands back ``default``;
        subclasses override it with real random generation.
        """
        fallback = self.default
        return fallback
255
+
256
+
257
+ # --- String Fields ---
258
+
259
+
260
@dataclass
class StringField(Field):
    """Field that generates strings.

    The value source is chosen by priority: ``choices``, then ``faker``,
    then ``pattern``; with none of them set, a run of random ASCII letters
    between ``min_length`` and ``max_length`` characters long is produced.
    """

    _type: str = "string"
    faker: str | None = None  # name of a built-in faker, e.g. "first_name"
    pattern: str | None = None  # simple regex-like template, e.g. "[A-Z]{3}"
    choices: list[str] | None = None  # fixed set of allowed values
    min_length: int = 5
    max_length: int = 20

    def generate(self, rng: random.Random) -> str:
        """Return a random string from the highest-priority configured source."""
        if self.choices:
            value = rng.choice(self.choices)
        elif self.faker:
            value = _generate_faker(self.faker, rng)
        elif self.pattern:
            value = _generate_pattern(self.pattern, rng)
        else:
            size = rng.randint(self.min_length, self.max_length)
            letters = rng.choices(string_module.ascii_letters, k=size)
            value = "".join(letters)
        return value
280
+
281
+
282
def string(
    faker: str | None = None,
    pattern: str | None = None,
    choices: list[str] | None = None,
    default: str | None = None,
    required: bool = True,
) -> StringField:
    """Declare a string attribute.

    Args:
        faker: Name of a built-in faker to draw values from.
        pattern: Simple regex-like template to expand.
        choices: Explicit list of values to pick from.
        default: Value used when none is supplied to the model constructor.
        required: Whether the model constructor insists on a value.

    Examples:
        first_name = string(faker="first_name")
        member_id = string(pattern=r"[A-Z]{3}[0-9]{6}")
        status = string(choices=["active", "inactive"])
    """
    field_def = StringField(
        required=required,
        default=default,
        faker=faker,
        pattern=pattern,
        choices=choices,
    )
    return field_def
299
+
300
+
301
+ # --- Numeric Fields ---
302
+
303
+
304
@dataclass
class IntField(Field):
    """Field that generates integers in an inclusive range."""

    _type: str = "int"
    min_value: int = 0  # inclusive lower bound
    max_value: int = 100  # inclusive upper bound

    def generate(self, rng: random.Random) -> int:
        """Return a random integer between ``min_value`` and ``max_value``."""
        low, high = self.min_value, self.max_value
        return rng.randint(low, high)
314
+
315
+
316
def integer(
    min_value: int = 0,
    max_value: int = 100,
    default: int | None = None,
    required: bool = True,
) -> IntField:
    """Declare an integer attribute bounded by ``min_value`` and ``max_value``.

    Example:
        age = integer(min_value=18, max_value=100)
    """
    field_def = IntField(
        required=required,
        default=default,
        min_value=min_value,
        max_value=max_value,
    )
    return field_def
328
+
329
+
330
@dataclass
class FloatField(Field):
    """Field that generates rounded floating-point numbers."""

    _type: str = "float"
    min_value: float = 0.0
    max_value: float = 100.0
    precision: int = 2  # number of decimal places kept by round()

    def generate(self, rng: random.Random) -> float:
        """Return a uniform draw from the range, rounded to ``precision`` places."""
        return round(rng.uniform(self.min_value, self.max_value), self.precision)
342
+
343
+
344
def decimal(
    min_value: float = 0.0,
    max_value: float = 100.0,
    precision: int = 2,
    default: float | None = None,
    required: bool = True,
) -> FloatField:
    """Declare a decimal attribute rounded to ``precision`` places.

    Example:
        price = decimal(min_value=0.01, max_value=999.99, precision=2)
    """
    settings = {
        "min_value": min_value,
        "max_value": max_value,
        "precision": precision,
        "default": default,
        "required": required,
    }
    return FloatField(**settings)
363
+
364
+
365
def money(
    min_value: float = 0.0,
    max_value: float = 1000.0,
    default: float | None = None,
    required: bool = True,
) -> MoneyField:
    """Define a money field (formatted as $X.XX).

    Args:
        min_value: Lower bound of the generated amount.
        max_value: Upper bound of the generated amount.
        default: Value used when none is supplied to the model constructor.
        required: Whether the model constructor insists on a value. Added
            for parity with string(), integer(), decimal(), date_field()
            and time_field(); defaults to True, the previous behavior.

    Example:
        fee = money(min_value=50.0, max_value=2500.0)
        tip = money(max_value=20.0, required=False)
    """
    return MoneyField(
        min_value=min_value,
        max_value=max_value,
        default=default,
        required=required,
    )
376
+
377
+
378
@dataclass
class MoneyField(Field):
    """Field that generates dollar amounts rendered as ``$X.XX`` strings."""

    _type: str = "money"
    min_value: float = 0.0
    max_value: float = 1000.0

    def generate(self, rng: random.Random) -> str:
        """Return a uniform draw from the range, formatted with two decimals."""
        amount = rng.uniform(self.min_value, self.max_value)
        return f"${amount:.2f}"
389
+
390
+
391
+ # --- Date/Time Fields ---
392
+
393
+
394
@dataclass
class DateField(Field):
    """Field that generates formatted calendar dates."""

    _type: str = "date"
    min_year: int = 2000  # earliest year, starting at January 1st
    max_year: int = 2025  # latest year, ending at December 31st
    format: str = "%Y-%m-%d"  # strftime pattern applied to the result

    def generate(self, rng: random.Random) -> str:
        """Return a random day between the two year bounds as a string."""
        first_day = date(self.min_year, 1, 1)
        last_day = date(self.max_year, 12, 31)
        # Both endpoints are reachable because randint is inclusive.
        offset = rng.randint(0, (last_day - first_day).days)
        picked = first_day + timedelta(days=offset)
        return picked.strftime(self.format)
410
+
411
+
412
def date_field(
    min_year: int = 2000,
    max_year: int = 2025,
    format: str = "%Y-%m-%d",
    default: str | None = None,
    required: bool = True,
) -> DateField:
    """Declare a date attribute rendered with the given strftime ``format``.

    Example:
        dob = date_field(min_year=1940, max_year=2010, format="%m/%d/%Y")
    """
    field_def = DateField(
        required=required,
        default=default,
        min_year=min_year,
        max_year=max_year,
        format=format,
    )
    return field_def
427
+
428
+
429
@dataclass
class TimeField(Field):
    """Field that generates formatted times on quarter-hour marks."""

    _type: str = "time"
    min_hour: int = 0  # inclusive, 24-hour clock
    max_hour: int = 23  # inclusive, 24-hour clock
    format: str = "%I:%M %p"  # strftime pattern applied to the result

    def generate(self, rng: random.Random) -> str:
        """Return a random time with minutes of 00, 15, 30 or 45."""
        picked_hour = rng.randint(self.min_hour, self.max_hour)
        picked_minute = rng.choice([0, 15, 30, 45])
        # The calendar date is a placeholder; only the time part is formatted
        # by the default pattern.
        moment = datetime(2000, 1, 1, picked_hour, picked_minute)
        return moment.strftime(self.format)
443
+
444
+
445
def time_field(
    min_hour: int = 0,
    max_hour: int = 23,
    format: str = "%I:%M %p",
    default: str | None = None,
    required: bool = True,
) -> TimeField:
    """Declare a time attribute limited to the given hour range.

    Example:
        appointment_time = time_field(min_hour=8, max_hour=17)
    """
    field_def = TimeField(
        required=required,
        default=default,
        min_hour=min_hour,
        max_hour=max_hour,
        format=format,
    )
    return field_def
460
+
461
+
462
+ # --- Boolean Field ---
463
+
464
+
465
@dataclass
class BoolField(Field):
    """Field that generates booleans with a configurable bias."""

    _type: str = "bool"
    probability: float = 0.5  # chance that generate() returns True

    def generate(self, rng: random.Random) -> bool:
        """Return True when a uniform draw falls below ``probability``."""
        draw = rng.random()
        return draw < self.probability
474
+
475
+
476
def boolean(
    probability: float = 0.5,
    default: bool | None = None,
    required: bool = True,
) -> BoolField:
    """Define a boolean field.

    Args:
        probability: Chance that a generated value is True.
        default: Value used when none is supplied to the model constructor.
        required: Whether the model constructor insists on a value. Added
            for parity with the other field factories; defaults to True,
            the previous behavior.

    Example:
        is_active = boolean(probability=0.8)  # 80% chance True
    """
    return BoolField(probability=probability, default=default, required=required)
483
+
484
+
485
+ # --- Choice Field ---
486
+
487
+
488
@dataclass
class ChoiceField(Field):
    """Field that picks one value from a fixed set of options."""

    _type: str = "choice"
    choices: list[Any] = dataclass_field(default_factory=list)
    weights: list[float] | None = None  # optional relative weight per option

    def generate(self, rng: random.Random) -> Any:
        """Return one option, honoring ``weights`` when they are provided.

        Raises:
            ValueError: If no options were configured.
        """
        if not self.choices:
            raise ValueError("choice() requires at least one option")
        if not self.weights:
            return rng.choice(self.choices)
        (picked,) = rng.choices(self.choices, weights=self.weights, k=1)
        return picked
502
+
503
+
504
def choice(
    *options: Any,
    weights: list[float] | None = None,
    default: Any = None,
    required: bool = True,
) -> ChoiceField:
    """Define a choice field.

    Args:
        *options: The values to choose between.
        weights: Optional relative weights, one per option.
        default: Value used when none is supplied to the model constructor.
        required: Whether the model constructor insists on a value. Added
            (keyword-only) for parity with the other field factories;
            defaults to True, the previous behavior.

    Example:
        status = choice("pending", "approved", "denied")
        status = choice("pending", "approved", weights=[0.7, 0.3])
    """
    return ChoiceField(
        choices=list(options),
        weights=weights,
        default=default,
        required=required,
    )
516
+
517
+
518
+ # --- List Field ---
519
+
520
+
521
@dataclass
class ListField(Field):
    """Field that generates a list of values from another field."""

    _type: str = "list"
    # Field used to produce each element; a plain StringField by default.
    item_field: Field = dataclass_field(default_factory=StringField)
    min_items: int = 1  # inclusive lower bound on list length
    max_items: int = 5  # inclusive upper bound on list length

    def generate(self, rng: random.Random) -> list[Any]:
        """Return between ``min_items`` and ``max_items`` generated elements."""
        size = rng.randint(self.min_items, self.max_items)
        items: list[Any] = []
        for _ in range(size):
            items.append(self.item_field.generate(rng))
        return items
533
+
534
+
535
def list_of(
    item_field: Field,
    min_items: int = 1,
    max_items: int = 5,
    required: bool = True,
) -> ListField:
    """Define a list field.

    Args:
        item_field: Field definition used to generate each element.
        min_items: Smallest list length (inclusive).
        max_items: Largest list length (inclusive).
        required: Whether the model constructor insists on a value. Added
            for parity with the other field factories; defaults to True,
            the previous behavior.

    Example:
        phones = list_of(string(faker="phone"), min_items=1, max_items=3)
    """
    return ListField(
        item_field=item_field,
        min_items=min_items,
        max_items=max_items,
        required=required,
    )
546
+
547
+
548
+ # =============================================================================
549
+ # Computed Fields - derive values from other fields
550
+ # =============================================================================
551
+
552
+
553
@dataclass
class ComputedField(Field):
    """Computed field that derives its value from other fields.

    Two formulas are understood: ``"concat"`` joins the string form of each
    source attribute with ``separator``; ``"years_since"`` converts a single
    date source into a whole number of years.
    """

    _type: str = "computed"
    sources: tuple[str, ...] = ()  # names of the attributes to read
    formula: str = "concat"
    separator: str = " "  # only used by the "concat" formula

    def generate(self, rng: random.Random) -> Any:
        """Return None; the real value is filled in by ``compute``."""
        return None  # Computed after generation

    def compute(self, instance: Model) -> Any:
        """Compute the value from the already-populated instance.

        Args:
            instance: Model instance whose source attributes are set.

        Returns:
            The joined string for "concat", an int for "years_since"
            (0 unless exactly one source is configured), or None for an
            unrecognized formula.
        """
        if self.formula == "concat":
            parts: list[str] = []
            for source in self.sources:
                value = getattr(instance, source, "")
                # Optional fields are stored as None; render them as empty
                # text (like a missing attribute) instead of the word "None".
                parts.append("" if value is None else str(value))
            return self.separator.join(parts)
        if self.formula == "years_since":
            if len(self.sources) != 1:
                return 0
            source_val = getattr(instance, self.sources[0], None)
            return _compute_years_since(source_val)
        return None
576
+
577
+
578
def computed(*sources: str, separator: str = " ") -> ComputedField:
    """Declare an attribute built by joining other attributes as text.

    Args:
        *sources: Names of the attributes to concatenate, in order.
        separator: Text placed between consecutive values.

    Example:
        full_name = computed("first_name", "last_name")
        full_address = computed("street", "city", "state", separator=", ")
    """
    joined = ComputedField(
        separator=separator,
        formula="concat",
        sources=sources,
    )
    return joined
586
+
587
+
588
def years_since(source: str) -> ComputedField:
    """Declare an attribute holding the whole years elapsed since a date.

    Args:
        source: Name of the attribute that holds the date.

    Example:
        age = years_since("dob")
    """
    single_source = (source,)
    return ComputedField(formula="years_since", sources=single_source)
595
+
596
+
597
+ # =============================================================================
598
+ # Relationships - Rails-style with convention over configuration
599
+ # =============================================================================
600
+
601
+
602
@dataclass
class Relationship:
    """Common base for relationship declarations between models."""

    model: str  # name of the related model class
    foreign_key: str | None = None  # Convention: model_name_id

    def resolve(self, registry: dict[str, type[Model]]) -> type[Model] | None:
        """Look up the related model class by name; None when unregistered."""
        target = registry.get(self.model)
        return target

    def inferred_foreign_key(self) -> str:
        """Return the explicit foreign key, or derive one from the model name.

        The derived form lower-cases the CamelCase model name, inserts an
        underscore before every capital except a leading one, and appends
        ``_id`` (Rails convention), e.g. ``InsurancePlan`` becomes
        ``insurance_plan_id``.
        """
        if self.foreign_key:
            return self.foreign_key
        pieces = [
            f"_{letter.lower()}" if letter.isupper() and position > 0 else letter.lower()
            for position, letter in enumerate(self.model)
        ]
        return "".join(pieces) + "_id"
624
+
625
+
626
@dataclass
class HasManyRel(Relationship):
    """One-to-many relationship with bounds on the number of related records."""

    min_count: int = 1  # lower bound on related records
    max_count: int = 10  # upper bound on related records
632
+
633
+
634
@dataclass
class BelongsToRel(Relationship):
    """Many-to-one relationship; adds nothing beyond the base declaration."""
639
+
640
+
641
@dataclass
class HasOneRel(Relationship):
    """One-to-one relationship; adds nothing beyond the base declaration."""
646
+
647
+
648
def has_many(
    model: str,
    foreign_key: str | None = None,
    min_count: int = 1,
    max_count: int = 10,
) -> HasManyRel:
    """Declare a one-to-many relationship to ``model``.

    Args:
        model: Name of the related model class.
        foreign_key: Explicit foreign key; inferred from ``model`` when omitted.
        min_count: Lower bound on related records.
        max_count: Upper bound on related records.

    Example:
        appointments = has_many("Appointment")
        appointments = has_many("Appointment", min_count=0, max_count=20)
    """
    relation = HasManyRel(
        min_count=min_count,
        max_count=max_count,
        model=model,
        foreign_key=foreign_key,
    )
    return relation
663
+
664
+
665
def belongs_to(model: str, foreign_key: str | None = None) -> BelongsToRel:
    """Declare a many-to-one relationship to ``model``.

    Args:
        model: Name of the related model class.
        foreign_key: Explicit foreign key; inferred from ``model`` when omitted.

    Example:
        patient = belongs_to("Patient")
        treating_provider = belongs_to("Provider", foreign_key="treating_provider_id")
    """
    relation = BelongsToRel(foreign_key=foreign_key, model=model)
    return relation
673
+
674
+
675
def has_one(model: str, foreign_key: str | None = None) -> HasOneRel:
    """Declare a one-to-one relationship to ``model``.

    Args:
        model: Name of the related model class.
        foreign_key: Explicit foreign key; inferred from ``model`` when omitted.

    Example:
        primary_provider = has_one("Provider")
    """
    relation = HasOneRel(foreign_key=foreign_key, model=model)
    return relation
682
+
683
+
684
+ # =============================================================================
685
+ # Rails-like attribute() DSL
686
+ # =============================================================================
687
+
688
# Queue of (name, definition) pairs filled by attribute() while a class body
# executes; ModelMeta.__new__ consumes and resets it when the class is built.
_pending_attributes: list[tuple[str, Field | Relationship]] = []


def attribute(name: str, field_type: str, *args: Any, **kwargs: Any) -> None:
    """Queue an attribute for the model class currently being defined.

    Rails-like DSL for defining model attributes:

        class User(Model):
            attribute("name", "string")
            attribute("age", "integer")
            attribute("email", "email")
            attribute("status", "choice", "active", "inactive", "pending")

    Args:
        name: Attribute name
        field_type: Type of field (string, integer, choice, email, npi, etc.)
        *args: Positional args forwarded to the field factory
        **kwargs: Keyword args forwarded to the field factory

    Raises:
        ValueError: If ``field_type`` is not a known type name.
    """
    definition = _make_field(field_type, *args, **kwargs)
    entry = (name, definition)
    _pending_attributes.append(entry)
711
+
712
+
713
def _make_field(field_type: str, *args: Any, **kwargs: Any) -> Field:
    """Build a field from its case-insensitive type name.

    Args:
        field_type: Semantic name ("email", "npi", ...) or base name
            ("string", "int", ...).
        *args: Positional args forwarded to the matching factory.
        **kwargs: Keyword args forwarded to the matching factory.

    Raises:
        ValueError: If the name matches no known factory.
    """
    # One lookup table; the semantic and base names never collide.
    factories: dict[str, Any] = {
        # Semantic types (no args needed)
        "first_name": FirstName,
        "last_name": LastName,
        "full_name": FullName,
        "dob": DOB,
        "npi": NPI,
        "ssn": SSN,
        "phone": Phone,
        "email": Email,
        "street": Street,
        "city": City,
        "state": State,
        "zip": ZipCode,
        "zip_code": ZipCode,
        "member_id": MemberID,
        "claim_number": ClaimNumber,
        "procedure_code": ProcedureCode,
        "license_number": LicenseNumber,
        "specialty": Specialty,
        "claim_status": ClaimStatus,
        "fee": Fee,
        # Base types
        "string": string,
        "integer": integer,
        "int": integer,
        "decimal": decimal,
        "float": decimal,
        "money": money,
        "date": date_field,
        "time": time_field,
        "boolean": boolean,
        "bool": boolean,
        "choice": choice,
        "list": list_of,
        "computed": computed,
    }

    factory = factories.get(field_type.lower())
    if factory is None:
        raise ValueError(f"Unknown field type: {field_type}")
    return factory(*args, **kwargs)
767
+
768
+
769
+ # =============================================================================
770
+ # Model Base Class
771
+ # =============================================================================
772
+
773
+
774
class ModelMeta(type):
    """Metaclass that gathers field and relationship definitions.

    Each class built with it receives ``_fields`` and ``_relationships``
    dictionaries merged from three sources, later ones overriding earlier
    ones: base classes, queued ``attribute()`` calls, and class attributes.
    """

    def __new__(mcs, name: str, bases: tuple[type, ...], namespace: dict[str, Any]) -> ModelMeta:
        global _pending_attributes

        collected_fields: dict[str, Field] = {}
        collected_rels: dict[str, Relationship] = {}

        def file_away(pairs: Any) -> None:
            # Sort each definition into the matching dictionary; anything
            # that is neither a Field nor a Relationship is ignored.
            for key, definition in pairs:
                if isinstance(definition, Field):
                    collected_fields[key] = definition
                elif isinstance(definition, Relationship):
                    collected_rels[key] = definition

        # Inherit from parents
        for parent in bases:
            if hasattr(parent, "_fields"):
                collected_fields.update(parent._fields)
            if hasattr(parent, "_relationships"):
                collected_rels.update(parent._relationships)

        # Take ownership of the queue and reset it so the next class
        # definition starts empty.
        queued, _pending_attributes = _pending_attributes, []
        file_away(queued)

        # Collect from class attributes (original style still works)
        file_away(namespace.items())

        namespace["_fields"] = collected_fields
        namespace["_relationships"] = collected_rels
        return super().__new__(mcs, name, bases, namespace)
808
+
809
+
810
+ T = TypeVar("T", bound="Model")
811
+
812
+
813
class Model(metaclass=ModelMeta):
    """Base class for data models.

    Subclasses declare fields and relationships as class attributes (or via
    ``attribute()``); the metaclass collects them into ``_fields`` and
    ``_relationships``.

    Example:
        class Patient(Model):
            first_name = string(faker="first_name")
            last_name = string(faker="last_name")
            dob = date_field(min_year=1940, max_year=2010)

            full_name = computed("first_name", "last_name")
            age = years_since("dob")

            appointments = has_many("Appointment")

        patient = Patient.generate()
    """

    _fields: ClassVar[dict[str, Field]] = {}
    _relationships: ClassVar[dict[str, Relationship]] = {}

    def __init__(self, **kwargs: Any) -> None:
        """Populate the instance from keyword values, defaults and formulas.

        Args:
            **kwargs: Values keyed by field name. Keys that match no
                non-computed field are not read.

        Raises:
            ValueError: If a required field has neither a supplied value
                nor a non-None default.
        """
        derived: list[tuple[str, ComputedField]] = []

        for attr, spec in self._fields.items():
            # Computed fields wait until every plain field has been set.
            if isinstance(spec, ComputedField):
                derived.append((attr, spec))
                continue

            if attr in kwargs:
                value = kwargs[attr]
            elif spec.default is not None:
                value = spec.default
            elif not spec.required:
                value = None
            else:
                raise ValueError(f"Missing required field: {attr}")
            setattr(self, attr, value)

        # Compute derived fields, in declaration order
        for attr, spec in derived:
            setattr(self, attr, spec.compute(self))

    @classmethod
    def generate(cls: type[T], rng: random.Random | None = None) -> T:
        """Build one instance filled with randomly generated values.

        Args:
            rng: Optional seeded random generator for reproducibility

        Returns:
            Single model instance
        """
        source = rng if rng is not None else random.Random()
        values = {
            attr: spec.generate(source)
            for attr, spec in cls._fields.items()
            if not isinstance(spec, ComputedField)
        }
        return cls(**values)

    @classmethod
    def generate_many(
        cls: type[T],
        count: int,
        rng: random.Random | None = None,
    ) -> list[T]:
        """Build ``count`` instances that share a single random generator.

        Args:
            count: Number of instances to generate
            rng: Optional seeded random generator for reproducibility

        Returns:
            List of model instances
        """
        source = rng if rng is not None else random.Random()
        instances: list[T] = []
        for _ in range(count):
            instances.append(cls.generate(source))
        return instances

    @classmethod
    def generator(cls: type[T], rng: random.Random | None = None) -> "ModelGenerator[T]":
        """Wrap this model in a ``ModelGenerator``.

        Args:
            rng: Optional seeded random generator

        Returns:
            ModelGenerator that yields instances
        """
        wrapper = ModelGenerator(cls, rng)
        return wrapper

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary mapping every field name to its current value."""
        snapshot: dict[str, Any] = {}
        for attr in self._fields:
            snapshot[attr] = getattr(self, attr)
        return snapshot

    def __repr__(self) -> str:
        rendered = [f"{attr}={getattr(self, attr)!r}" for attr in self._fields]
        return f"{self.__class__.__name__}({', '.join(rendered)})"
910
+
911
+
912
class ModelGenerator(Generic[T]):
    """Stateful factory that produces instances of one model class.

    It holds a single random generator, so successive calls continue the
    same random sequence. Iterating over it never stops: ``__next__`` always
    produces a new instance.

    Example:
        gen = Patient.generator(rng)
        patient = gen.one()
        patients = gen.many(10)

        # Or iterate
        for patient in gen.take(5):
            print(patient)
    """

    def __init__(self, model_class: type[T], rng: random.Random | None = None) -> None:
        self.model_class = model_class
        # Fall back to a fresh unseeded generator when none is supplied.
        self.rng = rng if rng else random.Random()

    def one(self) -> T:
        """Produce one instance."""
        instance = self.model_class.generate(self.rng)
        return instance

    def many(self, count: int) -> list[T]:
        """Produce ``count`` instances as a list."""
        batch = self.model_class.generate_many(count, self.rng)
        return batch

    def take(self, count: int) -> list[T]:
        """Produce ``count`` instances; same as ``many``."""
        return self.many(count)

    def __iter__(self) -> "ModelGenerator[T]":
        return self

    def __next__(self) -> T:
        return self.one()
948
+
949
+
950
+ # =============================================================================
951
+ # Helper Functions
952
+ # =============================================================================
953
+
954
+
955
def _generate_faker(faker_type: str, rng: random.Random) -> str:
    """Produce a fake value of the named kind from the built-in data.

    Args:
        faker_type: One of "first_name", "last_name", "full_name", "city",
            "state", "street", "phone", "email", "ssn", "npi" or "zip".
        rng: Random number generator used for every draw.

    Raises:
        ValueError: If ``faker_type`` is not one of the names above.
    """
    if faker_type == "first_name":
        return rng.choice(FIRST_NAMES)
    if faker_type == "last_name":
        return rng.choice(LAST_NAMES)
    if faker_type == "full_name":
        given = rng.choice(FIRST_NAMES)
        family = rng.choice(LAST_NAMES)
        return f"{given} {family}"
    if faker_type == "city":
        return rng.choice(CITIES)
    if faker_type == "state":
        return rng.choice(US_STATES)
    if faker_type == "street":
        # Street names reuse the surname pool.
        house_number = rng.randint(100, 9999)
        street_name = rng.choice(LAST_NAMES)
        street_type = rng.choice(STREET_SUFFIXES)
        return f"{house_number} {street_name} {street_type}"
    if faker_type == "phone":
        area_code = rng.randint(200, 999)
        exchange = rng.randint(200, 999)
        subscriber = rng.randint(1000, 9999)
        return f"({area_code}) {exchange}-{subscriber}"
    if faker_type == "email":
        local_first = rng.choice(FIRST_NAMES).lower()
        local_last = rng.choice(LAST_NAMES).lower()
        host = rng.choice(["gmail.com", "yahoo.com", "outlook.com"])
        return f"{local_first}.{local_last}@{host}"
    if faker_type == "ssn":
        head = rng.randint(100, 999)
        middle = rng.randint(10, 99)
        tail = rng.randint(1000, 9999)
        return f"{head}-{middle}-{tail}"
    if faker_type == "npi":
        digits = [str(rng.randint(0, 9)) for _ in range(10)]
        return "".join(digits)
    if faker_type == "zip":
        return str(rng.randint(10000, 99999))
    raise ValueError(f"Unknown faker: {faker_type}")
991
+
992
+
993
def _generate_pattern(pattern: str, rng: random.Random) -> str:
    """Generate a string from a simple regex-like pattern.

    Supported syntax:
        * ``[...]`` character classes, with ranges such as ``A-Z`` or ``0-9``
        * an optional ``{n}`` (exact) or ``{m,n}`` (inclusive range)
          quantifier directly after a class
        * every other character is copied to the output literally

    Args:
        pattern: The pattern to expand, e.g. ``"[A-Z]{3}[0-9]{6}"``.
        rng: Random number generator used for every draw.

    Returns:
        A string matching the pattern.

    Raises:
        ValueError: If a ``[`` or ``{`` is never closed, or a quantifier is
            not ``{n}`` / ``{m,n}`` with integer bounds and ``m <= n``.
    """
    result: list[str] = []
    length = len(pattern)
    i = 0

    while i < length:
        char = pattern[i]

        if char != "[":
            result.append(char)
            i += 1
            continue

        end = pattern.find("]", i)
        if end == -1:
            raise ValueError(f"Unterminated character class in pattern: {pattern!r}")
        chars = _parse_char_class(pattern[i + 1 : end])
        i = end + 1

        count = 1
        if i < length and pattern[i] == "{":
            end_q = pattern.find("}", i)
            if end_q == -1:
                raise ValueError(f"Unterminated quantifier in pattern: {pattern!r}")
            count = _pick_repeat_count(pattern[i + 1 : end_q], rng)
            i = end_q + 1

        result.extend(rng.choice(chars) for _ in range(count))

    return "".join(result)


def _pick_repeat_count(spec: str, rng: random.Random) -> int:
    """Turn the inside of a ``{...}`` quantifier into a repeat count.

    ``"n"`` returns ``n`` without touching the RNG, so fixed-count patterns
    produce the same output for a given seed as they always have; ``"m,n"``
    draws a count between ``m`` and ``n`` inclusive.
    """
    low_text, comma, high_text = spec.partition(",")
    if not comma:
        return int(spec)
    low, high = int(low_text), int(high_text)
    if low > high:
        raise ValueError(f"Invalid quantifier range: {{{spec}}}")
    return rng.randint(low, high)


def _parse_char_class(char_class: str) -> list[str]:
    """Expand the inside of a character class into its member characters.

    ``X-Y`` triples expand to every code point from X to Y inclusive; any
    other character (including a trailing ``-``) stands for itself.
    """
    chars: list[str] = []
    j = 0
    size = len(char_class)
    while j < size:
        is_range = j + 2 < size and char_class[j + 1] == "-"
        if is_range:
            first, last = ord(char_class[j]), ord(char_class[j + 2])
            chars.extend(chr(code) for code in range(first, last + 1))
            j += 3
        else:
            chars.append(char_class[j])
            j += 1
    return chars
1035
+
1036
+
1037
def _compute_years_since(date_value: Any) -> int:
    """Compute the whole years elapsed between a date value and today.

    Args:
        date_value: A ``date`` or ``datetime`` object, or a string in
            ``%m/%d/%Y``, ``%Y-%m-%d`` or ``%d/%m/%Y`` form (tried in that
            order, so an ambiguous string is read month-first).

    Returns:
        The number of completed years, or 0 when the value is empty, of an
        unsupported type, or a string that matches none of the formats.
    """
    if not date_value:
        return 0

    start: date | None = None
    if isinstance(date_value, str):
        for fmt in ("%m/%d/%Y", "%Y-%m-%d", "%d/%m/%Y"):
            try:
                start = datetime.strptime(date_value, fmt).date()
            except ValueError:
                continue
            break
    elif isinstance(date_value, date):
        # datetime is a subclass of date, so both kinds land here.
        start = date_value

    if start is None:
        return 0

    today = date.today()
    # Subtract one when this year's anniversary has not been reached yet.
    anniversary_pending = (today.month, today.day) < (start.month, start.day)
    return today.year - start.year - int(anniversary_pending)
1054
+
1055
+
1056
+ # =============================================================================
1057
+ # Semantic Field Types - Django-like convenience factory functions
1058
+ # =============================================================================
1059
+
1060
+
1061
def FirstName() -> StringField:
    """Create a StringField configured with the ``first_name`` faker."""
    field = string(faker="first_name")
    return field
1064
+
1065
+
1066
def LastName() -> StringField:
    """Create a StringField configured with the ``last_name`` faker."""
    field = string(faker="last_name")
    return field
1069
+
1070
+
1071
def FullName() -> StringField:
    """Create a StringField configured with the ``full_name`` faker."""
    field = string(faker="full_name")
    return field
1074
+
1075
+
1076
def DOB(min_year: int = 1940, max_year: int = 2010) -> DateField:
    """Create a date-of-birth DateField.

    Both year bounds are forwarded to ``date_field`` together with the
    ``%m/%d/%Y`` format string.
    """
    birth_date = date_field(
        min_year=min_year,
        max_year=max_year,
        format="%m/%d/%Y",
    )
    return birth_date
1079
+
1080
+
1081
def NPI() -> StringField:
    """Create a National Provider Identifier field (``npi`` faker)."""
    field = string(faker="npi")
    return field
1084
+
1085
+
1086
def SSN() -> StringField:
    """Create a Social Security Number field (``ssn`` faker)."""
    field = string(faker="ssn")
    return field
1089
+
1090
+
1091
def Phone() -> StringField:
    """Create a phone number field (``phone`` faker)."""
    field = string(faker="phone")
    return field
1094
+
1095
+
1096
def Email() -> StringField:
    """Create an email address field (``email`` faker)."""
    field = string(faker="email")
    return field
1099
+
1100
+
1101
def Street() -> StringField:
    """Create a street address field (``street`` faker)."""
    field = string(faker="street")
    return field
1104
+
1105
+
1106
def City() -> StringField:
    """Create a city name field (``city`` faker)."""
    field = string(faker="city")
    return field
1109
+
1110
+
1111
def State() -> StringField:
    """Create a US state abbreviation field (``state`` faker)."""
    field = string(faker="state")
    return field
1114
+
1115
+
1116
def ZipCode() -> StringField:
    """Create a ZIP code field (``zip`` faker)."""
    field = string(faker="zip")
    return field
1119
+
1120
+
1121
def MemberID(prefix: str = "", digits: int = 6) -> StringField:
    """Create a member/account ID field built from a pattern.

    The pattern is ``prefix`` followed by ``digits`` characters from
    ``[0-9]``. When ``prefix`` is empty, the class ``[A-Z]{3}`` is used in
    its place.
    """
    lead = prefix if prefix else "[A-Z]{3}"
    id_pattern = f"{lead}[0-9]{{{digits}}}"
    return string(pattern=id_pattern)
1126
+
1127
+
1128
def ClaimNumber() -> StringField:
    """Create a claim number field: ``CLM`` followed by ``[0-9]{8}``."""
    claim_pattern = r"CLM[0-9]{8}"
    return string(pattern=claim_pattern)
1131
+
1132
+
1133
def ProcedureCode() -> StringField:
    """Create a dental procedure code field: ``D`` followed by ``[0-9]{4}``."""
    code_pattern = r"D[0-9]{4}"
    return string(pattern=code_pattern)
1136
+
1137
+
1138
def LicenseNumber() -> StringField:
    """Create a license number field with pattern ``[A-Z]{2}[0-9]{6}``."""
    license_pattern = r"[A-Z]{2}[0-9]{6}"
    return string(pattern=license_pattern)
1141
+
1142
+
1143
def Specialty(*options: str) -> ChoiceField:
    """Create a provider specialty ChoiceField.

    The given ``options`` become the choices. When none are given, a
    built-in list of dental specialties is used.
    """
    default_specialties = (
        "General Dentistry",
        "Orthodontics",
        "Periodontics",
        "Endodontics",
        "Oral Surgery",
    )
    selected = options if options else default_specialties
    return choice(*selected)
1154
+
1155
+
1156
def ClaimStatus() -> ChoiceField:
    """Create a claim status ChoiceField over the fixed status labels."""
    statuses = ("Pending", "Approved", "Denied", "In Review", "Paid")
    return choice(*statuses)
1159
+
1160
+
1161
def Fee(min_value: float = 50.0, max_value: float = 2500.0) -> MoneyField:
    """Create a fee/charge MoneyField; both bounds are forwarded to ``money``."""
    amount = money(min_value=min_value, max_value=max_value)
    return amount
1164
+
1165
+
1166
+ # =============================================================================
1167
+ # Common Healthcare Models
1168
+ # =============================================================================
1169
+
1170
+
1171
class Patient(Model):
    """Patient record with identity, contact and address fields."""

    # Identity
    first_name = FirstName()
    last_name = LastName()
    dob = DOB()
    member_id = MemberID()
    ssn = SSN()

    # Contact
    phone = Phone()
    email = Email()

    # Address
    street = Street()
    city = City()
    state = State()
    zip_code = ZipCode()

    # Fields declared in terms of other fields on this model.
    full_name = computed("first_name", "last_name")
    age = years_since("dob")
1188
+
1189
+
1190
class Provider(Model):
    """Healthcare provider (treating or billing)."""

    # Identity and credentials
    first_name = FirstName()
    last_name = LastName()
    npi = NPI()
    license_number = LicenseNumber()
    specialty = Specialty()

    # Contact
    phone = Phone()

    # Field declared in terms of the two name fields above.
    full_name = computed("first_name", "last_name")
1201
+
1202
+
1203
class Procedure(Model):
    """Dental procedure line item."""

    code = ProcedureCode()
    description = choice(
        "Periodic oral evaluation",
        "Comprehensive oral evaluation",
        "Prophylaxis - adult",
        "Topical fluoride",
        "Bitewing - single film",
        "Panoramic film",
        "Amalgam - one surface",
        "Resin composite - one surface",
        "Crown - porcelain/ceramic",
        "Root canal - anterior",
        "Extraction - single tooth",
    )
    # Tooth numbers "1" through "32", as strings.
    tooth = choice(*map(str, range(1, 33)))
    surface = choice("M", "O", "D", "B", "L", "I", "MO", "DO", "MOD")
    fee = Fee()
1223
+
1224
+
1225
class Claim(Model):
    """Insurance claim with dates, status and monetary amounts."""

    claim_number = ClaimNumber()

    # Dates, each declared independently with the same year window.
    date_of_service = date_field(
        min_year=2023,
        max_year=2025,
        format="%m/%d/%Y",
    )
    date_submitted = date_field(
        min_year=2023,
        max_year=2025,
        format="%m/%d/%Y",
    )

    status = ClaimStatus()

    # Amounts, each with its own bounds.
    total_charge = Fee(min_value=100.0, max_value=5000.0)
    insurance_paid = Fee(min_value=50.0, max_value=4000.0)
    patient_responsibility = Fee(min_value=0.0, max_value=1000.0)
1235
+
1236
+
1237
class Attachment(Model):
    """Document attached to a record."""

    # Pattern-based name; the pattern text is passed to ``string`` as-is.
    filename = string(pattern=r"[a-z]{8}.[a-z]{3}")
    file_type = choice("PDF", "JPG", "PNG", "TIFF", "DOC")
    date_uploaded = date_field(
        min_year=2023,
        max_year=2025,
        format="%m/%d/%Y",
    )
    description = choice(
        "X-Ray",
        "Periodontal Chart",
        "Treatment Plan",
        "Insurance Card",
        "EOB",
        "Referral",
        "Clinical Notes",
    )