@definite-app/data-apps 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/CLAUDE.md +686 -0
  2. package/LICENSE +201 -0
  3. package/README.md +643 -0
  4. package/build.mjs +459 -0
  5. package/examples/_refined_demo/app.json +15 -0
  6. package/examples/_refined_demo/data/sample.parquet +0 -0
  7. package/examples/_refined_demo/gen_preview_data.py +59 -0
  8. package/examples/_refined_demo/preview-data.json +13 -0
  9. package/examples/_refined_demo/src/App.tsx +188 -0
  10. package/examples/_refined_demo/src/main.tsx +12 -0
  11. package/examples/loan-portfolio/app.json +31 -0
  12. package/examples/loan-portfolio/data/loan_book.parquet +0 -0
  13. package/examples/loan-portfolio/gen_preview_data.py +454 -0
  14. package/examples/loan-portfolio/preview-data.json +84 -0
  15. package/examples/loan-portfolio/src/App.tsx +1103 -0
  16. package/examples/loan-portfolio/src/main.tsx +12 -0
  17. package/examples/revenue-explorer/app.json +23 -0
  18. package/examples/revenue-explorer/data/transactions.parquet +0 -0
  19. package/examples/revenue-explorer/gen_preview_data.py +129 -0
  20. package/examples/revenue-explorer/preview-data.json +49 -0
  21. package/examples/revenue-explorer/src/App.tsx +527 -0
  22. package/examples/revenue-explorer/src/main.tsx +12 -0
  23. package/package.json +55 -0
  24. package/preview.mjs +35 -0
  25. package/runtime/definite-runtime.tsx +5934 -0
  26. package/scripts/headless-smoke.mjs +196 -0
  27. package/templates/blank/app.json +15 -0
  28. package/templates/blank/src/App.tsx +41 -0
  29. package/templates/blank/src/main.tsx +12 -0
  30. package/templates/refined/app.json +15 -0
  31. package/templates/refined/src/App.tsx +198 -0
  32. package/templates/refined/src/main.tsx +12 -0
@@ -0,0 +1,188 @@
1
+ import React, { useMemo, useState } from "react";
2
+
3
+ import {
4
+ buildPalette,
5
+ CachePopover,
6
+ DateRangeFilter,
7
+ DEFAULT_DATE_RANGE_PRESETS,
8
+ type DateRangeValue,
9
+ DrillProvider,
10
+ ErrorState,
11
+ LoadingState,
12
+ PaletteProvider,
13
+ SaasKpiCard,
14
+ ShellLayout,
15
+ Sidebar,
16
+ type SidebarNavItem,
17
+ useDataset,
18
+ useDrill,
19
+ usePalette,
20
+ useSqlQuery,
21
+ useTheme,
22
+ } from "@definite/runtime";
23
+
24
// Default to the "Last 12 months" preset (see initialDateRange below). The
// date filter is applied against the dataset column named by DATE_COLUMN,
// currently `originated` — keep it in sync with the column declared in
// app.json and change DATE_COLUMN if you rename that column.
const DATE_COLUMN = "originated";
28
+
29
/**
 * Computes the date range the filter starts with: the preset whose key is
 * "last12m" when one exists, otherwise the first entry of
 * DEFAULT_DATE_RANGE_PRESETS.
 */
function initialDateRange(): DateRangeValue {
  const lastTwelveMonths = DEFAULT_DATE_RANGE_PRESETS.find(
    (candidate) => candidate.key === "last12m",
  );
  const chosen = lastTwelveMonths ?? DEFAULT_DATE_RANGE_PRESETS[0];
  return chosen.compute();
}
35
+
36
/**
 * Escapes a value for embedding inside a single-quoted SQL string literal by
 * doubling every single quote it contains.
 */
const escSql = (v: string) => v.split("'").join("''");
37
+
38
/**
 * Builds the SQL ` WHERE …` suffix that restricts DATE_COLUMN to the
 * inclusive [from, to] range.
 *
 * A falsy bound is skipped. With no bounds at all the result is the empty
 * string. A non-empty result starts with a leading space so it can be
 * appended directly after a table reference.
 *
 * @param from Lower bound, compared with `>=`.
 * @param to Upper bound, compared with `<=`.
 * @returns The WHERE clause (with leading space) or "".
 */
function buildWhere(from: string, to: string): string {
  const bounds: Array<[string, string]> = [
    [">=", from],
    ["<=", to],
  ];
  const predicates = bounds
    .filter(([, bound]) => Boolean(bound))
    .map(([operator, bound]) => `${DATE_COLUMN} ${operator} '${escSql(bound)}'`);

  if (predicates.length === 0) {
    return "";
  }
  return ` WHERE ${predicates.join(" AND ")}`;
}
44
+
45
// ── Sidebar navigation ────────────────────────────────────────────────────
// Each entry maps to a view rendered in the main pane. Icons are single
// glyphs (Unicode) so apps don't need an icon library; swap for SVGs when
// you want brand-specific marks.
//
// The `id` values are compared against the `view` state in InnerApp
// (`view === "overview"`, `view === "detail"`), so a new entry here also
// needs a matching branch there. The first entry is the fallback used when
// no id matches the current view.
const NAV_ITEMS: SidebarNavItem[] = [
  { id: "overview", label: "Overview", icon: "◧" },
  { id: "detail", label: "Detail", icon: "≣" },
];
53
+
54
+ // ── App root ─────────────────────────────────────────────────────────────
55
+ // Pattern: outer <App> holds dataset-loading fallbacks; <InnerApp> runs
56
+ // inside the palette + drill providers so every descendant can usePalette()
57
+ // and useDrill() without prop-drilling.
58
+
59
/**
 * App root. Resolves the theme and palette, loads the "main" dataset, and
 * renders a loading or error state until the dataset is usable. Once loaded
 * it mounts <InnerApp> inside the palette and drill providers.
 */
export default function App() {
  const { theme, toggleTheme } = useTheme();
  // Optionally pass a brand accent: buildPalette(theme, { accent: "#FF006E" })
  const palette = useMemo(() => buildPalette(theme), [theme]);
  const data = useDataset("main");

  if (data.loading) {
    return <LoadingState message="Loading data…" />;
  }
  if (data.error) {
    return <ErrorState title="Dataset failed to load" message={data.error} />;
  }

  // Only a toggle is taken from useTheme(), so flip it solely when the
  // requested theme differs from the current one.
  const handleThemeChange = (next: "dark" | "light") => {
    if (next !== theme) toggleTheme();
  };

  return (
    <PaletteProvider value={palette}>
      <DrillProvider>
        <InnerApp theme={theme} onThemeChange={handleThemeChange} dataset={data} />
      </DrillProvider>
    </PaletteProvider>
  );
}
80
+
81
// Shape of whatever useDataset() returns, derived from the hook itself so the
// prop type below cannot drift from the runtime's definition.
type DatasetHandle = ReturnType<typeof useDataset>;

/**
 * Main application shell, rendered once the dataset has loaded.
 *
 * Owns the active view and the date-range filter, runs the summary row-count
 * query for the selected range, and composes the sidebar, header and active
 * view inside <ShellLayout>.
 *
 * @param theme Current theme, forwarded to the sidebar.
 * @param onThemeChange Called by the sidebar with the theme the user picked.
 * @param dataset Handle returned by useDataset() in <App>.
 */
function InnerApp({ theme, onThemeChange, dataset }: {
  theme: "dark" | "light";
  onThemeChange: (t: "dark" | "light") => void;
  dataset: DatasetHandle;
}) {
  const P = usePalette();
  const [view, setView] = useState("overview");
  // initialDateRange is passed as a lazy initializer, so it runs only on mount.
  const [dateRange, setDateRange] = useState<DateRangeValue>(initialDateRange);

  // WHERE suffix for the selected range; rebuilt only when a bound changes.
  const where = useMemo(
    () => buildWhere(dateRange.from, dateRange.to),
    [dateRange.from, dateRange.to],
  );

  // Example query — replace with your own.
  // An empty SQL string is passed while dataset.tableRef is falsy.
  // NOTE(review): presumably useSqlQuery treats "" as "nothing to run" — confirm.
  const summary = useSqlQuery<Array<{ rowCount: number }>>(
    dataset,
    dataset.tableRef
      ? `SELECT COUNT(*)::INTEGER AS rowCount FROM ${dataset.tableRef}${where}`
      : "",
    [where],
  );

  // Falls back to 0 until the query has produced a first row.
  const rowCount = summary.data?.[0]?.rowCount ?? 0;
  // Unknown view ids fall back to the first nav entry.
  const navItem = NAV_ITEMS.find((n) => n.id === view) ?? NAV_ITEMS[0];

  const sidebar = (
    <Sidebar
      logo={{ title: "My App", subtitle: "Replace this" }}
      navItems={NAV_ITEMS}
      activeView={view}
      onViewChange={setView}
      dateRangeSlot={
        <DateRangeFilter
          value={dateRange}
          onChange={setDateRange}
          label={null}
          popoverPlacement="right-start"
          triggerStyle={{ width: "100%", minWidth: 0, justifyContent: "space-between", padding: "7px 10px", fontSize: 12 }}
        />
      }
      theme={theme}
      onThemeChange={onThemeChange}
      footer={<>Live DuckDB · {rowCount.toLocaleString()} rows</>}
    />
  );

  // Prefers the cache's own row count; the filtered query count is only the
  // fallback when the cache does not report one.
  const headerRight = (
    <CachePopover
      isLoading={summary.loading}
      rowCount={dataset.cache?.rowCount ?? rowCount}
      cache={dataset.cache}
      onRefresh={dataset.refresh}
    />
  );

  return (
    <ShellLayout
      palette={P}
      sidebar={sidebar}
      title={navItem.label}
      breadcrumb={["App", navItem.label]}
      headerRight={headerRight}
    >
      {view === "overview" && <OverviewView rowCount={rowCount} loading={summary.loading} />}
      {view === "detail" && <DetailView rowCount={rowCount} />}
    </ShellLayout>
  );
}
152
+
153
/**
 * Overview pane: a responsive grid holding a single "Total rows" KPI card.
 * Clicking the card calls drill.open() with a "kpi" payload describing the
 * same figure.
 *
 * @param rowCount Row count to display.
 * @param loading Whether the card should render its loading state.
 */
function OverviewView({ rowCount, loading }: { rowCount: number; loading: boolean }) {
  const drill = useDrill();
  const formattedCount = rowCount.toLocaleString();

  const openRowsDrill = () =>
    drill.open({
      kind: "kpi",
      id: "rows",
      title: "Total rows",
      value: formattedCount,
      breadcrumb: "Overview",
      stats: [["Row count", formattedCount]],
      narrative: "Dataset row count. Replace with your own computed stats.",
      sql: `SELECT COUNT(*) FROM main;`,
    });

  return (
    <div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fit, minmax(220px, 1fr))", gap: 12 }}>
      <SaasKpiCard
        title="Total rows"
        value={formattedCount}
        sub="in dataset"
        loading={loading}
        onClick={openRowsDrill}
      />
    </div>
  );
}
176
+
177
/**
 * Placeholder detail pane: a palette-styled card with a heading and a short
 * hint that mentions the current row count.
 *
 * @param rowCount Row count shown inside the hint text.
 */
function DetailView({ rowCount }: { rowCount: number }) {
  const P = usePalette();

  const cardStyle: React.CSSProperties = {
    background: P.card,
    border: `1px solid ${P.border}`,
    borderRadius: 10,
    padding: 24,
    color: P.sub,
    fontSize: 14,
    lineHeight: 1.6,
  };
  const headingStyle: React.CSSProperties = {
    fontSize: 16,
    color: P.text,
    fontWeight: 600,
    marginBottom: 8,
  };
  const inlineCodeStyle: React.CSSProperties = {
    fontFamily: P.mono,
    background: P.elev,
    padding: "1px 5px",
    borderRadius: 3,
  };

  return (
    <div style={cardStyle}>
      <div style={headingStyle}>Detail view</div>
      Replace this with your own detail content — tables, forms, drill-downs. The dataset has <b style={{ color: P.text }}>{rowCount.toLocaleString()}</b> rows available via <code style={inlineCodeStyle}>useSqlQuery(data, "...")</code>.
    </div>
  );
}
@@ -0,0 +1,12 @@
1
+ import React from "react";
2
+ import { createRoot } from "react-dom/client";
3
+
4
+ import App from "./App";
5
+
6
// Look up the #root element the app mounts into; fail loudly if it is absent
// rather than letting createRoot() receive null.
const mountNode = document.getElementById("root");

if (mountNode === null) {
  throw new Error("Missing #root mount element");
}

const reactRoot = createRoot(mountNode);
reactRoot.render(<App />);
@@ -0,0 +1,31 @@
1
+ {
2
+ "version": 2,
3
+ "name": "Loan Portfolio",
4
+ "entry": "src/main.tsx",
5
+ "resources": {
6
+ "loans": {
7
+ "kind": "dataset",
8
+ "source": {
9
+ "type": "sql",
10
+ "sql": "SELECT loan_id AS loanId, borrower, amount::BIGINT AS amount, balance::BIGINT AS balance, fico::INTEGER AS fico, fico_band AS ficoBand, rate::DOUBLE AS rate, term::INTEGER AS term, income::BIGINT AS income, income_band AS incomeBand, dti::DOUBLE AS dti, dti_band AS dtiBand, status, state, product, channel, employment, purpose, autopay, pay_method AS payMethod, STRFTIME(originated, '%Y-%m-%d') AS originated, STRFTIME(originated, '%Y-%m') AS originatedMonth, vintage, mob::INTEGER AS mob, STRFTIME(last_pay, '%Y-%m-%d') AS lastPay, collections_flag AS collectionsFlag FROM LAKE.SCHEMA.loan_book LIMIT 500000"
11
+ },
12
+ "public": false
13
+ },
14
+ "riskBands": {
15
+ "kind": "json",
16
+ "source": {
17
+ "type": "sql",
18
+ "sql": "SELECT band, range, apr::DOUBLE AS apr, default_rate::DOUBLE AS defaultRate, color FROM LAKE.SCHEMA.risk_bands ORDER BY band"
19
+ },
20
+ "public": false
21
+ },
22
+ "statusCatalog": {
23
+ "kind": "json",
24
+ "source": {
25
+ "type": "sql",
26
+ "sql": "SELECT key, label, tone FROM LAKE.SCHEMA.loan_statuses"
27
+ },
28
+ "public": false
29
+ }
30
+ }
31
+ }
@@ -0,0 +1,454 @@
1
+ #!/usr/bin/env -S uv run
2
+ # /// script
3
+ # requires-python = ">=3.10"
4
+ # dependencies = ["duckdb", "pyarrow"]
5
+ # ///
6
+ """Generate a synthetic consumer loan book for the Refined SaaS v2 template.
7
+
8
+ Writes data/loan_book.parquet (2,588 loans) and preview-data.json so the app
9
+ has realistic data to render against without hitting the warehouse.
10
+
11
+ Run: uv run gen_preview_data.py
12
+ """
13
+
14
+ import json
15
+ import math
16
+ import random
17
+ from datetime import date
18
+ from pathlib import Path
19
+
20
+ import pyarrow as pa
21
+ import pyarrow.parquet as pq
22
+
23
# Output locations, all resolved relative to this script's own directory.
HERE = Path(__file__).parent
DATA_DIR = HERE / "data"
PARQUET_PATH = DATA_DIR / "loan_book.parquet"
PREVIEW_PATH = HERE / "preview-data.json"

# Number of synthetic loans produced by gen_rows().
N_ROWS = 2588
# Anchor the book at end of 2026 so the dashboard's default "Last 12 months"
# preset (relative to current date) covers a dense window of recent originations.
# The 48-month span in months_back (see gen_rows) spans back to 2023-01.
END_Y, END_M = 2026, 12
33
+
34
# Categorical lookup tables. PRODUCTS, CHANNELS and EMPLOYMENTS hold
# (id, label, weight) triples: gen_rows() draws the id with probability
# proportional to the weight and does not use the label.
PRODUCTS = [
    ("personal", "Personal loan", 1482),
    ("consolidation", "Debt consolidation", 612),
    ("auto", "Auto-secured", 308),
    ("home", "Home improvement", 186),
]
CHANNELS = [
    ("direct_mail", "Direct mail", 784),
    ("paid_search", "Paid search", 612),
    ("affiliate", "Affiliate", 498),
    ("organic", "Organic", 406),
    ("partner", "Partner API", 288),
]
EMPLOYMENTS = [
    ("w2", "W-2 full-time", 1812),
    ("self_employed", "Self-employed", 412),
    ("contract", "1099 / contract", 218),
    ("retired", "Retired", 86),
    ("other", "Other", 60),
]
# Loan terms in months; gen_rows() picks one uniformly at random.
TERMS = [24, 36, 48, 60]
# Stated loan purposes; gen_rows() picks one uniformly at random.
PURPOSES = [
    "Debt consolidation",
    "Home repair",
    "Medical",
    "Wedding",
    "Auto repair",
    "Moving",
    "Education",
    "Vacation",
    "Business",
    "Other",
]
# Single-letter first initials (the space in the literal is stripped out);
# combined with a last name as "<initial>. <last name>" for the borrower field.
FIRST_NAMES = list("MRJASDET KLPBCHNOVWYZFGI".replace(" ", ""))
68
# Borrower last names. Order matters: gen_rows() indexes this list with a
# seeded random draw, so reordering changes the generated borrowers.
LAST_NAMES = (
    "Chen Patel Williams Kowalski Nakamura Okafor Martinez Becker Thompson Garcia "
    "Rodriguez Hernandez Singh Ahmed Cohen O'Brien Walsh Andersen Muller Lopez "
    "Tran Nguyen Park Kim Wong Liu Murphy Foster Reyes Diaz "
    "Webb Holt Sloan Mendez Zhao Iyer Khan Bauer Roux Esposito "
    "Romano Bianchi Fischer Schmidt Larsen Berg Costa Silva Petrov Volkov "
    "Novak Jansen Yamamoto Suzuki Tanaka Mensah Adeyemi Eze Ndiaye Cisse "
    "Saito Kovac Vasquez Romero Castillo Cruz Ortiz Ramos Brennan Donnelly "
    "Quinn Sweeney McKay Lindgren Hansen"
).split()

# Relative sampling weight per US state code, in descending-weight order.
# weighted_state() walks the mapping in insertion order.
STATE_WEIGHTS = dict(
    [
        ("CA", 412), ("TX", 389), ("NY", 276), ("FL", 268), ("IL", 214),
        ("PA", 158), ("OH", 142), ("GA", 131), ("AZ", 118), ("WA", 104),
        ("NC", 96), ("CO", 84), ("OR", 71), ("MA", 68), ("VA", 62),
        ("MI", 58), ("NJ", 54), ("MN", 48), ("MD", 42), ("IN", 38),
        ("WI", 34), ("TN", 32), ("MO", 28), ("SC", 25), ("LA", 22),
        ("NV", 21), ("KY", 18), ("OK", 16), ("AL", 15), ("UT", 14),
        ("CT", 13), ("KS", 12), ("AR", 11), ("IA", 10), ("MS", 9),
        ("NE", 8), ("NM", 7), ("WV", 6), ("ID", 6), ("HI", 5),
        ("ME", 4), ("RI", 4), ("NH", 4), ("MT", 3), ("DE", 3),
        ("SD", 3), ("ND", 2), ("VT", 2), ("AK", 2), ("WY", 1),
    ]
)
197
+
198
+
199
def weighted_choice(rng, opts):
    """Pick one option at random, with probability proportional to its weight.

    Args:
        rng: Object exposing ``random()`` returning a float in [0, 1).
        opts: Sequence of tuples whose last element is the weight and whose
            first element is the value to return.

    Returns:
        The first element of the chosen tuple; the last tuple's first element
        if the walk over the weights never reaches zero.
    """
    weight_sum = 0
    for *_, weight in opts:
        weight_sum += weight

    remaining = rng.random() * weight_sum
    for option in opts:
        *head, weight = option
        remaining -= weight
        if remaining <= 0:
            return head[0]
    return opts[-1][0]
207
+
208
+
209
def weighted_state(rng):
    """Pick a state code at random, weighted by STATE_WEIGHTS.

    Args:
        rng: Object exposing ``random()`` returning a float in [0, 1).

    Returns:
        A key of STATE_WEIGHTS, or "CA" if the walk over the weights never
        reaches zero.
    """
    remaining = rng.random() * sum(STATE_WEIGHTS.values())
    for code in STATE_WEIGHTS:
        remaining -= STATE_WEIGHTS[code]
        if remaining <= 0:
            return code
    return "CA"
217
+
218
+
219
def status_from_fico(rng, fico, mob):
    """Draw a loan status whose delinquency odds grow as FICO falls and age rises.

    A single random number is compared against a ladder of thresholds, worst
    status first. Each threshold is a fixed floor plus a multiple of a
    delinquency factor derived from ``fico`` and ``mob``.

    Args:
        rng: Object exposing ``random()`` returning a float in [0, 1).
        fico: Credit score.
        mob: Months on book.

    Returns:
        One of "charged_off", "late_90", "late_60", "late_30", "paid_off"
        or "current".
    """
    score_gap = max(0.01, (760 - fico) / 1000)
    base_delinq = score_gap * (1 + mob / 24)
    draw = rng.random()

    # (floor, multiplier, status), checked in order from worst to mildest.
    ladder = (
        (0.005, 0.10, "charged_off"),
        (0.012, 0.30, "late_90"),
        (0.04, 0.55, "late_60"),
        (0.10, 0.85, "late_30"),
    )
    for floor, multiplier, label in ladder:
        if draw < floor + base_delinq * multiplier:
            return label
    return "paid_off" if draw > 0.985 else "current"
233
+
234
+
235
def fico_band(fico):
    """Map a FICO score to its risk band letter.

    Bands by inclusive lower bound: A >= 750, B >= 700, C >= 650, D >= 600;
    anything lower is E.
    """
    floors = ((750, "A"), (700, "B"), (650, "C"), (600, "D"))
    return next((letter for floor, letter in floors if fico >= floor), "E")
245
+
246
+
247
def income_band(income):
    """Map an annual income to its bucket key.

    Buckets are half-open on the upper side: an income equal to a ceiling
    falls in the next bucket up. Incomes of 200,000 or more are "gt200".
    """
    ceilings = (
        (50_000, "lt50"),
        (75_000, "50_75"),
        (100_000, "75_100"),
        (150_000, "100_150"),
        (200_000, "150_200"),
    )
    for ceiling, key in ceilings:
        if income < ceiling:
            return key
    return "gt200"
259
+
260
+
261
def dti_band(dti):
    """Map a debt-to-income ratio (a fraction, e.g. 0.25) to its bucket key.

    The ratio is converted to a percentage and bucketed at 20, 30 and 40;
    a value equal to a boundary falls in the higher bucket.
    """
    percent = dti * 100
    ceilings = ((20, "lt20"), (30, "20_30"), (40, "30_40"))
    for ceiling, key in ceilings:
        if percent < ceiling:
            return key
    return "gt40"
270
+
271
+
272
def gen_rows() -> list[dict]:
    """Generate the synthetic loan book as a list of row dicts.

    The generator is seeded, so repeated runs produce the same rows. Every
    value comes from one shared RNG, so the ORDER of the ``rng.random()``
    calls below defines the dataset: reordering, adding or removing a draw
    changes every subsequent row.

    Returns:
        N_ROWS dicts keyed by the column names used in write_parquet(),
        sorted by origination date, newest first.
    """
    rng = random.Random(42)
    out = []
    for i in range(N_ROWS):
        # Months before the END_Y/END_M anchor, 0..47. Raising the uniform draw
        # to the power 1.6 skews it toward 0, i.e. toward recent originations.
        months_back = int(48 * math.pow(rng.random(), 1.6))
        # Month offset relative to January of END_Y; may be zero or negative.
        oM = END_M - months_back
        year = END_Y + (oM - 1) // 12
        month = ((oM - 1) % 12 + 12) % 12 + 1
        # Day is 1..28 so the date is valid in every month.
        day = 1 + int(rng.random() * 28)
        originated = f"{year:04d}-{month:02d}-{day:02d}"
        # Months on book, measured against the same anchor.
        mob = months_back

        # Sum of three uniforms: a bell-ish spread centred on 695, then clamped.
        fico_raw = 695 + (rng.random() + rng.random() + rng.random() - 1.5) * 75
        fico = max(540, min(820, round(fico_raw)))
        band = fico_band(fico)

        # 5,000..75,000 rounded to the nearest 100; the exponent skews small.
        amount = round((5000 + math.pow(rng.random(), 1.4) * 70000) / 100) * 100
        # APR in percent: falls 0.03 per FICO point above 600, plus +/-0.75 jitter.
        base_rate = 14 - (fico - 600) * 0.03
        rate = max(4.5, min(19.9, base_rate + (rng.random() - 0.5) * 1.5))
        # Income rises with FICO and is rounded to the nearest 1,000.
        income = round((30_000 + (fico - 540) * 200 + rng.random() * 60_000) / 1000) * 1000
        dti = max(0.05, min(0.55, 0.18 + (rng.random() - 0.4) * 0.25))
        term = TERMS[int(rng.random() * len(TERMS))]

        status = status_from_fico(rng, fico, mob)

        # Level payment for an amortizing loan; rate / 1200 is the monthly rate
        # as a fraction. rate is clamped to >= 4.5 above, so the else branch is
        # only a guard against division by zero.
        monthly_pmt = amount * (rate / 1200) / (1 - (1 + rate / 1200) ** (-term)) if rate > 0 else amount / term
        # Pay the loan down one month at a time for the months elapsed.
        months_paid = min(term, mob)
        balance = float(amount)
        for _ in range(months_paid):
            interest = balance * rate / 1200
            balance -= max(0, monthly_pmt - interest)
        if status == "paid_off":
            balance = 0
        elif status == "charged_off":
            # Keep 70% of the amortized balance for charged-off loans.
            balance = balance * 0.7

        product = weighted_choice(rng, [(p[0], p[2]) for p in PRODUCTS])
        channel = weighted_choice(rng, [(c[0], c[2]) for c in CHANNELS])
        employment = weighted_choice(rng, [(e[0], e[2]) for e in EMPLOYMENTS])
        purpose = PURPOSES[int(rng.random() * len(PURPOSES))]
        state = weighted_state(rng)
        autopay = rng.random() < 0.78

        # Last payment month steps back as the status worsens. The late and
        # charged-off months are literals (11, 10, 09, 08) rather than being
        # derived from END_M, so they line up with the END_M = 12 anchor only.
        if status == "current":
            last_pay = f"{END_Y:04d}-{END_M:02d}-{1 + int(rng.random() * 28):02d}"
        elif status == "late_30":
            last_pay = f"{END_Y:04d}-11-{1 + int(rng.random() * 28):02d}"
        elif status == "late_60":
            last_pay = f"{END_Y:04d}-10-{1 + int(rng.random() * 28):02d}"
        elif status == "late_90":
            last_pay = f"{END_Y:04d}-09-{1 + int(rng.random() * 28):02d}"
        elif status == "paid_off":
            # Fixed day; this branch consumes no random draw.
            last_pay = f"{END_Y:04d}-{END_M:02d}-15"
        else:
            # Remaining status is "charged_off".
            last_pay = f"{END_Y:04d}-08-{1 + int(rng.random() * 28):02d}"

        fn = FIRST_NAMES[int(rng.random() * len(FIRST_NAMES))]
        ln = LAST_NAMES[int(rng.random() * len(LAST_NAMES))]
        vintage = f"{year}-Q{(month - 1) // 3 + 1}"

        out.append(
            {
                # Ids count down from LN-25883 in generation order.
                "loanId": f"LN-{25883 - i}",
                "borrower": f"{fn}. {ln}",
                "amount": int(amount),
                "balance": int(round(balance)),
                "fico": int(fico),
                "ficoBand": band,
                "rate": round(rate, 2),
                "term": int(term),
                "income": int(income),
                "incomeBand": income_band(income),
                "dti": round(dti, 3),
                "dtiBand": dti_band(dti),
                "status": status,
                "state": state,
                "product": product,
                "channel": channel,
                "employment": employment,
                "purpose": purpose,
                "autopay": bool(autopay),
                # Non-autopay loans consume one or two extra random draws here.
                "payMethod": "ach_auto"
                if autopay
                else ("ach_manual" if rng.random() < 0.6 else ("card" if rng.random() < 0.7 else "check")),
                "originated": originated,
                "originatedMonth": f"{year:04d}-{month:02d}",
                "vintage": vintage,
                "mob": int(mob),
                "lastPay": last_pay,
                # late_90 and charged_off both map to "legal".
                "collectionsFlag": "none"
                if status in ("current", "paid_off")
                else ("soft" if status == "late_30" else ("active" if status == "late_60" else "legal")),
            }
        )
    # Zero-padded ISO dates sort chronologically as strings; newest first.
    out.sort(key=lambda r: r["originated"], reverse=True)
    return out
368
+
369
+
370
def write_parquet(rows):
    """Write the generated rows to PARQUET_PATH as a zstd-compressed Parquet file.

    Creates DATA_DIR if needed. Column order and types are fixed by the
    explicit schema below; the date-like columns are stored as strings.

    Args:
        rows: List of row dicts as produced by gen_rows().
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    text, int64, int32, float64 = pa.string(), pa.int64(), pa.int32(), pa.float64()
    columns = [
        ("loanId", text),
        ("borrower", text),
        ("amount", int64),
        ("balance", int64),
        ("fico", int32),
        ("ficoBand", text),
        ("rate", float64),
        ("term", int32),
        ("income", int64),
        ("incomeBand", text),
        ("dti", float64),
        ("dtiBand", text),
        ("status", text),
        ("state", text),
        ("product", text),
        ("channel", text),
        ("employment", text),
        ("purpose", text),
        ("autopay", pa.bool_()),
        ("payMethod", text),
        ("originated", text),
        ("originatedMonth", text),
        ("vintage", text),
        ("mob", int32),
        ("lastPay", text),
        ("collectionsFlag", text),
    ]
    loan_table = pa.Table.from_pylist(rows, schema=pa.schema(columns))
    pq.write_table(loan_table, PARQUET_PATH, compression="zstd")
404
+
405
+
406
# Static reference rows written to preview-data.json as the "riskBands"
# resource. The keys mirror the columns selected for riskBands in app.json
# (band, range, apr, defaultRate, color), and the "range" labels follow the
# same cut-offs as fico_band().
FICO_BANDS = [
    {"band": "A", "range": "750+", "apr": 5.8, "defaultRate": 0.6, "color": "#10B981"},
    {"band": "B", "range": "700-749", "apr": 7.2, "defaultRate": 1.4, "color": "#84CC16"},
    {"band": "C", "range": "650-699", "apr": 9.5, "defaultRate": 3.1, "color": "#F59E0B"},
    {"band": "D", "range": "600-649", "apr": 12.8, "defaultRate": 6.2, "color": "#F97316"},
    {"band": "E", "range": "<600", "apr": 17.4, "defaultRate": 11.8, "color": "#EF4444"},
]

# Static reference rows written to preview-data.json as the "statusCatalog"
# resource. Each "key" is one of the status values returned by
# status_from_fico(); the keys mirror the statusCatalog columns in app.json
# (key, label, tone).
STATUS_CATALOG = [
    {"key": "current", "label": "Current", "tone": "ok"},
    {"key": "late_30", "label": "30 days late", "tone": "warn"},
    {"key": "late_60", "label": "60 days late", "tone": "warn"},
    {"key": "late_90", "label": "90+ late", "tone": "bad"},
    {"key": "paid_off", "label": "Paid off", "tone": "muted"},
    {"key": "charged_off", "label": "Charged off", "tone": "bad"},
]
422
+
423
+
424
def write_preview_json():
    """Write preview-data.json next to this script.

    The document has three sections: a fixed preview ``context``, a
    ``datasets`` entry pointing "loans" at the generated Parquet file, and
    the static ``resources`` tables (FICO_BANDS and STATUS_CATALOG). It is
    written with two-space indentation and a trailing newline.
    """
    context = {
        "publicMode": False,
        "driveFile": "preview://data-apps/loan-portfolio",
        "appVersion": "v2",
    }
    datasets = {
        "loans": {"file": "data/loan_book.parquet", "format": "parquet"},
    }
    resources = {
        "riskBands": FICO_BANDS,
        "statusCatalog": STATUS_CATALOG,
    }
    document = {"context": context, "datasets": datasets, "resources": resources}
    PREVIEW_PATH.write_text(f"{json.dumps(document, indent=2)}\n")
440
+
441
+
442
def main():
    """Generate the loan book, write both output files and print a summary.

    The summary shows the row count, the Parquet file size in bytes, and a
    count of rows per status in first-seen order.
    """
    rows = gen_rows()
    write_parquet(rows)
    write_preview_json()

    parquet_bytes = PARQUET_PATH.stat().st_size
    print(f"rows={len(rows)} parquet={parquet_bytes:,} bytes")

    statuses = {}
    for row in rows:
        key = row["status"]
        statuses.setdefault(key, 0)
        statuses[key] += 1
    print(f"statuses={statuses}")


if __name__ == "__main__":
    main()