@definite-app/data-apps 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +686 -0
- package/LICENSE +201 -0
- package/README.md +643 -0
- package/build.mjs +459 -0
- package/examples/_refined_demo/app.json +15 -0
- package/examples/_refined_demo/data/sample.parquet +0 -0
- package/examples/_refined_demo/gen_preview_data.py +59 -0
- package/examples/_refined_demo/preview-data.json +13 -0
- package/examples/_refined_demo/src/App.tsx +188 -0
- package/examples/_refined_demo/src/main.tsx +12 -0
- package/examples/loan-portfolio/app.json +31 -0
- package/examples/loan-portfolio/data/loan_book.parquet +0 -0
- package/examples/loan-portfolio/gen_preview_data.py +454 -0
- package/examples/loan-portfolio/preview-data.json +84 -0
- package/examples/loan-portfolio/src/App.tsx +1103 -0
- package/examples/loan-portfolio/src/main.tsx +12 -0
- package/examples/revenue-explorer/app.json +23 -0
- package/examples/revenue-explorer/data/transactions.parquet +0 -0
- package/examples/revenue-explorer/gen_preview_data.py +129 -0
- package/examples/revenue-explorer/preview-data.json +49 -0
- package/examples/revenue-explorer/src/App.tsx +527 -0
- package/examples/revenue-explorer/src/main.tsx +12 -0
- package/package.json +55 -0
- package/preview.mjs +35 -0
- package/runtime/definite-runtime.tsx +5934 -0
- package/scripts/headless-smoke.mjs +196 -0
- package/templates/blank/app.json +15 -0
- package/templates/blank/src/App.tsx +41 -0
- package/templates/blank/src/main.tsx +12 -0
- package/templates/refined/app.json +15 -0
- package/templates/refined/src/App.tsx +198 -0
- package/templates/refined/src/main.tsx +12 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import React, { useMemo, useState } from "react";
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
buildPalette,
|
|
5
|
+
CachePopover,
|
|
6
|
+
DateRangeFilter,
|
|
7
|
+
DEFAULT_DATE_RANGE_PRESETS,
|
|
8
|
+
type DateRangeValue,
|
|
9
|
+
DrillProvider,
|
|
10
|
+
ErrorState,
|
|
11
|
+
LoadingState,
|
|
12
|
+
PaletteProvider,
|
|
13
|
+
SaasKpiCard,
|
|
14
|
+
ShellLayout,
|
|
15
|
+
Sidebar,
|
|
16
|
+
type SidebarNavItem,
|
|
17
|
+
useDataset,
|
|
18
|
+
useDrill,
|
|
19
|
+
usePalette,
|
|
20
|
+
useSqlQuery,
|
|
21
|
+
useTheme,
|
|
22
|
+
} from "@definite/runtime";
|
|
23
|
+
|
|
24
|
+
// Default to the "Last 12 months" preset. The filter is applied against the
|
|
25
|
+
// `originated` column (see DATE_COLUMN), which app.json must declare — change DATE_COLUMN if you
|
|
26
|
+
// rename it.
|
|
27
|
+
const DATE_COLUMN = "originated";
|
|
28
|
+
|
|
29
|
+
/**
 * Computes the date range the filter starts with: the preset whose key is
 * "last12m" when one exists, otherwise the first entry of
 * DEFAULT_DATE_RANGE_PRESETS.
 */
function initialDateRange(): DateRangeValue {
  const lastTwelveMonths = DEFAULT_DATE_RANGE_PRESETS.find(
    (candidate) => candidate.key === "last12m",
  );
  const chosen = lastTwelveMonths ?? DEFAULT_DATE_RANGE_PRESETS[0];
  return chosen.compute();
}
|
|
35
|
+
|
|
36
|
+
// Doubles every single quote so the value can be embedded in a '…' SQL string literal.
const escSql = (v: string): string => v.split("'").join("''");
|
|
37
|
+
|
|
38
|
+
/**
 * Builds the SQL WHERE fragment that restricts DATE_COLUMN to the given range.
 *
 * @param from Inclusive lower bound; an empty string means "no lower bound".
 * @param to Inclusive upper bound; an empty string means "no upper bound".
 * @returns A fragment starting with " WHERE " (note the leading space), or an
 *   empty string when neither bound is set.
 */
function buildWhere(from: string, to: string): string {
  const bounds: Array<[string, string]> = [
    [">=", from],
    ["<=", to],
  ];
  const conditions = bounds
    .filter(([, bound]) => Boolean(bound))
    .map(([operator, bound]) => `${DATE_COLUMN} ${operator} '${escSql(bound)}'`);
  if (conditions.length === 0) {
    return "";
  }
  return ` WHERE ${conditions.join(" AND ")}`;
}
|
|
44
|
+
|
|
45
|
+
// ── Sidebar navigation ────────────────────────────────────────────────────
|
|
46
|
+
// Each entry maps to a view rendered in the main pane. Icons are single
|
|
47
|
+
// glyphs (Unicode) so apps don't need an icon library; swap for SVGs when
|
|
48
|
+
// you want brand-specific marks.
|
|
49
|
+
const NAV_ITEMS: SidebarNavItem[] = [
|
|
50
|
+
{ id: "overview", label: "Overview", icon: "◧" },
|
|
51
|
+
{ id: "detail", label: "Detail", icon: "≣" },
|
|
52
|
+
];
|
|
53
|
+
|
|
54
|
+
// ── App root ─────────────────────────────────────────────────────────────
|
|
55
|
+
// Pattern: outer <App> holds dataset-loading fallbacks; <InnerApp> runs
|
|
56
|
+
// inside the palette + drill providers so every descendant can usePalette()
|
|
57
|
+
// and useDrill() without prop-drilling.
|
|
58
|
+
|
|
59
|
+
/**
 * Application root. Derives the palette from the current theme, loads the
 * "main" dataset, and shows a loading or error fallback until the dataset is
 * usable. After that it mounts <InnerApp> inside the palette and drill
 * providers.
 */
export default function App() {
  const { theme, toggleTheme } = useTheme();
  // Optionally pass a brand accent: buildPalette(theme, { accent: "#FF006E" })
  const palette = useMemo(() => buildPalette(theme), [theme]);
  const data = useDataset("main");

  if (data.loading) {
    return <LoadingState message="Loading data…" />;
  }
  if (data.error) {
    return <ErrorState title="Dataset failed to load" message={data.error} />;
  }

  // Only a toggle is taken from useTheme here, so flip it only when the
  // requested theme differs from the active one.
  const handleThemeChange = (requested: "dark" | "light"): void => {
    if (requested !== theme) toggleTheme();
  };

  return (
    <PaletteProvider value={palette}>
      <DrillProvider>
        <InnerApp theme={theme} onThemeChange={handleThemeChange} dataset={data} />
      </DrillProvider>
    </PaletteProvider>
  );
}
|
|
80
|
+
|
|
81
|
+
type DatasetHandle = ReturnType<typeof useDataset>;
|
|
82
|
+
|
|
83
|
+
/**
 * Shell of the app once the dataset is available: owns the active view and the
 * date range, runs the example row-count query, and lays out the sidebar,
 * header, and the selected view.
 */
function InnerApp(props: {
  theme: "dark" | "light";
  onThemeChange: (t: "dark" | "light") => void;
  dataset: DatasetHandle;
}) {
  const { theme, onThemeChange, dataset } = props;
  const P = usePalette();
  const [view, setView] = useState("overview");
  const [dateRange, setDateRange] = useState<DateRangeValue>(initialDateRange);

  // Rebuild the WHERE fragment only when one of the range bounds changes.
  const { from, to } = dateRange;
  const where = useMemo(() => buildWhere(from, to), [from, to]);

  // Example query — replace with your own.
  // An empty SQL string is passed while the dataset has no tableRef.
  const countSql = dataset.tableRef
    ? `SELECT COUNT(*)::INTEGER AS rowCount FROM ${dataset.tableRef}${where}`
    : "";
  const summary = useSqlQuery<Array<{ rowCount: number }>>(dataset, countSql, [where]);

  const rowCount = summary.data?.[0]?.rowCount ?? 0;
  // Fall back to the first nav entry if `view` matches no known id.
  const navItem = NAV_ITEMS.find((n) => n.id === view) ?? NAV_ITEMS[0];

  const dateFilter = (
    <DateRangeFilter
      value={dateRange}
      onChange={setDateRange}
      label={null}
      popoverPlacement="right-start"
      triggerStyle={{ width: "100%", minWidth: 0, justifyContent: "space-between", padding: "7px 10px", fontSize: 12 }}
    />
  );

  const sidebar = (
    <Sidebar
      logo={{ title: "My App", subtitle: "Replace this" }}
      navItems={NAV_ITEMS}
      activeView={view}
      onViewChange={setView}
      dateRangeSlot={dateFilter}
      theme={theme}
      onThemeChange={onThemeChange}
      footer={<>Live DuckDB · {rowCount.toLocaleString()} rows</>}
    />
  );

  // Prefer the row count reported by the dataset cache; otherwise use the query result.
  const headerRight = (
    <CachePopover
      isLoading={summary.loading}
      rowCount={dataset.cache?.rowCount ?? rowCount}
      cache={dataset.cache}
      onRefresh={dataset.refresh}
    />
  );

  return (
    <ShellLayout
      palette={P}
      sidebar={sidebar}
      title={navItem.label}
      breadcrumb={["App", navItem.label]}
      headerRight={headerRight}
    >
      {view === "overview" && <OverviewView rowCount={rowCount} loading={summary.loading} />}
      {view === "detail" && <DetailView rowCount={rowCount} />}
    </ShellLayout>
  );
}
|
|
152
|
+
|
|
153
|
+
/**
 * Overview pane: a single KPI card showing the row count. Clicking the card
 * opens a drill panel with the same figure.
 */
function OverviewView({ rowCount, loading }: { rowCount: number; loading: boolean }) {
  const drill = useDrill();
  // Format once; the card and the drill payload all show the same string.
  const formattedCount = rowCount.toLocaleString();

  const openRowsDrill = () =>
    drill.open({
      kind: "kpi",
      id: "rows",
      title: "Total rows",
      value: formattedCount,
      breadcrumb: "Overview",
      stats: [["Row count", formattedCount]],
      narrative: "Dataset row count. Replace with your own computed stats.",
      sql: `SELECT COUNT(*) FROM main;`,
    });

  return (
    <div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fit, minmax(220px, 1fr))", gap: 12 }}>
      <SaasKpiCard
        title="Total rows"
        value={formattedCount}
        sub="in dataset"
        loading={loading}
        onClick={openRowsDrill}
      />
    </div>
  );
}
|
|
176
|
+
|
|
177
|
+
/**
 * Placeholder detail pane: a palette-styled card that states how many rows
 * are available and points at useSqlQuery for building real content.
 */
function DetailView({ rowCount }: { rowCount: number }) {
  const P = usePalette();

  const cardStyle: React.CSSProperties = {
    background: P.card,
    border: `1px solid ${P.border}`,
    borderRadius: 10,
    padding: 24,
    color: P.sub,
    fontSize: 14,
    lineHeight: 1.6,
  };
  const headingStyle: React.CSSProperties = {
    fontSize: 16,
    color: P.text,
    fontWeight: 600,
    marginBottom: 8,
  };
  const codeStyle: React.CSSProperties = {
    fontFamily: P.mono,
    background: P.elev,
    padding: "1px 5px",
    borderRadius: 3,
  };

  return (
    <div style={cardStyle}>
      <div style={headingStyle}>Detail view</div>
      Replace this with your own detail content — tables, forms, drill-downs. The dataset has <b style={{ color: P.text }}>{rowCount.toLocaleString()}</b> rows available via <code style={codeStyle}>useSqlQuery(data, "...")</code>.
    </div>
  );
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import React from "react";
|
|
2
|
+
import { createRoot } from "react-dom/client";
|
|
3
|
+
|
|
4
|
+
import App from "./App";
|
|
5
|
+
|
|
6
|
+
// Locate the mount node and fail loudly if the host page does not provide it.
const rootElement = document.getElementById("root");
if (!rootElement) throw new Error("Missing #root mount element");

// Mount the app into the located node.
const reactRoot = createRoot(rootElement);
reactRoot.render(<App />);
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 2,
|
|
3
|
+
"name": "Loan Portfolio",
|
|
4
|
+
"entry": "src/main.tsx",
|
|
5
|
+
"resources": {
|
|
6
|
+
"loans": {
|
|
7
|
+
"kind": "dataset",
|
|
8
|
+
"source": {
|
|
9
|
+
"type": "sql",
|
|
10
|
+
"sql": "SELECT loan_id AS loanId, borrower, amount::BIGINT AS amount, balance::BIGINT AS balance, fico::INTEGER AS fico, fico_band AS ficoBand, rate::DOUBLE AS rate, term::INTEGER AS term, income::BIGINT AS income, income_band AS incomeBand, dti::DOUBLE AS dti, dti_band AS dtiBand, status, state, product, channel, employment, purpose, autopay, pay_method AS payMethod, STRFTIME(originated, '%Y-%m-%d') AS originated, STRFTIME(originated, '%Y-%m') AS originatedMonth, vintage, mob::INTEGER AS mob, STRFTIME(last_pay, '%Y-%m-%d') AS lastPay, collections_flag AS collectionsFlag FROM LAKE.SCHEMA.loan_book LIMIT 500000"
|
|
11
|
+
},
|
|
12
|
+
"public": false
|
|
13
|
+
},
|
|
14
|
+
"riskBands": {
|
|
15
|
+
"kind": "json",
|
|
16
|
+
"source": {
|
|
17
|
+
"type": "sql",
|
|
18
|
+
"sql": "SELECT band, range, apr::DOUBLE AS apr, default_rate::DOUBLE AS defaultRate, color FROM LAKE.SCHEMA.risk_bands ORDER BY band"
|
|
19
|
+
},
|
|
20
|
+
"public": false
|
|
21
|
+
},
|
|
22
|
+
"statusCatalog": {
|
|
23
|
+
"kind": "json",
|
|
24
|
+
"source": {
|
|
25
|
+
"type": "sql",
|
|
26
|
+
"sql": "SELECT key, label, tone FROM LAKE.SCHEMA.loan_statuses"
|
|
27
|
+
},
|
|
28
|
+
"public": false
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
#!/usr/bin/env -S uv run
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = ["duckdb", "pyarrow"]
|
|
5
|
+
# ///
|
|
6
|
+
"""Generate a synthetic consumer loan book for the Refined SaaS v2 template.
|
|
7
|
+
|
|
8
|
+
Writes data/loan_book.parquet (2,588 loans) and preview-data.json so the app
|
|
9
|
+
has realistic data to render against without hitting the warehouse.
|
|
10
|
+
|
|
11
|
+
Run: uv run gen_preview_data.py
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import math
|
|
16
|
+
import random
|
|
17
|
+
from datetime import date
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
import pyarrow as pa
|
|
21
|
+
import pyarrow.parquet as pq
|
|
22
|
+
|
|
23
|
+
HERE = Path(__file__).parent
|
|
24
|
+
DATA_DIR = HERE / "data"
|
|
25
|
+
PARQUET_PATH = DATA_DIR / "loan_book.parquet"
|
|
26
|
+
PREVIEW_PATH = HERE / "preview-data.json"
|
|
27
|
+
|
|
28
|
+
N_ROWS = 2588
|
|
29
|
+
# Anchor the book at end of 2026 so the dashboard's default "Last 12 months"
|
|
30
|
+
# preset (relative to current date) covers a dense window of recent originations.
|
|
31
|
+
# The 48-month span in months_back (see gen_rows) spans back to 2023-01.
|
|
32
|
+
END_Y, END_M = 2026, 12
|
|
33
|
+
|
|
34
|
+
PRODUCTS = [
|
|
35
|
+
("personal", "Personal loan", 1482),
|
|
36
|
+
("consolidation", "Debt consolidation", 612),
|
|
37
|
+
("auto", "Auto-secured", 308),
|
|
38
|
+
("home", "Home improvement", 186),
|
|
39
|
+
]
|
|
40
|
+
CHANNELS = [
|
|
41
|
+
("direct_mail", "Direct mail", 784),
|
|
42
|
+
("paid_search", "Paid search", 612),
|
|
43
|
+
("affiliate", "Affiliate", 498),
|
|
44
|
+
("organic", "Organic", 406),
|
|
45
|
+
("partner", "Partner API", 288),
|
|
46
|
+
]
|
|
47
|
+
EMPLOYMENTS = [
|
|
48
|
+
("w2", "W-2 full-time", 1812),
|
|
49
|
+
("self_employed", "Self-employed", 412),
|
|
50
|
+
("contract", "1099 / contract", 218),
|
|
51
|
+
("retired", "Retired", 86),
|
|
52
|
+
("other", "Other", 60),
|
|
53
|
+
]
|
|
54
|
+
TERMS = [24, 36, 48, 60]
|
|
55
|
+
PURPOSES = [
|
|
56
|
+
"Debt consolidation",
|
|
57
|
+
"Home repair",
|
|
58
|
+
"Medical",
|
|
59
|
+
"Wedding",
|
|
60
|
+
"Auto repair",
|
|
61
|
+
"Moving",
|
|
62
|
+
"Education",
|
|
63
|
+
"Vacation",
|
|
64
|
+
"Business",
|
|
65
|
+
"Other",
|
|
66
|
+
]
|
|
67
|
+
FIRST_NAMES = list("MRJASDET KLPBCHNOVWYZFGI".replace(" ", ""))
|
|
68
|
+
LAST_NAMES = [
|
|
69
|
+
"Chen",
|
|
70
|
+
"Patel",
|
|
71
|
+
"Williams",
|
|
72
|
+
"Kowalski",
|
|
73
|
+
"Nakamura",
|
|
74
|
+
"Okafor",
|
|
75
|
+
"Martinez",
|
|
76
|
+
"Becker",
|
|
77
|
+
"Thompson",
|
|
78
|
+
"Garcia",
|
|
79
|
+
"Rodriguez",
|
|
80
|
+
"Hernandez",
|
|
81
|
+
"Singh",
|
|
82
|
+
"Ahmed",
|
|
83
|
+
"Cohen",
|
|
84
|
+
"O'Brien",
|
|
85
|
+
"Walsh",
|
|
86
|
+
"Andersen",
|
|
87
|
+
"Muller",
|
|
88
|
+
"Lopez",
|
|
89
|
+
"Tran",
|
|
90
|
+
"Nguyen",
|
|
91
|
+
"Park",
|
|
92
|
+
"Kim",
|
|
93
|
+
"Wong",
|
|
94
|
+
"Liu",
|
|
95
|
+
"Murphy",
|
|
96
|
+
"Foster",
|
|
97
|
+
"Reyes",
|
|
98
|
+
"Diaz",
|
|
99
|
+
"Webb",
|
|
100
|
+
"Holt",
|
|
101
|
+
"Sloan",
|
|
102
|
+
"Mendez",
|
|
103
|
+
"Zhao",
|
|
104
|
+
"Iyer",
|
|
105
|
+
"Khan",
|
|
106
|
+
"Bauer",
|
|
107
|
+
"Roux",
|
|
108
|
+
"Esposito",
|
|
109
|
+
"Romano",
|
|
110
|
+
"Bianchi",
|
|
111
|
+
"Fischer",
|
|
112
|
+
"Schmidt",
|
|
113
|
+
"Larsen",
|
|
114
|
+
"Berg",
|
|
115
|
+
"Costa",
|
|
116
|
+
"Silva",
|
|
117
|
+
"Petrov",
|
|
118
|
+
"Volkov",
|
|
119
|
+
"Novak",
|
|
120
|
+
"Jansen",
|
|
121
|
+
"Yamamoto",
|
|
122
|
+
"Suzuki",
|
|
123
|
+
"Tanaka",
|
|
124
|
+
"Mensah",
|
|
125
|
+
"Adeyemi",
|
|
126
|
+
"Eze",
|
|
127
|
+
"Ndiaye",
|
|
128
|
+
"Cisse",
|
|
129
|
+
"Saito",
|
|
130
|
+
"Kovac",
|
|
131
|
+
"Vasquez",
|
|
132
|
+
"Romero",
|
|
133
|
+
"Castillo",
|
|
134
|
+
"Cruz",
|
|
135
|
+
"Ortiz",
|
|
136
|
+
"Ramos",
|
|
137
|
+
"Brennan",
|
|
138
|
+
"Donnelly",
|
|
139
|
+
"Quinn",
|
|
140
|
+
"Sweeney",
|
|
141
|
+
"McKay",
|
|
142
|
+
"Lindgren",
|
|
143
|
+
"Hansen",
|
|
144
|
+
]
|
|
145
|
+
STATE_WEIGHTS = {
|
|
146
|
+
"CA": 412,
|
|
147
|
+
"TX": 389,
|
|
148
|
+
"NY": 276,
|
|
149
|
+
"FL": 268,
|
|
150
|
+
"IL": 214,
|
|
151
|
+
"PA": 158,
|
|
152
|
+
"OH": 142,
|
|
153
|
+
"GA": 131,
|
|
154
|
+
"AZ": 118,
|
|
155
|
+
"WA": 104,
|
|
156
|
+
"NC": 96,
|
|
157
|
+
"CO": 84,
|
|
158
|
+
"OR": 71,
|
|
159
|
+
"MA": 68,
|
|
160
|
+
"VA": 62,
|
|
161
|
+
"MI": 58,
|
|
162
|
+
"NJ": 54,
|
|
163
|
+
"MN": 48,
|
|
164
|
+
"MD": 42,
|
|
165
|
+
"IN": 38,
|
|
166
|
+
"WI": 34,
|
|
167
|
+
"TN": 32,
|
|
168
|
+
"MO": 28,
|
|
169
|
+
"SC": 25,
|
|
170
|
+
"LA": 22,
|
|
171
|
+
"NV": 21,
|
|
172
|
+
"KY": 18,
|
|
173
|
+
"OK": 16,
|
|
174
|
+
"AL": 15,
|
|
175
|
+
"UT": 14,
|
|
176
|
+
"CT": 13,
|
|
177
|
+
"KS": 12,
|
|
178
|
+
"AR": 11,
|
|
179
|
+
"IA": 10,
|
|
180
|
+
"MS": 9,
|
|
181
|
+
"NE": 8,
|
|
182
|
+
"NM": 7,
|
|
183
|
+
"WV": 6,
|
|
184
|
+
"ID": 6,
|
|
185
|
+
"HI": 5,
|
|
186
|
+
"ME": 4,
|
|
187
|
+
"RI": 4,
|
|
188
|
+
"NH": 4,
|
|
189
|
+
"MT": 3,
|
|
190
|
+
"DE": 3,
|
|
191
|
+
"SD": 3,
|
|
192
|
+
"ND": 2,
|
|
193
|
+
"VT": 2,
|
|
194
|
+
"AK": 2,
|
|
195
|
+
"WY": 1,
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def weighted_choice(rng, opts):
    """Pick one option at random, weighted by each option's last element.

    Args:
        rng: Object with a ``random()`` method returning a float in [0, 1).
        opts: Sequence of tuples whose first element is the value to return
            and whose last element is that value's weight.

    Returns:
        The first element of the selected tuple. If the running total never
        reaches zero, the first element of the final tuple is returned.
    """
    weights = [entry[-1] for entry in opts]
    total = sum(weights)
    remaining = rng.random() * total
    # Walk the options, consuming weight until the draw is used up.
    for entry, weight in zip(opts, weights):
        remaining -= weight
        if remaining <= 0:
            return entry[0]
    return opts[-1][0]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def weighted_state(rng):
    """Pick a state code at random, weighted by STATE_WEIGHTS.

    Args:
        rng: Object with a ``random()`` method returning a float in [0, 1).

    Returns:
        A key of STATE_WEIGHTS; "CA" if the running total never reaches zero.
    """
    weight_sum = sum(STATE_WEIGHTS.values())
    remaining = rng.random() * weight_sum
    for state_code in STATE_WEIGHTS:
        remaining -= STATE_WEIGHTS[state_code]
        if remaining <= 0:
            return state_code
    return "CA"
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def status_from_fico(rng, fico, mob):
    """Draw a loan status; lower FICO and more months on book raise delinquency odds.

    Args:
        rng: Object with a ``random()`` method returning a float in [0, 1).
        fico: Credit score of the borrower.
        mob: Months on book.

    Returns:
        One of "charged_off", "late_90", "late_60", "late_30", "paid_off",
        or "current". Exactly one value is drawn from ``rng``.
    """
    base_delinq = max(0.01, (760 - fico) / 1000) * (1 + mob / 24)
    draw = rng.random()
    # (floor, slope, status): checked in order, worst outcome first.
    ladder = (
        (0.005, 0.10, "charged_off"),
        (0.012, 0.30, "late_90"),
        (0.04, 0.55, "late_60"),
        (0.10, 0.85, "late_30"),
    )
    for floor, slope, label in ladder:
        if draw < floor + base_delinq * slope:
            return label
    # Only the top sliver of draws that escaped delinquency counts as paid off.
    return "paid_off" if draw > 0.985 else "current"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def fico_band(fico):
    """Map a FICO score to its letter band: A (750+) down to E (below 600)."""
    # Floors are inclusive and checked from the highest band down.
    for floor, grade in ((750, "A"), (700, "B"), (650, "C"), (600, "D")):
        if fico >= floor:
            return grade
    return "E"
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def income_band(income):
    """Map an annual income to its band key, from "lt50" up to "gt200"."""
    # Ceilings are exclusive and checked from the lowest band up.
    ceilings = (
        (50_000, "lt50"),
        (75_000, "50_75"),
        (100_000, "75_100"),
        (150_000, "100_150"),
        (200_000, "150_200"),
    )
    for ceiling, key in ceilings:
        if income < ceiling:
            return key
    return "gt200"
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def dti_band(dti):
    """Map a debt-to-income ratio (a fraction, e.g. 0.25) to its band key."""
    pct = dti * 100
    # Ceilings are exclusive percentages, checked from the lowest band up.
    for ceiling, key in ((20, "lt20"), (30, "20_30"), (40, "30_40")):
        if pct < ceiling:
            return key
    return "gt40"
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _months_before(year, month, back):
    """Return the (year, month) that lies `back` whole months before (year, month)."""
    index = year * 12 + (month - 1) - back
    return index // 12, index % 12 + 1


# Months before the END_Y/END_M anchor in which the last payment landed, per
# status. "paid_off" is handled separately because it uses a fixed day.
_LAST_PAY_MONTHS_BACK = {
    "current": 0,
    "late_30": 1,
    "late_60": 2,
    "late_90": 3,
    "charged_off": 4,
}


def gen_rows():
    """Generate the synthetic loan rows.

    Uses ``random.Random(42)``, so repeated runs produce the same rows. The
    order of ``rng`` draws inside the loop determines every generated value;
    do not reorder them unless you intend to change the data.

    Returns:
        A list of N_ROWS dicts, one per loan, keyed by the column names used
        in write_parquet and sorted by "originated" descending.
    """
    rng = random.Random(42)
    out = []
    for i in range(N_ROWS):
        # Skew originations toward the anchor month (exponent > 1 favours small values).
        months_back = int(48 * math.pow(rng.random(), 1.6))
        year, month = _months_before(END_Y, END_M, months_back)
        day = 1 + int(rng.random() * 28)
        originated = f"{year:04d}-{month:02d}-{day:02d}"
        mob = months_back

        # Sum of three uniforms gives a bell-shaped score centred on 695.
        fico_raw = 695 + (rng.random() + rng.random() + rng.random() - 1.5) * 75
        fico = max(540, min(820, round(fico_raw)))
        band = fico_band(fico)

        amount = round((5000 + math.pow(rng.random(), 1.4) * 70000) / 100) * 100
        base_rate = 14 - (fico - 600) * 0.03
        rate = max(4.5, min(19.9, base_rate + (rng.random() - 0.5) * 1.5))
        income = round((30_000 + (fico - 540) * 200 + rng.random() * 60_000) / 1000) * 1000
        dti = max(0.05, min(0.55, 0.18 + (rng.random() - 0.4) * 0.25))
        term = TERMS[int(rng.random() * len(TERMS))]

        status = status_from_fico(rng, fico, mob)

        # Amortize the loan for the months already on book to get the balance.
        monthly_pmt = amount * (rate / 1200) / (1 - (1 + rate / 1200) ** (-term)) if rate > 0 else amount / term
        months_paid = min(term, mob)
        balance = float(amount)
        for _ in range(months_paid):
            interest = balance * rate / 1200
            balance -= max(0, monthly_pmt - interest)
        if status == "paid_off":
            balance = 0
        elif status == "charged_off":
            balance = balance * 0.7

        product = weighted_choice(rng, [(p[0], p[2]) for p in PRODUCTS])
        channel = weighted_choice(rng, [(c[0], c[2]) for c in CHANNELS])
        employment = weighted_choice(rng, [(e[0], e[2]) for e in EMPLOYMENTS])
        purpose = PURPOSES[int(rng.random() * len(PURPOSES))]
        state = weighted_state(rng)
        autopay = rng.random() < 0.78

        # The last-payment month is derived from the anchor, so it stays
        # consistent with the origination dates if END_Y/END_M are changed.
        if status == "paid_off":
            # Fixed day, no rng draw.
            last_pay = f"{END_Y:04d}-{END_M:02d}-15"
        else:
            pay_year, pay_month = _months_before(
                END_Y, END_M, _LAST_PAY_MONTHS_BACK.get(status, 4)
            )
            last_pay = f"{pay_year:04d}-{pay_month:02d}-{1 + int(rng.random() * 28):02d}"

        fn = FIRST_NAMES[int(rng.random() * len(FIRST_NAMES))]
        ln = LAST_NAMES[int(rng.random() * len(LAST_NAMES))]
        vintage = f"{year}-Q{(month - 1) // 3 + 1}"

        # Non-autopay loans draw up to two more values to pick a manual method.
        if autopay:
            pay_method = "ach_auto"
        elif rng.random() < 0.6:
            pay_method = "ach_manual"
        elif rng.random() < 0.7:
            pay_method = "card"
        else:
            pay_method = "check"

        if status in ("current", "paid_off"):
            collections_flag = "none"
        elif status == "late_30":
            collections_flag = "soft"
        elif status == "late_60":
            collections_flag = "active"
        else:
            collections_flag = "legal"

        out.append(
            {
                "loanId": f"LN-{25883 - i}",
                "borrower": f"{fn}. {ln}",
                "amount": int(amount),
                "balance": int(round(balance)),
                "fico": int(fico),
                "ficoBand": band,
                "rate": round(rate, 2),
                "term": int(term),
                "income": int(income),
                "incomeBand": income_band(income),
                "dti": round(dti, 3),
                "dtiBand": dti_band(dti),
                "status": status,
                "state": state,
                "product": product,
                "channel": channel,
                "employment": employment,
                "purpose": purpose,
                "autopay": bool(autopay),
                "payMethod": pay_method,
                "originated": originated,
                "originatedMonth": f"{year:04d}-{month:02d}",
                "vintage": vintage,
                "mob": int(mob),
                "lastPay": last_pay,
                "collectionsFlag": collections_flag,
            }
        )
    out.sort(key=lambda r: r["originated"], reverse=True)
    return out
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def write_parquet(rows):
    """Write the loan rows to PARQUET_PATH as a zstd-compressed Parquet file.

    Creates DATA_DIR if needed. Column order and types follow the explicit
    schema below.

    Args:
        rows: List of row dicts as produced by gen_rows.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    text = pa.string()
    int64 = pa.int64()
    int32 = pa.int32()
    real = pa.float64()
    columns = [
        ("loanId", text),
        ("borrower", text),
        ("amount", int64),
        ("balance", int64),
        ("fico", int32),
        ("ficoBand", text),
        ("rate", real),
        ("term", int32),
        ("income", int64),
        ("incomeBand", text),
        ("dti", real),
        ("dtiBand", text),
        ("status", text),
        ("state", text),
        ("product", text),
        ("channel", text),
        ("employment", text),
        ("purpose", text),
        ("autopay", pa.bool_()),
        ("payMethod", text),
        ("originated", text),
        ("originatedMonth", text),
        ("vintage", text),
        ("mob", int32),
        ("lastPay", text),
        ("collectionsFlag", text),
    ]

    loan_table = pa.Table.from_pylist(rows, schema=pa.schema(columns))
    pq.write_table(loan_table, PARQUET_PATH, compression="zstd")
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
FICO_BANDS = [
|
|
407
|
+
{"band": "A", "range": "750+", "apr": 5.8, "defaultRate": 0.6, "color": "#10B981"},
|
|
408
|
+
{"band": "B", "range": "700-749", "apr": 7.2, "defaultRate": 1.4, "color": "#84CC16"},
|
|
409
|
+
{"band": "C", "range": "650-699", "apr": 9.5, "defaultRate": 3.1, "color": "#F59E0B"},
|
|
410
|
+
{"band": "D", "range": "600-649", "apr": 12.8, "defaultRate": 6.2, "color": "#F97316"},
|
|
411
|
+
{"band": "E", "range": "<600", "apr": 17.4, "defaultRate": 11.8, "color": "#EF4444"},
|
|
412
|
+
]
|
|
413
|
+
|
|
414
|
+
STATUS_CATALOG = [
|
|
415
|
+
{"key": "current", "label": "Current", "tone": "ok"},
|
|
416
|
+
{"key": "late_30", "label": "30 days late", "tone": "warn"},
|
|
417
|
+
{"key": "late_60", "label": "60 days late", "tone": "warn"},
|
|
418
|
+
{"key": "late_90", "label": "90+ late", "tone": "bad"},
|
|
419
|
+
{"key": "paid_off", "label": "Paid off", "tone": "muted"},
|
|
420
|
+
{"key": "charged_off", "label": "Charged off", "tone": "bad"},
|
|
421
|
+
]
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def write_preview_json():
    """Write preview-data.json: preview context, the parquet dataset pointer,
    and the inline riskBands / statusCatalog resources."""
    context = {
        "publicMode": False,
        "driveFile": "preview://data-apps/loan-portfolio",
        "appVersion": "v2",
    }
    datasets = {
        "loans": {"file": "data/loan_book.parquet", "format": "parquet"},
    }
    resources = {
        "riskBands": FICO_BANDS,
        "statusCatalog": STATUS_CATALOG,
    }
    payload = {"context": context, "datasets": datasets, "resources": resources}
    serialized = json.dumps(payload, indent=2)
    # Trailing newline keeps the file POSIX-friendly.
    PREVIEW_PATH.write_text(serialized + "\n")
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def main():
    """Generate the rows, write both output files, and print a short summary."""
    rows = gen_rows()
    write_parquet(rows)
    write_preview_json()

    parquet_bytes = PARQUET_PATH.stat().st_size
    print(f"rows={len(rows)} parquet={parquet_bytes:,} bytes")

    # Tally rows per status, keeping first-seen order for the printout.
    statuses = {}
    for row in rows:
        key = row["status"]
        statuses[key] = statuses.get(key, 0) + 1
    print(f"statuses={statuses}")
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
if __name__ == "__main__":
|
|
454
|
+
main()
|