optimal-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +175 -0
  2. package/dist/bin/optimal.d.ts +2 -0
  3. package/dist/bin/optimal.js +995 -0
  4. package/dist/lib/budget/projections.d.ts +115 -0
  5. package/dist/lib/budget/projections.js +384 -0
  6. package/dist/lib/budget/scenarios.d.ts +93 -0
  7. package/dist/lib/budget/scenarios.js +214 -0
  8. package/dist/lib/cms/publish-blog.d.ts +62 -0
  9. package/dist/lib/cms/publish-blog.js +74 -0
  10. package/dist/lib/cms/strapi-client.d.ts +123 -0
  11. package/dist/lib/cms/strapi-client.js +213 -0
  12. package/dist/lib/config.d.ts +55 -0
  13. package/dist/lib/config.js +206 -0
  14. package/dist/lib/infra/deploy.d.ts +29 -0
  15. package/dist/lib/infra/deploy.js +58 -0
  16. package/dist/lib/infra/migrate.d.ts +34 -0
  17. package/dist/lib/infra/migrate.js +103 -0
  18. package/dist/lib/kanban.d.ts +46 -0
  19. package/dist/lib/kanban.js +118 -0
  20. package/dist/lib/newsletter/distribute.d.ts +52 -0
  21. package/dist/lib/newsletter/distribute.js +193 -0
  22. package/dist/lib/newsletter/generate-insurance.d.ts +42 -0
  23. package/dist/lib/newsletter/generate-insurance.js +36 -0
  24. package/dist/lib/newsletter/generate.d.ts +104 -0
  25. package/dist/lib/newsletter/generate.js +571 -0
  26. package/dist/lib/returnpro/anomalies.d.ts +64 -0
  27. package/dist/lib/returnpro/anomalies.js +166 -0
  28. package/dist/lib/returnpro/audit.d.ts +32 -0
  29. package/dist/lib/returnpro/audit.js +147 -0
  30. package/dist/lib/returnpro/diagnose.d.ts +52 -0
  31. package/dist/lib/returnpro/diagnose.js +281 -0
  32. package/dist/lib/returnpro/kpis.d.ts +32 -0
  33. package/dist/lib/returnpro/kpis.js +192 -0
  34. package/dist/lib/returnpro/templates.d.ts +48 -0
  35. package/dist/lib/returnpro/templates.js +229 -0
  36. package/dist/lib/returnpro/upload-income.d.ts +25 -0
  37. package/dist/lib/returnpro/upload-income.js +235 -0
  38. package/dist/lib/returnpro/upload-netsuite.d.ts +37 -0
  39. package/dist/lib/returnpro/upload-netsuite.js +566 -0
  40. package/dist/lib/returnpro/upload-r1.d.ts +48 -0
  41. package/dist/lib/returnpro/upload-r1.js +398 -0
  42. package/dist/lib/social/post-generator.d.ts +83 -0
  43. package/dist/lib/social/post-generator.js +333 -0
  44. package/dist/lib/social/publish.d.ts +66 -0
  45. package/dist/lib/social/publish.js +226 -0
  46. package/dist/lib/social/scraper.d.ts +67 -0
  47. package/dist/lib/social/scraper.js +361 -0
  48. package/dist/lib/supabase.d.ts +4 -0
  49. package/dist/lib/supabase.js +20 -0
  50. package/dist/lib/transactions/delete-batch.d.ts +60 -0
  51. package/dist/lib/transactions/delete-batch.js +203 -0
  52. package/dist/lib/transactions/ingest.d.ts +43 -0
  53. package/dist/lib/transactions/ingest.js +555 -0
  54. package/dist/lib/transactions/stamp.d.ts +51 -0
  55. package/dist/lib/transactions/stamp.js +524 -0
  56. package/package.json +50 -0
@@ -0,0 +1,166 @@
1
+ import { getSupabase } from '../supabase.js';
2
+ // --- Helpers ---
3
+ const PAGE_SIZE = 1000;
4
/**
 * Coerce a raw database value to a finite number, falling back to 0.
 *
 * Strings are read with parseFloat (so a numeric prefix such as "3.2 units"
 * yields 3.2); every other type goes through Number(). null, undefined, NaN
 * and ±Infinity all collapse to 0 so callers never receive a value that
 * would poison sums or comparisons.
 *
 * @param {unknown} v - Raw value (typically a string or number column).
 * @returns {number} A finite number; 0 when the value cannot be read.
 */
function toNum(v) {
    if (v === null || v === undefined) {
        return 0;
    }
    const parsed = typeof v === 'string' ? Number.parseFloat(v) : Number(v);
    if (!Number.isFinite(parsed)) {
        // Covers NaN as well as "Infinity"/Infinity, which `|| 0` alone let through.
        return 0;
    }
    // `|| 0` keeps the historical normalisation of -0 to 0.
    return parsed || 0;
}
9
/**
 * Coerce a raw value to a finite number, or null when that is not possible.
 *
 * Strings go through parseFloat, everything else through Number(). Missing
 * values (null/undefined) and non-finite results (NaN, ±Infinity) give null.
 *
 * @param {unknown} v - Raw value to convert.
 * @returns {number | null} The finite number, or null.
 */
function toNumOrNull(v) {
    if (v == null) {
        // `== null` matches exactly null and undefined.
        return null;
    }
    let parsed;
    if (typeof v === 'string') {
        parsed = parseFloat(v);
    }
    else {
        parsed = Number(v);
    }
    if (!isFinite(parsed)) {
        return null;
    }
    return parsed;
}
15
/**
 * Read every row of v_rate_anomaly_analysis, one PAGE_SIZE window at a time.
 *
 * The month filter is applied before the ordering/range modifiers, and the
 * ordering carries tie-breakers after (month, master_program). Offset-based
 * paging over an ordering with ties can return the same row twice or skip a
 * row between pages, because the database is free to arrange tied rows
 * differently on each request.
 * NOTE(review): assumes (month, master_program, program_code, program_id)
 * is distinct per view row — confirm against the view definition.
 *
 * @param {string[] | undefined} months - Values matched against the view's
 *   `month` column; when empty or omitted, no month filter is applied.
 * @returns {Promise<object[]>} Raw view rows in query order.
 * @throws {Error} When any page request reports an error.
 */
async function fetchViewRows(months) {
    const sb = getSupabase('returnpro');
    const columns = 'master_program,program_code,program_id,client_id,client_name,' +
        'month,checkin_fee_dollars,units,rate_per_unit,prev_month_rate,' +
        'rate_delta_pct,units_change_pct,dollars_change_pct';
    const hasMonthFilter = Array.isArray(months) && months.length > 0;
    const allRows = [];
    let from = 0;
    while (true) {
        let query = sb.from('v_rate_anomaly_analysis').select(columns);
        if (hasMonthFilter) {
            query = query.in('month', months);
        }
        const { data, error } = await query
            .order('month', { ascending: false })
            .order('master_program')
            // Tie-breakers keep page boundaries stable across requests.
            .order('program_code')
            .order('program_id')
            .range(from, from + PAGE_SIZE - 1);
        if (error) {
            throw new Error(`Fetch v_rate_anomaly_analysis failed: ${error.message}`);
        }
        if (!data || data.length === 0) {
            break;
        }
        allRows.push(...data);
        if (data.length < PAGE_SIZE) {
            break; // short page: nothing left to read
        }
        from += PAGE_SIZE;
    }
    return allRows;
}
47
/**
 * Mean and sample standard deviation (n - 1 denominator) of a number list.
 *
 * An empty list gives { mean: 0, stddev: 0 }; a single value gives that
 * value as the mean with stddev 0.
 *
 * @param {number[]} values - Numbers to summarise.
 * @returns {{ mean: number, stddev: number }}
 */
function computeStats(values) {
    const n = values.length;
    if (n === 0) {
        return { mean: 0, stddev: 0 };
    }
    let total = 0;
    for (const value of values) {
        total = total + value;
    }
    const mean = total / n;
    if (n < 2) {
        return { mean, stddev: 0 };
    }
    let squaredDeviations = 0;
    for (const value of values) {
        squaredDeviations = squaredDeviations + (value - mean) ** 2;
    }
    const variance = squaredDeviations / (n - 1);
    return { mean, stddev: Math.sqrt(variance) };
}
60
+ // --- Core ---
61
/**
 * List the fiscal-year-to-date months for the given instant as YYYY-MM
 * strings, from April of the fiscal start year through the month of `now`.
 *
 * Works on plain year/month integers read from local time. Round-tripping
 * through `new Date('YYYY-MM-01')` would parse as UTC and then be read back
 * with local getters, which shifts or skips months west of UTC.
 *
 * @param {Date} now - Reference instant.
 * @returns {string[]} Months in ascending order.
 */
function fiscalYtdMonths(now) {
    const year = now.getFullYear();
    const monthIndex = now.getMonth(); // 0-indexed
    // Fiscal year starts in April; Jan-Mar belong to the year that began last April.
    const fiscalStartYear = monthIndex < 3 ? year - 1 : year;
    const lastOrdinal = year * 12 + monthIndex;
    const months = [];
    for (let ordinal = fiscalStartYear * 12 + 3; ordinal <= lastOrdinal; ordinal++) {
        const y = Math.floor(ordinal / 12);
        const m = (ordinal % 12) + 1;
        months.push(`${y}-${String(m).padStart(2, '0')}`);
    }
    return months;
}
/**
 * Detect $/unit rate outliers across all programs in stg_financials_raw.
 *
 * Method:
 * 1. Fetch all rows from v_rate_anomaly_analysis (paginated) filtered to
 *    the requested months (or fiscal YTD if omitted).
 * 2. For each month, compute the mean and sample standard deviation
 *    (n - 1 denominator, as implemented by computeStats) of rate_per_unit
 *    over the rows that can be scored: a finite rate and positive units.
 * 3. Flag any scorable program-month where |z-score| > threshold.
 * 4. Return the flagged rows sorted by |z-score| descending.
 *
 * Rows with zero or negative units are left out of the statistics as well
 * as the scoring, so a meaningless rate cannot distort the month's baseline.
 *
 * @param options.months - YYYY-MM strings to analyse. If omitted or empty,
 *   uses fiscal YTD (April of the current/previous fiscal year → today).
 * @param options.threshold - Z-score magnitude threshold. Default 2.0.
 * @returns An object with `anomalies`, `totalRows` (rows fetched),
 *   `threshold`, and `months` — the months observed in the data (newest
 *   first), or the requested months when no rows came back.
 */
export async function detectRateAnomalies(options) {
    const threshold = options?.threshold ?? 2.0;
    // Resolve target months: explicit list, or derive fiscal YTD
    const explicitMonths = options?.months;
    const targetMonths = explicitMonths && explicitMonths.length > 0
        ? explicitMonths
        : fiscalYtdMonths(new Date());
    // Fetch view rows
    const rawRows = await fetchViewRows(targetMonths);
    const totalRows = rawRows.length;
    if (totalRows === 0) {
        return { anomalies: [], totalRows: 0, threshold, months: targetMonths };
    }
    // Group rows by month for per-month z-score calculation
    const byMonth = new Map();
    for (const row of rawRows) {
        const bucket = byMonth.get(row.month);
        if (bucket) {
            bucket.push(row);
        }
        else {
            byMonth.set(row.month, [row]);
        }
    }
    const anomalies = [];
    for (const [month, rows] of byMonth) {
        // Parse each row once; keep only rows that can be meaningfully scored.
        const scorable = [];
        for (const row of rows) {
            const rate = toNumOrNull(row.rate_per_unit);
            if (rate === null) {
                continue; // cannot score rows with no rate
            }
            const units = toNum(row.units);
            if (units <= 0) {
                continue; // require positive units for a meaningful rate
            }
            scorable.push({ row, rate, units });
        }
        const { mean, stddev } = computeStats(scorable.map(entry => entry.rate));
        const expectedLow = mean - threshold * stddev;
        const expectedHigh = mean + threshold * stddev;
        for (const { row, rate, units } of scorable) {
            // Z-score: how many std-deviations from the mean (0 when there is no spread)
            const zscore = stddev > 0 ? (rate - mean) / stddev : 0;
            if (Math.abs(zscore) <= threshold) {
                continue; // within normal range
            }
            anomalies.push({
                master_program: row.master_program,
                program_code: row.program_code,
                program_id: typeof row.program_id === 'number' ? row.program_id : null,
                client_id: typeof row.client_id === 'number' ? row.client_id : null,
                client_name: row.client_name,
                month,
                checkin_fee_dollars: toNum(row.checkin_fee_dollars),
                units,
                rate_per_unit: rate,
                prev_month_rate: toNumOrNull(row.prev_month_rate),
                rate_delta_pct: toNumOrNull(row.rate_delta_pct),
                units_change_pct: toNumOrNull(row.units_change_pct),
                dollars_change_pct: toNumOrNull(row.dollars_change_pct),
                zscore: Math.round(zscore * 100) / 100,
                expected_range: [
                    Math.round(expectedLow * 10000) / 10000,
                    Math.round(expectedHigh * 10000) / 10000,
                ],
            });
        }
    }
    // Sort by absolute z-score descending (most extreme outliers first)
    anomalies.sort((a, b) => Math.abs(b.zscore) - Math.abs(a.zscore));
    // Distinct months that were actually present in the data, newest first
    const observedMonths = [...byMonth.keys()].sort().reverse();
    return {
        anomalies,
        totalRows,
        threshold,
        months: observedMonths,
    };
}
@@ -0,0 +1,32 @@
1
/** Reconciliation figures for a single month, as produced by runAuditComparison. */
export interface MonthSummary {
    /** Month key the figures belong to (YYYY-MM). */
    month: string;
    /** Number of distinct accounts with a confirmed amount in this month. */
    confirmedAccounts: number;
    /** Number of distinct accounts with staged rows in this month. */
    stagedAccounts: number;
    /** Accounts whose staged and confirmed amounts agree within tolerance. */
    exactMatch: number;
    /** Accounts that agree within tolerance only after negating one side. */
    signFlipMatch: number;
    /** Accounts present on both sides that agree neither way. */
    mismatch: number;
    /** Accounts found only in the confirmed data. */
    confirmedOnly: number;
    /** Accounts found only in staging. */
    stagingOnly: number;
    /** Percentage of overlapping accounts that matched; null when there is no overlap. */
    accuracy: null | number;
    /** Sum of absolute staged amounts for the month. */
    stagedTotal: number;
    /** Sum of absolute confirmed amounts for the month. */
    confirmedTotal: number;
}
14
/** Overall outcome of runAuditComparison. */
export interface AuditResult {
    /** One entry per month covered by the comparison. */
    summaries: Array<MonthSummary>;
    /** Count of staging rows fetched. */
    totalStagingRows: number;
    /** Count of confirmed rows fetched. */
    totalConfirmedRows: number;
}
19
/**
 * Reconcile staged financials with confirmed income statements.
 *
 * Mirrors dashboard-returnpro's /api/staging/audit-summary route:
 *   1. Page through stg_financials_raw (amount is TEXT and is parsed as a float)
 *   2. Page through confirmed_income_statements
 *   3. Sum staging amounts per account_code and YYYY-MM month
 *   4. Compare the two sides within a tolerance, also checking for sign flips
 *   5. Produce one summary per month, including an accuracy percentage
 *
 * @param months - YYYY-MM strings to restrict the result to. If omitted, all months are included.
 * @param tolerance - Dollar tolerance used when deciding a match. Default $1.00.
 */
export declare function runAuditComparison(months?: Array<string>, tolerance?: number): Promise<AuditResult>;
@@ -0,0 +1,147 @@
1
+ import { getSupabase } from '../supabase.js';
2
+ // --- Helpers ---
3
+ const PAGE_SIZE = 1000;
4
/**
 * Fetch every row of a Supabase table by walking it in PAGE_SIZE windows
 * with .range(), which works around the per-request row cap.
 *
 * @param table - Table (or view) name.
 * @param select - Column list passed to .select().
 * @param orderCol - Column the pages are ordered by.
 * @returns All rows, concatenated in page order.
 * @throws Error when a page request reports an error.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function paginateAll(table, select, orderCol) {
    const client = getSupabase('returnpro');
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const collected = [];
    for (let offset = 0;; offset += PAGE_SIZE) {
        const lastIndex = offset + PAGE_SIZE - 1;
        const { data: page, error } = await client
            .from(table)
            .select(select)
            .order(orderCol)
            .range(offset, lastIndex);
        if (error) {
            throw new Error(`Fetch ${table} failed: ${error.message}`);
        }
        const received = page ? page.length : 0;
        if (received === 0) {
            break;
        }
        collected.push(...page);
        if (received < PAGE_SIZE) {
            break; // a short page is the final one
        }
    }
    return collected;
}
28
+ // --- Core ---
29
/**
 * Compare staged financials against confirmed income statements.
 *
 * Replicates the logic from dashboard-returnpro's /api/staging/audit-summary route:
 *   1. Paginate stg_financials_raw (amount is TEXT, must parseFloat)
 *   2. Paginate confirmed_income_statements
 *   3. Aggregate staging per month (first 7 chars of `date`) and account_code
 *   4. Compare with tolerance (default $1.00), detect sign-flips
 *   5. Return per-month summaries with accuracy %
 *
 * Amounts are indexed month → account → amount rather than by a joined
 * "account|month" string, so account codes containing "|" are handled
 * correctly and each month only visits its own accounts.
 *
 * @param months - Optional array of YYYY-MM strings to filter to. If omitted
 *   or empty, all months are included (same convention as the other
 *   returnpro helpers).
 * @param tolerance - Dollar tolerance for match detection. Default $1.00.
 * @returns Per-month summaries (ascending by month) plus fetched row counts.
 * @throws Error when either table cannot be fetched.
 */
export async function runAuditComparison(months, tolerance = 1.00) {
    // 1-2. Fetch both tables (paginated); the reads are independent, so run them together.
    const [stagingRows, confirmedRows] = await Promise.all([
        // Page on raw_id rather than the non-unique date column so page
        // boundaries are stable (raw_id is the column diagnose.js pages this table on).
        paginateAll('stg_financials_raw', 'account_code,date,amount', 'raw_id'),
        // NOTE(review): period is unlikely to be unique per row; consider
        // paging on a unique key once one is confirmed for this table.
        paginateAll('confirmed_income_statements', 'account_code,period,total_amount', 'period'),
    ]);
    // 3. Aggregate staging: month -> account_code -> sum(amount)
    //    amount is TEXT in the DB — must parseFloat
    const stagedByMonth = new Map();
    for (const row of stagingRows) {
        const month = row.date ? String(row.date).substring(0, 7) : null;
        if (!month) {
            continue;
        }
        let accounts = stagedByMonth.get(month);
        if (!accounts) {
            accounts = new Map();
            stagedByMonth.set(month, accounts);
        }
        const account = String(row.account_code);
        accounts.set(account, (accounts.get(account) ?? 0) + (parseFloat(row.amount) || 0));
    }
    // 4. Build confirmed lookup: period -> account_code -> total_amount (last row wins)
    const confirmedByMonth = new Map();
    for (const row of confirmedRows) {
        const month = String(row.period);
        let accounts = confirmedByMonth.get(month);
        if (!accounts) {
            accounts = new Map();
            confirmedByMonth.set(month, accounts);
        }
        accounts.set(String(row.account_code), parseFloat(String(row.total_amount)) || 0);
    }
    // 5-6. Months present in either dataset, narrowed to the requested ones
    const allMonths = new Set([...stagedByMonth.keys(), ...confirmedByMonth.keys()]);
    const requested = months && months.length > 0 ? new Set(months) : null;
    const targetMonths = [...allMonths]
        .filter(m => requested === null || requested.has(m))
        .sort();
    // 7. Build per-month summaries
    const noAccounts = new Map();
    const summaries = [];
    for (const month of targetMonths) {
        const confirmed = confirmedByMonth.get(month) ?? noAccounts;
        const staged = stagedByMonth.get(month) ?? noAccounts;
        let exactMatch = 0;
        let signFlipMatch = 0;
        let mismatch = 0;
        let confirmedOnly = 0;
        let stagingOnly = 0;
        let stagedTotal = 0;
        let confirmedTotal = 0;
        // Compare confirmed accounts against staging
        for (const [account, cAmt] of confirmed) {
            confirmedTotal += Math.abs(cAmt);
            if (!staged.has(account)) {
                confirmedOnly++;
                continue;
            }
            const sAmt = staged.get(account);
            if (Math.abs(cAmt - sAmt) <= tolerance) {
                exactMatch++;
            }
            else if (Math.abs(cAmt + sAmt) <= tolerance) {
                // Same magnitude, opposite sign
                signFlipMatch++;
            }
            else {
                mismatch++;
            }
        }
        // Count staging-only accounts and accumulate staged total
        for (const [account, sAmt] of staged) {
            stagedTotal += Math.abs(sAmt);
            if (!confirmed.has(account)) {
                stagingOnly++;
            }
        }
        // Accuracy = (exactMatch + signFlipMatch) / overlap, one decimal place; null if no overlap
        const overlap = exactMatch + signFlipMatch + mismatch;
        const accuracy = overlap > 0
            ? Math.round(((exactMatch + signFlipMatch) / overlap) * 1000) / 10
            : null;
        summaries.push({
            month,
            confirmedAccounts: confirmed.size,
            stagedAccounts: staged.size,
            exactMatch,
            signFlipMatch,
            mismatch,
            confirmedOnly,
            stagingOnly,
            accuracy,
            stagedTotal,
            confirmedTotal,
        });
    }
    return {
        summaries,
        totalStagingRows: stagingRows.length,
        totalConfirmedRows: confirmedRows.length,
    };
}
@@ -0,0 +1,52 @@
1
/** Every category of problem that diagnoseMonths can report. */
export type DiagnosticIssueKind =
    | 'unresolved_account_code'
    | 'unresolved_program_code'
    | 'unresolved_master_program'
    | 'unresolved_client'
    | 'low_row_count'
    | 'missing_month'
    | 'null_date_rows'
    | 'null_account_code_rows';
2
/** A single finding reported by diagnoseMonths. */
export interface DiagnosticIssue {
    /** Which category of problem this is. */
    kind: DiagnosticIssueKind;
    /** The YYYY-MM month the finding applies to, or null when it is not tied to one month. */
    month: null | string;
    /** Brief description intended for people. */
    message: string;
    /** Supporting data for the finding, when there is any. */
    detail?: Record<string, unknown>;
}
12
/** Full report returned by diagnoseMonths. */
export interface DiagnosisResult {
    /** The YYYY-MM months that were examined. */
    monthsAnalysed: Array<string>;
    /** Number of stg_financials_raw rows across the examined months. */
    totalRows: number;
    /** Row count for each examined month, keyed by YYYY-MM. */
    rowsPerMonth: Record<string, number>;
    /** Median of the per-month row counts; the baseline for spotting unusually small months. */
    medianRowCount: number;
    /** Every finding, in the order it was produced. */
    issues: Array<DiagnosticIssue>;
    /** Headline counts derived from the findings. */
    summary: {
        /** Distinct account codes that failed to resolve. */
        unresolvedAccountCodes: number;
        /** Distinct program codes that failed to resolve. */
        unresolvedProgramCodes: number;
        /** Distinct master program ids that failed to resolve. */
        unresolvedMasterPrograms: number;
        /** Count of client-resolution failures. */
        unresolvedClients: number;
        /** Months reported as low_row_count. */
        lowRowCountMonths: number;
        /** Months reported as missing_month. */
        missingMonths: number;
        /** Length of `issues`. */
        totalIssues: number;
    };
}
34
/**
 * Look for foreign-key resolution failures and data gaps in stg_financials_raw.
 *
 * What is checked:
 *   1. Rows whose date or account_code is null (data quality)
 *   2. account_codes that are absent from dim_account
 *   3. program_codes that are absent from dim_program_id
 *   4. program_codes whose dim_program_id row carries a null master_program_id
 *   5. master_program_ids that are absent from dim_master_program
 *   6. master_programs whose dim_master_program row carries a null client_id
 *   7. Months whose row count is under 50% of the median (anomalously low)
 *   8. Calendar months with no rows at all between the first and last month seen
 *
 * @param options.months - Restrict the analysis to these YYYY-MM months.
 *   When omitted, every month present in staging is analysed.
 */
export declare function diagnoseMonths(options?: {
    months?: Array<string>;
}): Promise<DiagnosisResult>;
@@ -0,0 +1,281 @@
1
+ import { getSupabase } from '../supabase.js';
2
+ // --- Helpers ---
3
+ const PAGE_SIZE = 1000;
4
/**
 * Read a whole table in PAGE_SIZE chunks, working around the per-request row cap.
 *
 * @param table - Table name to read.
 * @param select - Column list for .select().
 * @param orderCol - Column used to order the pages.
 * @returns Every row, in the order the pages arrived.
 * @throws Error when a page request reports an error.
 */
async function paginateAll(table, select, orderCol) {
    const db = getSupabase('returnpro');
    const rows = [];
    let start = 0;
    let keepGoing = true;
    while (keepGoing) {
        const end = start + PAGE_SIZE - 1;
        const response = await db
            .from(table)
            .select(select)
            .order(orderCol)
            .range(start, end);
        if (response.error) {
            throw new Error(`Fetch ${table} failed: ${response.error.message}`);
        }
        const batch = response.data;
        if (!batch || batch.length === 0) {
            keepGoing = false;
        }
        else {
            rows.push(...batch);
            if (batch.length < PAGE_SIZE) {
                // A partial page means the table is exhausted.
                keepGoing = false;
            }
            else {
                start += PAGE_SIZE;
            }
        }
    }
    return rows;
}
26
/**
 * Median of a list of numbers; an empty list yields 0.
 * The input is copied before sorting, so the caller's array is left untouched.
 */
function median(values) {
    const size = values.length;
    if (size === 0) {
        return 0;
    }
    const ordered = Array.from(values);
    ordered.sort((lo, hi) => lo - hi);
    const upper = Math.floor(size / 2);
    if (size % 2 !== 0) {
        return ordered[upper];
    }
    // Even length: average the two middle values.
    return (ordered[upper - 1] + ordered[upper]) / 2;
}
36
/**
 * Extract YYYY-MM from a date value. Returns null if missing or unparseable.
 *
 * Strings that start with an ISO "YYYY-MM" prefix yield the calendar month
 * exactly as written. Sending them through `new Date()` would parse a
 * date-only string such as "2024-04-01" as UTC midnight, and reading that
 * back with local getters moves it into the previous month on machines west
 * of UTC. Any other input falls back to Date parsing, read in local time.
 *
 * @param date - Date string (or other Date-constructor input).
 * @returns {string | null} The YYYY-MM month, or null.
 */
function toYearMonth(date) {
    if (!date) {
        return null;
    }
    if (typeof date === 'string') {
        // Year, dash, two-digit month, then end of string or a non-digit.
        const iso = /^(\d{4})-(\d{2})(?:$|\D)/.exec(date.trim());
        if (iso) {
            const month = Number(iso[2]);
            return month >= 1 && month <= 12 ? `${iso[1]}-${iso[2]}` : null;
        }
    }
    const parsed = new Date(date);
    if (Number.isNaN(parsed.getTime())) {
        return null;
    }
    return `${parsed.getFullYear()}-${String(parsed.getMonth() + 1).padStart(2, '0')}`;
}
47
/**
 * Enumerate every YYYY-MM month from the earliest to the latest month in
 * `present`, inclusive. The result is the baseline against which entirely
 * absent months are detected. An empty input gives an empty list.
 */
function buildExpectedMonths(present) {
    if (present.length === 0) {
        return [];
    }
    const ordered = Array.from(present).sort();
    const parse = (ym) => ym.split('-').map(Number);
    const [startYear, startMonth] = parse(ordered[0]);
    const [endYear, endMonth] = parse(ordered[ordered.length - 1]);
    const months = [];
    let year = startYear;
    let month = startMonth;
    for (;;) {
        const withinRange = year < endYear || (year === endYear && month <= endMonth);
        if (!withinRange) {
            break;
        }
        months.push(`${year}-${String(month).padStart(2, '0')}`);
        month += 1;
        if (month > 12) {
            // Roll over into January of the next year.
            month = 1;
            year += 1;
        }
    }
    return months;
}
72
+ // --- Core ---
73
/**
 * Order two ids: numerically when both are numbers, otherwise by their
 * string form (numeric-aware), so the sort is well-defined for either type.
 */
function compareIds(a, b) {
    if (typeof a === 'number' && typeof b === 'number') {
        return a - b;
    }
    return String(a).localeCompare(String(b), undefined, { numeric: true });
}
/**
 * Diagnose FK resolution failures and data gaps in stg_financials_raw.
 *
 * Checks performed:
 *   1. Rows with null/unparseable date or null account_code (data quality,
 *      counted over the whole table)
 *   2. account_codes not present in dim_account
 *   3. program_codes not present in dim_program_id
 *   4. program_codes whose dim_program_id row has a null master_program_id
 *   5. master_program_ids not present in dim_master_program
 *   6. master_programs whose dim_master_program row has a null client_id
 *      (reported under `unresolved_client`, listed separately from client ids
 *      in `detail.masterProgramIdsWithoutClient`)
 *   7. Months with row counts < 50% of the median (anomalously low)
 *   8. Missing months: without a filter, calendar months absent between the
 *      first and last month seen; with a filter, requested months that have
 *      no rows at all
 *
 * @param options.months - If provided (and non-empty), only analyse these
 *   YYYY-MM months; duplicates are ignored. If omitted, all months present in
 *   staging are analysed.
 * @returns The analysed months, row counts, median, issues and summary counts.
 * @throws Error when any table cannot be fetched.
 */
export async function diagnoseMonths(options) {
    const issues = [];
    // De-duplicate the filter so a repeated month is not counted twice.
    const requestedMonths = options?.months && options.months.length > 0
        ? [...new Set(options.months)].sort()
        : null;
    // --- 1-2. Load staging rows and all dimension tables (paginated, in parallel) ---
    const [stagingRows, dimAccounts, dimProgramIds, dimMasterPrograms, dimClients] = await Promise.all([
        paginateAll('stg_financials_raw', 'raw_id,date,account_code,account_id,program_code,program_id_key,master_program,master_program_id,client_id', 'raw_id'),
        paginateAll('dim_account', 'account_code', 'account_code'),
        paginateAll('dim_program_id', 'program_code,master_program_id', 'program_code'),
        paginateAll('dim_master_program', 'master_program_id,master_name,client_id', 'master_program_id'),
        paginateAll('dim_client', 'client_id,client_name', 'client_id'),
    ]);
    // Build lookup sets
    const knownAccountCodes = new Set(dimAccounts.map(r => r.account_code));
    const knownProgramCodes = new Set(dimProgramIds.map(r => r.program_code));
    // dim_program_id entries that have a null master_program_id (orphaned program codes)
    const orphanedProgramCodes = new Set(dimProgramIds.filter(r => r.master_program_id === null).map(r => r.program_code));
    const knownMasterProgramIds = new Set(dimMasterPrograms.map(r => r.master_program_id));
    // master programs without a client
    const masterProgramsWithoutClient = new Set(dimMasterPrograms.filter(r => r.client_id === null).map(r => r.master_program_id));
    const knownClientIds = new Set(dimClients.map(r => r.client_id));
    // --- 3. Assign staging rows to months ---
    const rowsByMonth = new Map();
    let nullDateCount = 0;
    let nullAccountCodeCount = 0;
    for (const row of stagingRows) {
        // Counted before the date check so rows with a bad date are included too.
        if (!row.account_code) {
            nullAccountCodeCount++;
        }
        const ym = toYearMonth(row.date);
        if (!ym) {
            nullDateCount++;
            continue;
        }
        const bucket = rowsByMonth.get(ym);
        if (bucket) {
            bucket.push(row);
        }
        else {
            rowsByMonth.set(ym, [row]);
        }
    }
    // --- 4. Apply month filter ---
    const targetMonths = requestedMonths
        ? requestedMonths.filter(m => rowsByMonth.has(m))
        : [...rowsByMonth.keys()].sort();
    // --- 5. Global data quality issues ---
    if (nullDateCount > 0) {
        issues.push({
            kind: 'null_date_rows',
            month: null,
            message: `${nullDateCount} row(s) in stg_financials_raw have a null or unparseable date`,
            detail: { count: nullDateCount },
        });
    }
    if (nullAccountCodeCount > 0) {
        issues.push({
            kind: 'null_account_code_rows',
            month: null,
            message: `${nullAccountCodeCount} row(s) in stg_financials_raw have a null account_code`,
            detail: { count: nullAccountCodeCount },
        });
    }
    // --- 6. Per-month analysis ---
    const rowsPerMonth = {};
    // Aggregate FK failure sets per dimension (global, deduplicated)
    const unresolvedAccountCodes = new Set();
    const unresolvedProgramCodes = new Set();
    const unresolvedMasterProgramIds = new Set();
    // Client ids and master program ids are different id spaces, so they are
    // tracked in separate sets; sharing one would merge unrelated ids that
    // happen to have the same value.
    const unresolvedClientIds = new Set();
    const masterProgramIdsWithoutClient = new Set();
    for (const month of targetMonths) {
        const rows = rowsByMonth.get(month) ?? [];
        rowsPerMonth[month] = rows.length;
        for (const row of rows) {
            // account_code → dim_account
            if (row.account_code && !knownAccountCodes.has(row.account_code)) {
                unresolvedAccountCodes.add(row.account_code);
            }
            // program_code → dim_program_id (missing, or present with a null master_program_id)
            if (row.program_code
                && (!knownProgramCodes.has(row.program_code) || orphanedProgramCodes.has(row.program_code))) {
                unresolvedProgramCodes.add(row.program_code);
            }
            // master_program_id → dim_master_program
            if (row.master_program_id !== null && row.master_program_id !== undefined) {
                if (!knownMasterProgramIds.has(row.master_program_id)) {
                    unresolvedMasterProgramIds.add(row.master_program_id);
                }
                else if (masterProgramsWithoutClient.has(row.master_program_id)) {
                    // master_program exists but has no client_id
                    masterProgramIdsWithoutClient.add(row.master_program_id);
                }
            }
            // client_id → dim_client (direct FK on staging row)
            if (row.client_id !== null && row.client_id !== undefined && !knownClientIds.has(row.client_id)) {
                unresolvedClientIds.add(row.client_id);
            }
        }
    }
    // Emit per-dimension issues (global, not per-month — less noise)
    if (unresolvedAccountCodes.size > 0) {
        const codes = [...unresolvedAccountCodes].sort();
        issues.push({
            kind: 'unresolved_account_code',
            month: null,
            message: `${codes.length} account_code(s) in staging do not resolve to dim_account`,
            detail: { codes },
        });
    }
    if (unresolvedProgramCodes.size > 0) {
        const codes = [...unresolvedProgramCodes].sort();
        issues.push({
            kind: 'unresolved_program_code',
            month: null,
            message: `${codes.length} program_code(s) in staging do not resolve (missing from dim_program_id or dim_program_id.master_program_id is null)`,
            detail: { codes },
        });
    }
    if (unresolvedMasterProgramIds.size > 0) {
        const ids = [...unresolvedMasterProgramIds].sort(compareIds);
        issues.push({
            kind: 'unresolved_master_program',
            month: null,
            message: `${ids.length} master_program_id(s) in staging do not resolve to dim_master_program`,
            detail: { ids },
        });
    }
    const unresolvedClientCount = unresolvedClientIds.size + masterProgramIdsWithoutClient.size;
    if (unresolvedClientCount > 0) {
        const ids = [...unresolvedClientIds].sort(compareIds);
        const masterIds = [...masterProgramIdsWithoutClient].sort(compareIds);
        issues.push({
            kind: 'unresolved_client',
            month: null,
            message: `${ids.length} client_id(s) in staging do not resolve to dim_client; ${masterIds.length} master_program_id(s) have no client in dim_master_program`,
            detail: { ids, masterProgramIdsWithoutClient: masterIds },
        });
    }
    // --- 7. Row count anomalies (< 50% of median) ---
    const counts = targetMonths.map(m => rowsPerMonth[m] ?? 0);
    const med = median(counts);
    const lowThreshold = med * 0.5;
    for (const month of targetMonths) {
        const count = rowsPerMonth[month] ?? 0;
        if (med > 0 && count < lowThreshold) {
            issues.push({
                kind: 'low_row_count',
                month,
                message: `${month} has only ${count} rows — below 50% of median (${med})`,
                detail: { rowCount: count, median: med, threshold: lowThreshold },
            });
        }
    }
    // --- 8. Missing months ---
    // With a filter, only the requested months are in scope (and a requested
    // month without any rows is itself a finding). Without one, look for gaps
    // in the calendar range covered by staging.
    const monthsToCheck = requestedMonths ?? buildExpectedMonths([...rowsByMonth.keys()]);
    for (const expected of monthsToCheck) {
        if (!rowsByMonth.has(expected)) {
            issues.push({
                kind: 'missing_month',
                month: expected,
                message: `${expected} has no rows in stg_financials_raw — month is missing`,
            });
        }
    }
    // --- 9. Compute summary ---
    const summary = {
        unresolvedAccountCodes: unresolvedAccountCodes.size,
        unresolvedProgramCodes: unresolvedProgramCodes.size,
        unresolvedMasterPrograms: unresolvedMasterProgramIds.size,
        unresolvedClients: unresolvedClientCount,
        lowRowCountMonths: issues.filter(i => i.kind === 'low_row_count').length,
        missingMonths: issues.filter(i => i.kind === 'missing_month').length,
        totalIssues: issues.length,
    };
    return {
        monthsAnalysed: targetMonths,
        totalRows: counts.reduce((a, b) => a + b, 0),
        rowsPerMonth,
        medianRowCount: med,
        issues,
        summary,
    };
}