optimal-cli 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/agents/.gitkeep +0 -0
  2. package/agents/content-ops.md +227 -0
  3. package/agents/financial-ops.md +184 -0
  4. package/agents/infra-ops.md +206 -0
  5. package/agents/profiles.json +5 -0
  6. package/bin/optimal.ts +1731 -0
  7. package/docs/CLI-REFERENCE.md +361 -0
  8. package/lib/assets/index.ts +225 -0
  9. package/lib/assets.ts +124 -0
  10. package/lib/auth/index.ts +189 -0
  11. package/lib/board/index.ts +309 -0
  12. package/lib/board/types.ts +124 -0
  13. package/lib/bot/claim.ts +43 -0
  14. package/lib/bot/coordinator.ts +254 -0
  15. package/lib/bot/heartbeat.ts +37 -0
  16. package/lib/bot/index.ts +9 -0
  17. package/lib/bot/protocol.ts +99 -0
  18. package/lib/bot/reporter.ts +42 -0
  19. package/lib/bot/skills.ts +81 -0
  20. package/lib/budget/projections.ts +561 -0
  21. package/lib/budget/scenarios.ts +312 -0
  22. package/lib/cms/publish-blog.ts +129 -0
  23. package/lib/cms/strapi-client.ts +302 -0
  24. package/lib/config/registry.ts +228 -0
  25. package/lib/config/schema.ts +58 -0
  26. package/lib/config.ts +247 -0
  27. package/lib/errors.ts +129 -0
  28. package/lib/format.ts +120 -0
  29. package/lib/infra/.gitkeep +0 -0
  30. package/lib/infra/deploy.ts +70 -0
  31. package/lib/infra/migrate.ts +141 -0
  32. package/lib/newsletter/.gitkeep +0 -0
  33. package/lib/newsletter/distribute.ts +256 -0
  34. package/{dist/lib/newsletter/generate-insurance.d.ts → lib/newsletter/generate-insurance.ts} +24 -7
  35. package/lib/newsletter/generate.ts +735 -0
  36. package/lib/returnpro/.gitkeep +0 -0
  37. package/lib/returnpro/anomalies.ts +258 -0
  38. package/lib/returnpro/audit.ts +194 -0
  39. package/lib/returnpro/diagnose.ts +400 -0
  40. package/lib/returnpro/kpis.ts +255 -0
  41. package/lib/returnpro/templates.ts +323 -0
  42. package/lib/returnpro/upload-income.ts +311 -0
  43. package/lib/returnpro/upload-netsuite.ts +696 -0
  44. package/lib/returnpro/upload-r1.ts +563 -0
  45. package/lib/returnpro/validate.ts +154 -0
  46. package/lib/social/meta.ts +228 -0
  47. package/lib/social/post-generator.ts +468 -0
  48. package/lib/social/publish.ts +301 -0
  49. package/lib/social/scraper.ts +503 -0
  50. package/lib/supabase.ts +25 -0
  51. package/lib/transactions/delete-batch.ts +258 -0
  52. package/lib/transactions/ingest.ts +659 -0
  53. package/lib/transactions/stamp.ts +654 -0
  54. package/package.json +15 -25
  55. package/dist/bin/optimal.d.ts +0 -2
  56. package/dist/bin/optimal.js +0 -995
  57. package/dist/lib/budget/projections.d.ts +0 -115
  58. package/dist/lib/budget/projections.js +0 -384
  59. package/dist/lib/budget/scenarios.d.ts +0 -93
  60. package/dist/lib/budget/scenarios.js +0 -214
  61. package/dist/lib/cms/publish-blog.d.ts +0 -62
  62. package/dist/lib/cms/publish-blog.js +0 -74
  63. package/dist/lib/cms/strapi-client.d.ts +0 -123
  64. package/dist/lib/cms/strapi-client.js +0 -213
  65. package/dist/lib/config.d.ts +0 -55
  66. package/dist/lib/config.js +0 -206
  67. package/dist/lib/infra/deploy.d.ts +0 -29
  68. package/dist/lib/infra/deploy.js +0 -58
  69. package/dist/lib/infra/migrate.d.ts +0 -34
  70. package/dist/lib/infra/migrate.js +0 -103
  71. package/dist/lib/kanban.d.ts +0 -46
  72. package/dist/lib/kanban.js +0 -118
  73. package/dist/lib/newsletter/distribute.d.ts +0 -52
  74. package/dist/lib/newsletter/distribute.js +0 -193
  75. package/dist/lib/newsletter/generate-insurance.js +0 -36
  76. package/dist/lib/newsletter/generate.d.ts +0 -104
  77. package/dist/lib/newsletter/generate.js +0 -571
  78. package/dist/lib/returnpro/anomalies.d.ts +0 -64
  79. package/dist/lib/returnpro/anomalies.js +0 -166
  80. package/dist/lib/returnpro/audit.d.ts +0 -32
  81. package/dist/lib/returnpro/audit.js +0 -147
  82. package/dist/lib/returnpro/diagnose.d.ts +0 -52
  83. package/dist/lib/returnpro/diagnose.js +0 -281
  84. package/dist/lib/returnpro/kpis.d.ts +0 -32
  85. package/dist/lib/returnpro/kpis.js +0 -192
  86. package/dist/lib/returnpro/templates.d.ts +0 -48
  87. package/dist/lib/returnpro/templates.js +0 -229
  88. package/dist/lib/returnpro/upload-income.d.ts +0 -25
  89. package/dist/lib/returnpro/upload-income.js +0 -235
  90. package/dist/lib/returnpro/upload-netsuite.d.ts +0 -37
  91. package/dist/lib/returnpro/upload-netsuite.js +0 -566
  92. package/dist/lib/returnpro/upload-r1.d.ts +0 -48
  93. package/dist/lib/returnpro/upload-r1.js +0 -398
  94. package/dist/lib/social/post-generator.d.ts +0 -83
  95. package/dist/lib/social/post-generator.js +0 -333
  96. package/dist/lib/social/publish.d.ts +0 -66
  97. package/dist/lib/social/publish.js +0 -226
  98. package/dist/lib/social/scraper.d.ts +0 -67
  99. package/dist/lib/social/scraper.js +0 -361
  100. package/dist/lib/supabase.d.ts +0 -4
  101. package/dist/lib/supabase.js +0 -20
  102. package/dist/lib/transactions/delete-batch.d.ts +0 -60
  103. package/dist/lib/transactions/delete-batch.js +0 -203
  104. package/dist/lib/transactions/ingest.d.ts +0 -43
  105. package/dist/lib/transactions/ingest.js +0 -555
  106. package/dist/lib/transactions/stamp.d.ts +0 -51
  107. package/dist/lib/transactions/stamp.js +0 -524
@@ -0,0 +1,659 @@
1
+ /**
2
+ * Transaction Ingestion — CSV Parsing & Deduplication
3
+ *
4
+ * Ported from OptimalOS:
5
+ * - /home/optimal/optimalos/app/api/csv/ingest/route.ts
6
+ * - /home/optimal/optimalos/lib/csv/upload.ts
7
+ * - /home/optimal/optimalos/lib/stamp-engine/normalizers/
8
+ * - /home/optimal/optimalos/lib/stamp-engine/format-detector.ts
9
+ *
10
+ * Reads a CSV file from disk, auto-detects bank format, parses into
11
+ * normalized transactions, deduplicates against existing rows in Supabase,
12
+ * and batch-inserts new records into the `transactions` table.
13
+ */
14
+
15
import { createHash, randomUUID } from 'node:crypto'
import { readFileSync } from 'node:fs'
import { basename } from 'node:path'

import { getSupabase } from '../supabase.js'
18
+
19
+ // =============================================================================
20
+ // TYPES
21
+ // =============================================================================
22
+
23
/**
 * One bank transaction normalized from a single CSV row.
 * Only date/description/amount are guaranteed; the optional fields are
 * filled in when the source format provides them.
 */
export interface RawTransaction {
  date: string // ISO YYYY-MM-DD (see parseDate)
  description: string
  amount: number // signed; Discover charges are negated by parseDiscover
  originalCategory?: string // category text as it appeared in the bank CSV
  transactionType?: string // bank-reported type column, verbatim
  postDate?: string // settlement date (ISO), for formats that carry one
  balance?: number // running balance (Chase checking only)
  extendedDetails?: string // NOTE(review): declared but not populated by any parser in this file
  merchantAddress?: string // NOTE(review): declared but not populated by any parser in this file
}

/** CSV layouts this module can parse; 'generic' is the best-effort fallback. */
export type BankFormat =
  | 'chase_checking'
  | 'chase_credit'
  | 'discover'
  | 'amex'
  | 'generic'
  | 'unknown'

/** Outcome of header-based format sniffing (see detectFormat). */
interface FormatDetectionResult {
  format: BankFormat
  confidence: number // 0..1 — fraction of the signature's required headers matched
  headers: string[] // lowercased, trimmed header row
}

/** Output of a bank-specific parser: parsed rows plus per-row diagnostics. */
interface NormalizeResult {
  transactions: RawTransaction[]
  errors: string[] // fatal per-file/per-row problems
  warnings: string[] // skipped rows and other non-fatal notes
}

/** Summary returned by ingestTransactions. */
export interface IngestResult {
  inserted: number // rows newly written to `transactions`
  skipped: number // rows dropped as duplicates (dedup hash already present)
  failed: number // rows in batches whose insert errored
  errors: string[] // parse errors, then insert errors, with warnings appended last
  format: BankFormat // detected source layout
}
62
+
63
+ // =============================================================================
64
+ // CSV PARSING UTILITIES
65
+ // =============================================================================
66
+
67
+ /**
68
+ * Parse a single CSV line, handling quoted fields.
69
+ */
70
+ function parseCSVLine(line: string): string[] {
71
+ const values: string[] = []
72
+ let currentValue = ''
73
+ let insideQuotes = false
74
+
75
+ for (let i = 0; i < line.length; i++) {
76
+ const char = line[i]
77
+ const nextChar = line[i + 1]
78
+
79
+ if (char === '"') {
80
+ if (insideQuotes && nextChar === '"') {
81
+ currentValue += '"'
82
+ i++
83
+ } else {
84
+ insideQuotes = !insideQuotes
85
+ }
86
+ } else if (char === ',' && !insideQuotes) {
87
+ values.push(currentValue.trim())
88
+ currentValue = ''
89
+ } else {
90
+ currentValue += char
91
+ }
92
+ }
93
+
94
+ values.push(currentValue.trim())
95
+ return values
96
+ }
97
+
98
+ /**
99
+ * Parse CSV content into headers and rows.
100
+ */
101
+ function parseCSVContent(content: string): { headers: string[]; rows: string[][] } {
102
+ let clean = content
103
+ if (clean.charCodeAt(0) === 0xfeff) clean = clean.slice(1) // remove BOM
104
+ clean = clean.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
105
+
106
+ const lines = clean.split('\n').filter((l) => l.trim())
107
+ if (lines.length === 0) return { headers: [], rows: [] }
108
+
109
+ const headers = parseCSVLine(lines[0])
110
+ const rows = lines.slice(1).map((l) => parseCSVLine(l))
111
+ return { headers, rows }
112
+ }
113
+
114
+ function findColumn(headers: string[], names: string[]): number {
115
+ const lower = headers.map((h) => h.toLowerCase().trim())
116
+ for (const name of names) {
117
+ const idx = lower.indexOf(name.toLowerCase())
118
+ if (idx !== -1) return idx
119
+ }
120
+ return -1
121
+ }
122
+
123
+ /**
124
+ * Parse date string to ISO format (YYYY-MM-DD).
125
+ */
126
+ function parseDate(dateStr: string | undefined | null): string {
127
+ if (!dateStr?.trim()) return new Date().toISOString().split('T')[0]
128
+ const trimmed = dateStr.trim()
129
+
130
+ if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) return trimmed
131
+
132
+ const slash = trimmed.match(/^(\d{1,2})\/(\d{1,2})\/(\d{2,4})$/)
133
+ if (slash) {
134
+ const [, month, day, year] = slash
135
+ const fullYear = year.length === 2 ? `20${year}` : year
136
+ return `${fullYear}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`
137
+ }
138
+
139
+ try {
140
+ const d = new Date(trimmed)
141
+ if (!isNaN(d.getTime())) return d.toISOString().split('T')[0]
142
+ } catch {
143
+ /* fall through */
144
+ }
145
+ return new Date().toISOString().split('T')[0]
146
+ }
147
+
148
+ /**
149
+ * Parse amount string to number.
150
+ */
151
+ function parseAmount(amountStr: string | undefined | null): number {
152
+ if (!amountStr?.trim()) return 0
153
+ let str = amountStr.trim()
154
+
155
+ const isParens = str.startsWith('(') && str.endsWith(')')
156
+ if (isParens) str = str.slice(1, -1)
157
+ str = str.replace(/[$,]/g, '')
158
+
159
+ let amount = parseFloat(str)
160
+ if (isNaN(amount)) return 0
161
+ if (isParens && amount > 0) amount = -amount
162
+ return amount
163
+ }
164
+
165
+ // =============================================================================
166
+ // FORMAT DETECTION
167
+ // =============================================================================
168
+
169
/** Header fingerprint used to recognize a specific bank's CSV layout. */
interface FormatSignature {
  format: BankFormat
  // Lowercased header names expected in the file's first row.
  requiredHeaders: string[]
  // Optional extra check run after the header match — used to tell formats
  // with identical required headers apart (e.g. amex vs. generic).
  disambiguator?: (headers: string[]) => boolean
}

// Checked in order by detectFormat; the first signature whose required
// headers reach an 80% fuzzy match wins, so the catch-all 'generic' entry
// must remain last.
const FORMAT_SIGNATURES: FormatSignature[] = [
  {
    format: 'chase_checking',
    requiredHeaders: [
      'details',
      'posting date',
      'description',
      'amount',
      'type',
      'balance',
    ],
  },
  {
    format: 'chase_credit',
    requiredHeaders: [
      'transaction date',
      'post date',
      'description',
      'category',
      'type',
      'amount',
    ],
  },
  {
    format: 'discover',
    requiredHeaders: [
      'trans. date',
      'post date',
      'description',
      'amount',
      'category',
    ],
  },
  {
    // Amex shares generic's three headers; the disambiguator requires an
    // Amex-specific column before this signature can match.
    format: 'amex',
    requiredHeaders: ['date', 'description', 'amount'],
    disambiguator: (h) =>
      h.some(
        (x) =>
          x.includes('card member') ||
          x.includes('account #') ||
          x.includes('extended details'),
      ),
  },
  {
    format: 'generic',
    requiredHeaders: ['date', 'description', 'amount'],
  },
]
224
+
225
+ function detectFormat(content: string, filename?: string): FormatDetectionResult {
226
+ if (filename?.toLowerCase().endsWith('.xlsx')) {
227
+ return { format: 'amex', confidence: 0.8, headers: [] }
228
+ }
229
+
230
+ const lines = content.split(/\r?\n/)
231
+ if (lines.length === 0) return { format: 'unknown', confidence: 0, headers: [] }
232
+
233
+ const headers = parseCSVLine(lines[0])
234
+ const normalized = headers.map((h) => h.toLowerCase().trim())
235
+
236
+ for (const sig of FORMAT_SIGNATURES) {
237
+ const matchCount = sig.requiredHeaders.filter((req) =>
238
+ normalized.some((h) => h.includes(req) || req.includes(h)),
239
+ ).length
240
+ const ratio = matchCount / sig.requiredHeaders.length
241
+
242
+ if (ratio >= 0.8) {
243
+ if (sig.disambiguator && !sig.disambiguator(normalized)) continue
244
+ return { format: sig.format, confidence: ratio, headers: normalized }
245
+ }
246
+ }
247
+
248
+ return { format: 'generic', confidence: 0.5, headers: normalized }
249
+ }
250
+
251
+ // =============================================================================
252
+ // BANK-SPECIFIC PARSERS
253
+ // =============================================================================
254
+
255
+ function parseChaseChecking(content: string): NormalizeResult {
256
+ const transactions: RawTransaction[] = []
257
+ const errors: string[] = []
258
+ const warnings: string[] = []
259
+ const { headers, rows } = parseCSVContent(content)
260
+
261
+ const colMap = {
262
+ postingDate: findColumn(headers, ['posting date', 'date']),
263
+ description: findColumn(headers, ['description']),
264
+ amount: findColumn(headers, ['amount']),
265
+ type: findColumn(headers, ['type']),
266
+ balance: findColumn(headers, ['balance']),
267
+ }
268
+
269
+ if (colMap.description === -1 || colMap.amount === -1) {
270
+ errors.push('Missing required columns: description and/or amount')
271
+ return { transactions, errors, warnings }
272
+ }
273
+
274
+ for (let i = 0; i < rows.length; i++) {
275
+ const row = rows[i]
276
+ try {
277
+ const description = (row[colMap.description] || '').trim()
278
+ if (!description) { warnings.push(`Row ${i + 2}: Empty description, skipping`); continue }
279
+ transactions.push({
280
+ date: parseDate(colMap.postingDate >= 0 ? row[colMap.postingDate] : ''),
281
+ description,
282
+ amount: parseAmount(row[colMap.amount]),
283
+ transactionType: colMap.type >= 0 ? row[colMap.type] : undefined,
284
+ balance: colMap.balance >= 0 ? parseAmount(row[colMap.balance]) : undefined,
285
+ })
286
+ } catch (err) {
287
+ errors.push(`Row ${i + 2}: ${err instanceof Error ? err.message : 'Unknown error'}`)
288
+ }
289
+ }
290
+ return { transactions, errors, warnings }
291
+ }
292
+
293
+ function parseChaseCredit(content: string): NormalizeResult {
294
+ const transactions: RawTransaction[] = []
295
+ const errors: string[] = []
296
+ const warnings: string[] = []
297
+ const { headers, rows } = parseCSVContent(content)
298
+
299
+ const colMap = {
300
+ transactionDate: findColumn(headers, ['transaction date', 'trans date']),
301
+ postDate: findColumn(headers, ['post date']),
302
+ description: findColumn(headers, ['description']),
303
+ category: findColumn(headers, ['category']),
304
+ type: findColumn(headers, ['type']),
305
+ amount: findColumn(headers, ['amount']),
306
+ }
307
+
308
+ if (colMap.description === -1 || colMap.amount === -1) {
309
+ errors.push('Missing required columns: description and/or amount')
310
+ return { transactions, errors, warnings }
311
+ }
312
+
313
+ for (let i = 0; i < rows.length; i++) {
314
+ const row = rows[i]
315
+ try {
316
+ const description = (row[colMap.description] || '').trim()
317
+ if (!description) { warnings.push(`Row ${i + 2}: Empty description, skipping`); continue }
318
+ const txDate = colMap.transactionDate >= 0 ? row[colMap.transactionDate] : ''
319
+ const pDate = colMap.postDate >= 0 ? row[colMap.postDate] : ''
320
+ transactions.push({
321
+ date: parseDate(txDate) || parseDate(pDate),
322
+ description,
323
+ amount: parseAmount(row[colMap.amount]),
324
+ originalCategory: colMap.category >= 0 ? row[colMap.category] || undefined : undefined,
325
+ transactionType: colMap.type >= 0 ? row[colMap.type] : undefined,
326
+ postDate: parseDate(pDate),
327
+ })
328
+ } catch (err) {
329
+ errors.push(`Row ${i + 2}: ${err instanceof Error ? err.message : 'Unknown error'}`)
330
+ }
331
+ }
332
+ return { transactions, errors, warnings }
333
+ }
334
+
335
+ function parseDiscover(content: string): NormalizeResult {
336
+ const transactions: RawTransaction[] = []
337
+ const errors: string[] = []
338
+ const warnings: string[] = []
339
+ const { headers, rows } = parseCSVContent(content)
340
+
341
+ const colMap = {
342
+ transDate: findColumn(headers, ['trans. date', 'trans date', 'transaction date']),
343
+ postDate: findColumn(headers, ['post date']),
344
+ description: findColumn(headers, ['description']),
345
+ amount: findColumn(headers, ['amount']),
346
+ category: findColumn(headers, ['category']),
347
+ }
348
+
349
+ if (colMap.description === -1 || colMap.amount === -1) {
350
+ errors.push('Missing required columns: description and/or amount')
351
+ return { transactions, errors, warnings }
352
+ }
353
+
354
+ for (let i = 0; i < rows.length; i++) {
355
+ const row = rows[i]
356
+ try {
357
+ const description = (row[colMap.description] || '').trim()
358
+ if (!description) { warnings.push(`Row ${i + 2}: Empty description, skipping`); continue }
359
+ const category = colMap.category >= 0 ? row[colMap.category] || '' : ''
360
+ let amount = parseAmount(row[colMap.amount])
361
+
362
+ // Discover uses positive for charges; flip sign unless it's a payment/credit
363
+ if (amount > 0 && !isDiscoverPayment(description, category)) {
364
+ amount = -amount
365
+ }
366
+
367
+ transactions.push({
368
+ date: parseDate(colMap.transDate >= 0 ? row[colMap.transDate] : '') ||
369
+ parseDate(colMap.postDate >= 0 ? row[colMap.postDate] : ''),
370
+ description,
371
+ amount,
372
+ originalCategory: category || undefined,
373
+ postDate: colMap.postDate >= 0 ? parseDate(row[colMap.postDate]) : undefined,
374
+ })
375
+ } catch (err) {
376
+ errors.push(`Row ${i + 2}: ${err instanceof Error ? err.message : 'Unknown error'}`)
377
+ }
378
+ }
379
+ return { transactions, errors, warnings }
380
+ }
381
+
382
+ function isDiscoverPayment(description: string, category: string): boolean {
383
+ const d = description.toLowerCase()
384
+ const c = category.toLowerCase()
385
+ return (
386
+ c.includes('payment') || c.includes('credit') || c.includes('rebate') ||
387
+ d.includes('directpay') || d.includes('payment') || d.includes('statement credit')
388
+ )
389
+ }
390
+
391
+ function parseGenericCSV(content: string): NormalizeResult {
392
+ const transactions: RawTransaction[] = []
393
+ const errors: string[] = []
394
+ const warnings: string[] = []
395
+ const { headers, rows } = parseCSVContent(content)
396
+
397
+ const colMap = {
398
+ date: findColumn(headers, [
399
+ 'date', 'transaction date', 'trans. date', 'trans date',
400
+ 'posting date', 'post date',
401
+ ]),
402
+ description: findColumn(headers, [
403
+ 'description', 'desc', 'memo', 'narrative', 'details',
404
+ 'transaction description', 'merchant',
405
+ ]),
406
+ amount: findColumn(headers, [
407
+ 'amount', 'value', 'sum', 'total', 'debit/credit',
408
+ ]),
409
+ category: findColumn(headers, ['category', 'type', 'transaction type']),
410
+ }
411
+
412
+ if (colMap.description === -1) {
413
+ errors.push('Missing required column: description')
414
+ return { transactions, errors, warnings }
415
+ }
416
+ if (colMap.amount === -1) {
417
+ errors.push('Missing required column: amount')
418
+ return { transactions, errors, warnings }
419
+ }
420
+ if (colMap.date === -1) {
421
+ warnings.push("No date column found, using today's date for all transactions")
422
+ }
423
+
424
+ for (let i = 0; i < rows.length; i++) {
425
+ const row = rows[i]
426
+ try {
427
+ const description = (row[colMap.description] || '').trim()
428
+ if (!description) { warnings.push(`Row ${i + 2}: Empty description, skipping`); continue }
429
+ transactions.push({
430
+ date: parseDate(colMap.date >= 0 ? row[colMap.date] : ''),
431
+ description,
432
+ amount: parseAmount(row[colMap.amount]),
433
+ originalCategory: colMap.category >= 0 ? row[colMap.category] || undefined : undefined,
434
+ })
435
+ } catch (err) {
436
+ errors.push(`Row ${i + 2}: ${err instanceof Error ? err.message : 'Unknown error'}`)
437
+ }
438
+ }
439
+ return { transactions, errors, warnings }
440
+ }
441
+
442
+ /**
443
+ * Normalize CSV content based on detected bank format.
444
+ */
445
+ function normalizeTransactions(content: string, format: BankFormat): NormalizeResult {
446
+ switch (format) {
447
+ case 'chase_checking':
448
+ return parseChaseChecking(content)
449
+ case 'chase_credit':
450
+ return parseChaseCredit(content)
451
+ case 'discover':
452
+ return parseDiscover(content)
453
+ case 'amex':
454
+ return { transactions: [], errors: ['Amex XLSX not supported in CLI yet'], warnings: [] }
455
+ case 'generic':
456
+ return parseGenericCSV(content)
457
+ default:
458
+ return { transactions: [], errors: [`Unknown format: ${format}`], warnings: [] }
459
+ }
460
+ }
461
+
462
+ // =============================================================================
463
+ // DEDUPLICATION
464
+ // =============================================================================
465
+
466
+ /**
467
+ * Generate a deterministic hash for transaction deduplication.
468
+ * Uses date + amount + normalized description.
469
+ */
470
+ function generateTransactionHash(date: string, amount: number, description: string): string {
471
+ const normalizedDesc = description.trim().toLowerCase()
472
+ const hashInput = `${date}|${amount}|${normalizedDesc}`
473
+ return createHash('sha256').update(hashInput).digest('hex').slice(0, 32)
474
+ }
475
+
476
+ /**
477
+ * Find which hashes already exist in the database.
478
+ */
479
+ async function findExistingHashes(userId: string, hashes: string[]): Promise<Set<string>> {
480
+ if (hashes.length === 0) return new Set()
481
+ const supabase = getSupabase('optimal')
482
+ const existing = new Set<string>()
483
+
484
+ const batchSize = 100
485
+ for (let i = 0; i < hashes.length; i += batchSize) {
486
+ const batch = hashes.slice(i, i + batchSize)
487
+ const { data } = await supabase
488
+ .from('transactions')
489
+ .select('dedup_hash')
490
+ .eq('user_id', userId)
491
+ .in('dedup_hash', batch)
492
+
493
+ if (data) {
494
+ for (const row of data) {
495
+ if (row.dedup_hash) existing.add(row.dedup_hash as string)
496
+ }
497
+ }
498
+ }
499
+ return existing
500
+ }
501
+
502
+ // =============================================================================
503
+ // MAIN INGESTION FUNCTION
504
+ // =============================================================================
505
+
506
+ /**
507
+ * Ingest transactions from a CSV file.
508
+ *
509
+ * 1. Read & detect format
510
+ * 2. Parse into normalized transactions
511
+ * 3. Deduplicate against existing rows (by hash)
512
+ * 4. Batch-insert new rows into `transactions`
513
+ *
514
+ * @returns count of inserted, skipped (duplicate), and failed rows
515
+ */
516
+ export async function ingestTransactions(
517
+ filePath: string,
518
+ userId: string,
519
+ ): Promise<IngestResult> {
520
+ const supabase = getSupabase('optimal')
521
+
522
+ // 1. Read file
523
+ const content = readFileSync(filePath, 'utf-8')
524
+
525
+ // 2. Detect format
526
+ const detection = detectFormat(content, filePath)
527
+ if (detection.format === 'unknown') {
528
+ return { inserted: 0, skipped: 0, failed: 0, errors: ['Could not detect CSV format'], format: 'unknown' }
529
+ }
530
+
531
+ // 3. Normalize / parse
532
+ const { transactions, errors: parseErrors, warnings } = normalizeTransactions(
533
+ content,
534
+ detection.format,
535
+ )
536
+
537
+ if (transactions.length === 0) {
538
+ return {
539
+ inserted: 0,
540
+ skipped: 0,
541
+ failed: 0,
542
+ errors: parseErrors.length > 0 ? parseErrors : ['No transactions parsed from file'],
543
+ format: detection.format,
544
+ }
545
+ }
546
+
547
+ // 4. Compute dedup hashes
548
+ const withHashes = transactions.map((tx) => ({
549
+ ...tx,
550
+ dedupHash: generateTransactionHash(tx.date, tx.amount, tx.description),
551
+ }))
552
+
553
+ // 5. Find existing duplicates
554
+ const allHashes = withHashes.map((t) => t.dedupHash)
555
+ const existingHashes = await findExistingHashes(userId, allHashes)
556
+ const duplicateCount = withHashes.filter((t) => existingHashes.has(t.dedupHash)).length
557
+ const newTxns = withHashes.filter((t) => !existingHashes.has(t.dedupHash))
558
+
559
+ if (newTxns.length === 0) {
560
+ return {
561
+ inserted: 0,
562
+ skipped: duplicateCount,
563
+ failed: 0,
564
+ errors: parseErrors,
565
+ format: detection.format,
566
+ }
567
+ }
568
+
569
+ // 6. Create upload batch record for provenance
570
+ const { data: batchRecord } = await supabase
571
+ .from('upload_batches')
572
+ .insert({
573
+ user_id: userId,
574
+ file_name: filePath.split('/').pop() || 'unnamed.csv',
575
+ row_count: newTxns.length,
576
+ })
577
+ .select('id')
578
+ .single()
579
+
580
+ const batchId = batchRecord?.id ?? null
581
+
582
+ // 7. Resolve categories (find or create)
583
+ const uniqueCategories = [
584
+ ...new Set(newTxns.map((t) => t.originalCategory).filter(Boolean)),
585
+ ] as string[]
586
+ const categoryMap = new Map<string, number>()
587
+
588
+ for (const catName of uniqueCategories) {
589
+ const { data: existing } = await supabase
590
+ .from('categories')
591
+ .select('id')
592
+ .eq('user_id', userId)
593
+ .eq('name', catName)
594
+ .single()
595
+
596
+ if (existing) {
597
+ categoryMap.set(catName, existing.id as number)
598
+ continue
599
+ }
600
+
601
+ const { data: created, error: createErr } = await supabase
602
+ .from('categories')
603
+ .insert({
604
+ user_id: userId,
605
+ name: catName,
606
+ color: `#${Math.floor(Math.random() * 16_777_215).toString(16).padStart(6, '0')}`,
607
+ })
608
+ .select('id')
609
+ .single()
610
+
611
+ if (createErr) {
612
+ parseErrors.push(`Failed to create category '${catName}': ${createErr.message}`)
613
+ continue
614
+ }
615
+ if (created) categoryMap.set(catName, created.id as number)
616
+ }
617
+
618
+ // 8. Prepare rows for insert
619
+ const rows = newTxns.map((txn) => ({
620
+ user_id: userId,
621
+ date: txn.date,
622
+ description: txn.description,
623
+ amount: parseFloat(txn.amount.toString()),
624
+ type: txn.transactionType || null,
625
+ category_id: txn.originalCategory ? categoryMap.get(txn.originalCategory) ?? null : null,
626
+ mode: 'actual',
627
+ provider: 'csv',
628
+ dedup_hash: txn.dedupHash,
629
+ batch_id: batchId,
630
+ }))
631
+
632
+ // 9. Batch-insert (50 at a time)
633
+ let insertedCount = 0
634
+ let failedCount = 0
635
+ const insertBatchSize = 50
636
+
637
+ for (let i = 0; i < rows.length; i += insertBatchSize) {
638
+ const batch = rows.slice(i, i + insertBatchSize)
639
+ const { error: insertErr } = await supabase.from('transactions').insert(batch)
640
+
641
+ if (insertErr) {
642
+ failedCount += batch.length
643
+ parseErrors.push(`Insert batch ${Math.floor(i / insertBatchSize) + 1} failed: ${insertErr.message}`)
644
+ } else {
645
+ insertedCount += batch.length
646
+ }
647
+ }
648
+
649
+ // Log warnings as non-fatal errors
650
+ parseErrors.push(...warnings)
651
+
652
+ return {
653
+ inserted: insertedCount,
654
+ skipped: duplicateCount,
655
+ failed: failedCount,
656
+ errors: parseErrors,
657
+ format: detection.format,
658
+ }
659
+ }