optimal-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +175 -0
- package/dist/bin/optimal.d.ts +2 -0
- package/dist/bin/optimal.js +995 -0
- package/dist/lib/budget/projections.d.ts +115 -0
- package/dist/lib/budget/projections.js +384 -0
- package/dist/lib/budget/scenarios.d.ts +93 -0
- package/dist/lib/budget/scenarios.js +214 -0
- package/dist/lib/cms/publish-blog.d.ts +62 -0
- package/dist/lib/cms/publish-blog.js +74 -0
- package/dist/lib/cms/strapi-client.d.ts +123 -0
- package/dist/lib/cms/strapi-client.js +213 -0
- package/dist/lib/config.d.ts +55 -0
- package/dist/lib/config.js +206 -0
- package/dist/lib/infra/deploy.d.ts +29 -0
- package/dist/lib/infra/deploy.js +58 -0
- package/dist/lib/infra/migrate.d.ts +34 -0
- package/dist/lib/infra/migrate.js +103 -0
- package/dist/lib/kanban.d.ts +46 -0
- package/dist/lib/kanban.js +118 -0
- package/dist/lib/newsletter/distribute.d.ts +52 -0
- package/dist/lib/newsletter/distribute.js +193 -0
- package/dist/lib/newsletter/generate-insurance.d.ts +42 -0
- package/dist/lib/newsletter/generate-insurance.js +36 -0
- package/dist/lib/newsletter/generate.d.ts +104 -0
- package/dist/lib/newsletter/generate.js +571 -0
- package/dist/lib/returnpro/anomalies.d.ts +64 -0
- package/dist/lib/returnpro/anomalies.js +166 -0
- package/dist/lib/returnpro/audit.d.ts +32 -0
- package/dist/lib/returnpro/audit.js +147 -0
- package/dist/lib/returnpro/diagnose.d.ts +52 -0
- package/dist/lib/returnpro/diagnose.js +281 -0
- package/dist/lib/returnpro/kpis.d.ts +32 -0
- package/dist/lib/returnpro/kpis.js +192 -0
- package/dist/lib/returnpro/templates.d.ts +48 -0
- package/dist/lib/returnpro/templates.js +229 -0
- package/dist/lib/returnpro/upload-income.d.ts +25 -0
- package/dist/lib/returnpro/upload-income.js +235 -0
- package/dist/lib/returnpro/upload-netsuite.d.ts +37 -0
- package/dist/lib/returnpro/upload-netsuite.js +566 -0
- package/dist/lib/returnpro/upload-r1.d.ts +48 -0
- package/dist/lib/returnpro/upload-r1.js +398 -0
- package/dist/lib/social/post-generator.d.ts +83 -0
- package/dist/lib/social/post-generator.js +333 -0
- package/dist/lib/social/publish.d.ts +66 -0
- package/dist/lib/social/publish.js +226 -0
- package/dist/lib/social/scraper.d.ts +67 -0
- package/dist/lib/social/scraper.js +361 -0
- package/dist/lib/supabase.d.ts +4 -0
- package/dist/lib/supabase.js +20 -0
- package/dist/lib/transactions/delete-batch.d.ts +60 -0
- package/dist/lib/transactions/delete-batch.js +203 -0
- package/dist/lib/transactions/ingest.d.ts +43 -0
- package/dist/lib/transactions/ingest.js +555 -0
- package/dist/lib/transactions/stamp.d.ts +51 -0
- package/dist/lib/transactions/stamp.js +524 -0
- package/package.json +50 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Meta Ad Library Scraper
|
|
3
|
+
*
|
|
4
|
+
* Ported from Python: ~/projects/meta-ad-scraper/scripts/meta_ad_scraper_v2.py
|
|
5
|
+
*
|
|
6
|
+
* Scrapes Facebook Ad Library for competitor ad intelligence.
|
|
7
|
+
* Uses Playwright headless Chromium with anti-detection measures.
|
|
8
|
+
* Splits ads by Library ID pattern, extracts metadata via regex.
|
|
9
|
+
*
|
|
10
|
+
* Functions:
|
|
11
|
+
* buildUrl() — construct Facebook Ad Library URL for a company
|
|
12
|
+
* scrollAndLoad() — auto-scroll page to load all ads (max 15 scrolls)
|
|
13
|
+
* extractAds() — two-stage extraction: DOM containers, then text split fallback
|
|
14
|
+
* parseAdText() — regex extraction of ad metadata from text blocks
|
|
15
|
+
* extractLandingUrls() — find landing page URLs from DOM links
|
|
16
|
+
* scrapeCompany() — orchestrate single company scrape
|
|
17
|
+
* scrapeCompanies() — batch-scrape multiple companies with configurable parallelism
|
|
18
|
+
* formatCsv() — convert ad records to CSV string
|
|
19
|
+
*/
|
|
20
|
+
import { chromium } from 'playwright';
|
|
21
|
+
import { writeFileSync } from 'node:fs';
|
|
22
|
+
// ── CSV Column Order ────────────────────────────────────────────────
// Keys must match the properties parseAdText() sets on each ad record;
// formatCsv() emits columns in exactly this order.
const CSV_FIELDS = [
    'company_searched', // the search term used (not necessarily the page name)
    'ad_id', // Facebook Library ID
    'page_name', // line preceding "Sponsored" in the ad block
    'ad_text', // creative text, truncated to 500 chars
    'status', // 'Active' | 'Inactive' | 'Unknown'
    'start_date', // from "Started running on ..."
    'impressions', // range string, e.g. "1,000 - 5,000"; empty when not shown
    'spend', // range string; empty when not shown
    'media_type', // 'video' | 'carousel/multiple' | 'image' (keyword heuristic)
    'platforms', // comma-joined subset of Facebook/Instagram/Messenger/Audience Network
    'landing_page_url', // filled later from DOM links (extractLandingUrls)
    'full_text_snippet', // first 500 chars of the raw ad block, for reference
];
|
|
37
|
+
// ── URL Builder ─────────────────────────────────────────────────────
/**
 * Build the Facebook Ad Library search URL for a company.
 *
 * Active US ads, all types/media, sorted by total impressions descending.
 * The query string is assembled by hand so the `sort_data[...]` bracket
 * keys stay unencoded; only the company name itself is percent-encoded.
 */
export function buildUrl(companyName) {
    const query = [
        'active_status=active',
        'ad_type=all',
        'country=US',
        'is_targeted_country=false',
        'media_type=all',
        'sort_data[mode]=total_impressions',
        'sort_data[direction]=desc',
        `q=${encodeURIComponent(companyName)}`,
    ].join('&');
    return `https://www.facebook.com/ads/library/?${query}`;
}
|
|
50
|
+
// ── Scroll & Load ───────────────────────────────────────────────────
|
|
51
|
+
export async function scrollAndLoad(page, maxScrolls = 15) {
|
|
52
|
+
let prevHeight = 0;
|
|
53
|
+
for (let i = 0; i < maxScrolls; i++) {
|
|
54
|
+
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
|
55
|
+
await page.waitForTimeout(2000);
|
|
56
|
+
const currHeight = await page.evaluate(() => document.body.scrollHeight);
|
|
57
|
+
if (currHeight === prevHeight && i > 1)
|
|
58
|
+
break;
|
|
59
|
+
prevHeight = currHeight;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
// ── Parse Ad Text ───────────────────────────────────────────────────
|
|
63
|
+
export function parseAdText(text, companyName) {
|
|
64
|
+
if (!text || text.length < 20)
|
|
65
|
+
return null;
|
|
66
|
+
const ad = { company_searched: companyName };
|
|
67
|
+
// Library ID
|
|
68
|
+
const idMatch = text.match(/Library ID:\s*(\d+)/);
|
|
69
|
+
if (idMatch) {
|
|
70
|
+
ad.ad_id = idMatch[1];
|
|
71
|
+
}
|
|
72
|
+
else {
|
|
73
|
+
return null; // Skip blocks without a Library ID
|
|
74
|
+
}
|
|
75
|
+
// Start date
|
|
76
|
+
const dateMatch = text.match(/Started running on\s+(\w+ \d+,?\s*\d*)/);
|
|
77
|
+
if (dateMatch) {
|
|
78
|
+
ad.start_date = dateMatch[1].trim();
|
|
79
|
+
}
|
|
80
|
+
else {
|
|
81
|
+
ad.start_date = '';
|
|
82
|
+
}
|
|
83
|
+
// Status (Active/Inactive)
|
|
84
|
+
if (text.includes('Active')) {
|
|
85
|
+
ad.status = 'Active';
|
|
86
|
+
}
|
|
87
|
+
else if (text.includes('Inactive')) {
|
|
88
|
+
ad.status = 'Inactive';
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
ad.status = 'Unknown';
|
|
92
|
+
}
|
|
93
|
+
// Page name - look for "Sponsored" text preceded by the page name
|
|
94
|
+
const sponsorMatch = text.match(/(?:^|\n)([^\n]+)\nSponsored/);
|
|
95
|
+
if (sponsorMatch) {
|
|
96
|
+
ad.page_name = sponsorMatch[1].trim();
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
ad.page_name = '';
|
|
100
|
+
}
|
|
101
|
+
// Ad creative text - text after "Sponsored" and before common end markers
|
|
102
|
+
const creativeMatch = text.match(/Sponsored\n(.+?)(?:\n(?:Learn More|Sign Up|Shop Now|Get Offer|Download|Apply Now|Book Now|Contact Us|Send Message|Watch More|See Menu|Get Quote|Subscribe|Get Showtimes)|\Z)/s);
|
|
103
|
+
if (creativeMatch) {
|
|
104
|
+
ad.ad_text = creativeMatch[1].trim().slice(0, 500);
|
|
105
|
+
}
|
|
106
|
+
else {
|
|
107
|
+
ad.ad_text = '';
|
|
108
|
+
}
|
|
109
|
+
// Impressions
|
|
110
|
+
const impMatch = text.match(/(?:impressions?)\s*[:\s]*([\d,.]+\s*[-\u2013]\s*[\d,.]+)/i);
|
|
111
|
+
if (impMatch) {
|
|
112
|
+
ad.impressions = impMatch[1];
|
|
113
|
+
}
|
|
114
|
+
else {
|
|
115
|
+
ad.impressions = '';
|
|
116
|
+
}
|
|
117
|
+
// Spend
|
|
118
|
+
const spendMatch = text.match(/(?:spend|spent)\s*[:\s]*\$?([\d,.]+\s*[-\u2013]\s*\$?[\d,.]+)/i);
|
|
119
|
+
if (spendMatch) {
|
|
120
|
+
ad.spend = spendMatch[1];
|
|
121
|
+
}
|
|
122
|
+
else {
|
|
123
|
+
ad.spend = '';
|
|
124
|
+
}
|
|
125
|
+
// Media type
|
|
126
|
+
const textLower = text.toLowerCase();
|
|
127
|
+
if (['video', '0:00', 'play'].some((kw) => textLower.includes(kw))) {
|
|
128
|
+
ad.media_type = 'video';
|
|
129
|
+
}
|
|
130
|
+
else if (textLower.includes('carousel') ||
|
|
131
|
+
textLower.includes('multiple versions')) {
|
|
132
|
+
ad.media_type = 'carousel/multiple';
|
|
133
|
+
}
|
|
134
|
+
else {
|
|
135
|
+
ad.media_type = 'image';
|
|
136
|
+
}
|
|
137
|
+
// Platforms
|
|
138
|
+
const platformNames = ['Facebook', 'Instagram', 'Messenger', 'Audience Network'];
|
|
139
|
+
const platforms = platformNames.filter((p) => textLower.includes(p.toLowerCase()));
|
|
140
|
+
ad.platforms = platforms.join(', ');
|
|
141
|
+
// Landing page URL (not available from text, would need DOM links)
|
|
142
|
+
ad.landing_page_url = '';
|
|
143
|
+
// Full text snippet for reference
|
|
144
|
+
ad.full_text_snippet = text.slice(0, 500);
|
|
145
|
+
return ad;
|
|
146
|
+
}
|
|
147
|
+
// ── Extract Ads ─────────────────────────────────────────────────────
/**
 * Extract all ad records for one company from the currently loaded
 * Ad Library page.
 *
 * Two-stage strategy:
 *   1. DOM stage — collect <div> elements whose text holds EXACTLY one
 *      "Library ID:" marker (50–5000 chars), dedupe by ID keeping the
 *      smallest (most specific) container, then parse each with
 *      parseAdText().
 *   2. Text fallback — when no containers qualify, split the whole page
 *      text on "Library ID:" boundaries and parse each section.
 *
 * @param page        Playwright page already navigated to the Ad Library
 * @param companyName search term stamped onto every record
 * @param maxScrolls  forwarded to scrollAndLoad()
 * @returns array of parsed ad records (possibly empty)
 */
export async function extractAds(page, companyName, maxScrolls = 15) {
    const ads = [];
    // Wait for content
    try {
        await page.waitForLoadState('networkidle', { timeout: 15000 });
    }
    catch {
        // Timeout is acceptable — continue with what loaded
    }
    await page.waitForTimeout(3000);
    // Check for no results
    const pageText = await page.evaluate(() => document.body.innerText);
    if (!pageText ||
        pageText.toLowerCase().includes('no results') ||
        pageText.toLowerCase().includes('no ads match')) {
        console.log(` [INFO] No ads found for ${companyName}`);
        return ads;
    }
    // Scroll to load all ads
    await scrollAndLoad(page, maxScrolls);
    // Also try to extract structured data from the DOM.
    // NOTE: this callback runs inside the browser, so it can only use
    // values defined within it (nothing from this module's scope).
    const domAds = await page.evaluate(() => {
        const results = [];
        // Find all Library ID occurrences via DOM containers
        const allElements = document.querySelectorAll('div');
        const adContainers = [];
        allElements.forEach((el) => {
            const text = el.innerText || '';
            // An ad container typically has EXACTLY ONE Library ID
            const idMatches = text.match(/Library ID:\s*\d+/g);
            if (idMatches && idMatches.length === 1) {
                // Check it's not too small (just a label) or too large (parent of multiple ads)
                const textLen = text.length;
                if (textLen > 50 && textLen < 5000) {
                    adContainers.push({
                        text,
                        textLen,
                        tag: el.tagName,
                    });
                }
            }
        });
        // Deduplicate - remove containers that are subsets of other containers.
        // Nested divs around the same ad all match; sorting by text length
        // (smallest first - these are the most specific) means the innermost
        // container wins for each Library ID.
        adContainers.sort((a, b) => a.textLen - b.textLen);
        const seen = new Set();
        adContainers.forEach((container) => {
            const idMatch = container.text.match(/Library ID:\s*(\d+)/);
            if (idMatch && !seen.has(idMatch[1])) {
                seen.add(idMatch[1]);
                results.push(container);
            }
        });
        return results;
    });
    if (domAds && domAds.length > 0) {
        console.log(` [DOM] Found ${domAds.length} individual ad containers`);
        for (const raw of domAds) {
            const ad = parseAdText(raw.text, companyName);
            if (ad)
                ads.push(ad);
        }
    }
    else {
        // Fallback: split page text by "Library ID:" pattern
        console.log(` [TEXT] Falling back to text-based splitting`);
        const fullText = await page.evaluate(() => document.body.innerText);
        // Lookahead keeps the "Library ID:" marker at the head of each section
        const sections = fullText.split(/(?=Library ID:\s*\d+)/);
        for (const section of sections) {
            const trimmed = section.trim();
            if (!trimmed || trimmed.length < 30)
                continue;
            const ad = parseAdText(trimmed, companyName);
            if (ad)
                ads.push(ad);
        }
    }
    return ads;
}
|
|
227
|
+
// ── Extract Landing URLs ────────────────────────────────────────────
/**
 * Map each Library ID to the first outbound l.facebook.com redirect
 * link found inside a <div> whose text mentions that ID.
 *
 * Runs entirely in the browser context; returns an object keyed by
 * ad ID (IDs with no matching link are simply absent).
 */
export async function extractLandingUrls(page, adIds) {
    return page.evaluate((ids) => {
        const found = {};
        const anchors = document.querySelectorAll('a[href*="l.facebook.com"]');
        anchors.forEach((anchor) => {
            const href = anchor.href || '';
            const container = anchor.closest('div');
            if (!container)
                return;
            const containerText = container.innerText || '';
            for (const id of ids) {
                // first link wins per ID
                if (containerText.includes(id) && !found[id]) {
                    found[id] = href;
                }
            }
        });
        return found;
    }, adIds);
}
|
|
247
|
+
// ── Scrape Single Company ───────────────────────────────────────────
/**
 * Scrape one company's ads: navigate to its Ad Library URL, extract
 * all ad records, then backfill landing-page URLs from DOM links.
 *
 * Returns [] on navigation timeout rather than throwing, so batch
 * runs keep going.
 */
export async function scrapeCompany(page, companyName, maxScrolls = 15) {
    const url = buildUrl(companyName);
    const rule = '='.repeat(60);
    console.log(`\n${rule}`);
    console.log(`Scraping: ${companyName}`);
    console.log(`URL: ${url}`);
    console.log(`${rule}`);
    try {
        await page.goto(url, { timeout: 30000, waitUntil: 'domcontentloaded' });
    }
    catch {
        console.log(` [ERROR] Page load timeout for ${companyName}`);
        return [];
    }
    const ads = await extractAds(page, companyName, maxScrolls);
    // Backfill landing URLs for the IDs we actually extracted
    const adIds = ads.map((ad) => ad.ad_id).filter(Boolean);
    if (adIds.length > 0) {
        const urlsById = await extractLandingUrls(page, adIds);
        for (const ad of ads) {
            if (ad.ad_id in urlsById) {
                ad.landing_page_url = urlsById[ad.ad_id];
            }
        }
    }
    console.log(` [DONE] Extracted ${ads.length} individual ads for ${companyName}`);
    return ads;
}
|
|
277
|
+
// ── Batch Scraper ───────────────────────────────────────────────────
/**
 * Scrape multiple companies in batches.
 *
 * Companies are split into batches of `batchSize` (default 6). One
 * Chromium instance is launched for the whole run; each batch gets a
 * fresh browser context (fresh cookies/storage) and a single page, and
 * companies within a batch are visited sequentially with a
 * `companyDelay` pause between them. NOTE(review): the original
 * comment claimed "3 parallel batches", but this implementation
 * processes batches strictly sequentially.
 *
 * When `outputPath` is set, results are also written as CSV.
 *
 * @param opts.companies    list of company names to search
 * @param opts.outputPath   optional CSV file destination
 * @param opts.batchSize    companies per browser context (default 6)
 * @param opts.maxScrolls   scroll cap per page (default 15)
 * @param opts.companyDelay ms pause between companies (default 4000)
 * @param opts.headless     launch Chromium headless (default true)
 * @returns summary with all ads, totals, and the output path
 */
export async function scrapeCompanies(opts) {
    const { companies, outputPath, batchSize = 6, maxScrolls = 15, companyDelay = 4000, headless = true, } = opts;
    console.log(`Starting Meta Ad Library scraper for ${companies.length} companies`);
    if (outputPath)
        console.log(`Output: ${outputPath}`);
    const allAds = [];
    let companiesScraped = 0;
    // Split into batches
    const batches = [];
    for (let i = 0; i < companies.length; i += batchSize) {
        batches.push(companies.slice(i, i + batchSize));
    }
    console.log(`Processing ${batches.length} batch(es) of up to ${batchSize} companies each`);
    let browser = null;
    try {
        // Anti-detection launch flags (ported from the Python scraper)
        browser = await chromium.launch({
            headless,
            args: [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled',
            ],
        });
        for (let bi = 0; bi < batches.length; bi++) {
            const batch = batches[bi];
            console.log(`\nBatch ${bi + 1}/${batches.length}: ${batch.length} companies`);
            // Fresh context per batch: desktop viewport + realistic UA
            const context = await browser.newContext({
                viewport: { width: 1920, height: 1080 },
                userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
            });
            const page = await context.newPage();
            for (let ci = 0; ci < batch.length; ci++) {
                const company = batch[ci];
                // Rate-limit between companies (skip before the first one)
                if (ci > 0) {
                    console.log(`\n [WAIT] Waiting ${companyDelay / 1000}s before next company...`);
                    await page.waitForTimeout(companyDelay);
                }
                const ads = await scrapeCompany(page, company, maxScrolls);
                allAds.push(...ads);
                companiesScraped++;
            }
            await context.close();
        }
    }
    finally {
        // Always release the browser, even if a batch threw
        if (browser)
            await browser.close();
    }
    // Write CSV output if path specified
    if (outputPath) {
        const csv = formatCsv(allAds);
        writeFileSync(outputPath, csv, 'utf-8');
        console.log(`\nSaved ${allAds.length} ads to ${outputPath}`);
    }
    console.log(`\nBatch complete: ${allAds.length} total ads from ${companiesScraped} companies`);
    return {
        ads: allAds,
        totalCompanies: companies.length,
        companiesScraped,
        outputPath,
    };
}
|
|
345
|
+
// ── CSV Formatter ───────────────────────────────────────────────────
/**
 * Quote a CSV field when it contains a comma, quote, or line break;
 * inner quotes are doubled per RFC 4180. Plain values pass through.
 */
function escapeCsvField(value) {
    const needsQuoting = /[",\n\r]/.test(value);
    if (!needsQuoting) {
        return value;
    }
    return `"${value.replace(/"/g, '""')}"`;
}
|
|
356
|
+
/**
 * Convert ad records to CSV text: one header row (CSV_FIELDS order)
 * plus one row per ad; missing fields render as empty cells. Output
 * ends with a trailing newline.
 */
export function formatCsv(ads) {
    const lines = [CSV_FIELDS.join(',')];
    for (const ad of ads) {
        const cells = CSV_FIELDS.map((field) => escapeCsvField(ad[field] ?? ''));
        lines.push(cells.join(','));
    }
    return lines.join('\n') + '\n';
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { createClient } from '@supabase/supabase-js';
|
|
2
|
+
import 'dotenv/config';
|
|
3
|
+
// Per-instance env-var names; each instance is a separate Supabase project.
const configs = {
    optimal: { urlEnv: 'OPTIMAL_SUPABASE_URL', keyEnv: 'OPTIMAL_SUPABASE_SERVICE_KEY' },
    returnpro: { urlEnv: 'RETURNPRO_SUPABASE_URL', keyEnv: 'RETURNPRO_SUPABASE_SERVICE_KEY' },
};
// Cache: one client per instance so repeated calls reuse the same connection.
const clients = new Map();
/**
 * Return a memoized Supabase client for the named instance
 * ('optimal' | 'returnpro').
 *
 * Throws an Error with a clear message when the instance name is
 * unknown (previously this crashed with a TypeError reading `urlEnv`
 * of undefined) or when either required env var is unset.
 */
export function getSupabase(instance) {
    const existing = clients.get(instance);
    if (existing)
        return existing;
    const config = configs[instance];
    if (!config)
        throw new Error(`Unknown Supabase instance: ${instance} (expected one of: ${Object.keys(configs).join(', ')})`);
    const url = process.env[config.urlEnv];
    const key = process.env[config.keyEnv];
    if (!url || !key)
        throw new Error(`Missing env vars: ${config.urlEnv}, ${config.keyEnv}`);
    const client = createClient(url, key);
    clients.set(instance, client);
    return client;
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transaction & Staging Batch Deletion — Safe Preview and Execute
|
|
3
|
+
*
|
|
4
|
+
* Provides safe batch deletion of transactions (OptimalOS) and staging
|
|
5
|
+
* financials (ReturnPro) with preview mode defaulting to dryRun=true.
|
|
6
|
+
*
|
|
7
|
+
* Tables:
|
|
8
|
+
* - transactions → OptimalOS Supabase (getSupabase('optimal'))
|
|
9
|
+
* - stg_financials_raw → ReturnPro Supabase (getSupabase('returnpro'))
|
|
10
|
+
*
|
|
11
|
+
* Columns:
|
|
12
|
+
* transactions: id, user_id, date, description, amount, category, source, stamp_match_type, created_at
|
|
13
|
+
* stg_financials_raw: id, account_code, account_name, amount (TEXT), month (YYYY-MM), source, user_id, created_at
|
|
14
|
+
*/
|
|
15
|
+
import 'dotenv/config';
|
|
16
|
+
export interface DeleteBatchOptions {
    /** Target table; also selects the Supabase instance (see module header). */
    table: 'transactions' | 'stg_financials_raw';
    /** Optional eq filter on `user_id`; applies to both tables. */
    userId?: string;
    /** Filter set; which keys are honored depends on `table`. */
    filters: {
        /** transactions only: inclusive lower bound (gte) on `date`; ignored for staging. */
        dateFrom?: string;
        /** transactions only: inclusive upper bound (lte) on `date`; ignored for staging. */
        dateTo?: string;
        /** eq filter on `source`; honored for both tables. */
        source?: string;
        /** transactions only: eq filter on `category`. */
        category?: string;
        /** stg_financials_raw only: eq filter on `account_code`. */
        accountCode?: string;
        /** stg_financials_raw only: eq filter on `month` (YYYY-MM). */
        month?: string;
    };
    /** Safety switch — defaults to TRUE (preview only, nothing deleted). */
    dryRun?: boolean;
}
export interface DeleteBatchResult {
    /** Table the operation targeted. */
    table: string;
    /** Rows actually deleted; always 0 in dry-run mode. */
    deletedCount: number;
    /** True when this was a preview rather than a real deletion. */
    dryRun: boolean;
    /** Human-readable echo of the filters that were applied. */
    filters: Record<string, string>;
}
export interface PreviewResult {
    /** Table that was inspected. */
    table: string;
    /** Total rows matching the filters. */
    matchCount: number;
    /** First 10 matching rows. */
    sample: Array<Record<string, unknown>>;
    /** Row counts grouped by `source` (transactions) or `account_code` (staging). */
    groupedCounts: Record<string, number>;
}
/**
 * Preview what would be deleted without touching any data.
 *
 * Returns:
 * - matchCount: total rows matching the filters
 * - sample: first 10 matching rows
 * - groupedCounts: row counts grouped by `source` (transactions) or `account_code` (staging)
 */
export declare function previewBatch(opts: DeleteBatchOptions): Promise<PreviewResult>;
/**
 * Delete matching rows in batch — or preview them without deleting (dryRun=true).
 *
 * Safety: dryRun defaults to TRUE. Caller must explicitly pass dryRun=false
 * to execute an actual deletion.
 *
 * In dryRun mode: counts matching rows and returns deletedCount=0.
 * In execute mode: issues a Supabase DELETE with the same filters and returns
 * the number of rows deleted.
 */
export declare function deleteBatch(opts: DeleteBatchOptions): Promise<DeleteBatchResult>;
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transaction & Staging Batch Deletion — Safe Preview and Execute
|
|
3
|
+
*
|
|
4
|
+
* Provides safe batch deletion of transactions (OptimalOS) and staging
|
|
5
|
+
* financials (ReturnPro) with preview mode defaulting to dryRun=true.
|
|
6
|
+
*
|
|
7
|
+
* Tables:
|
|
8
|
+
* - transactions → OptimalOS Supabase (getSupabase('optimal'))
|
|
9
|
+
* - stg_financials_raw → ReturnPro Supabase (getSupabase('returnpro'))
|
|
10
|
+
*
|
|
11
|
+
* Columns:
|
|
12
|
+
* transactions: id, user_id, date, description, amount, category, source, stamp_match_type, created_at
|
|
13
|
+
* stg_financials_raw: id, account_code, account_name, amount (TEXT), month (YYYY-MM), source, user_id, created_at
|
|
14
|
+
*/
|
|
15
|
+
import 'dotenv/config';
|
|
16
|
+
import { getSupabase } from '../supabase.js';
|
|
17
|
+
// =============================================================================
|
|
18
|
+
// INTERNAL HELPERS
|
|
19
|
+
// =============================================================================
|
|
20
|
+
/**
 * Route a table to its owning Supabase instance:
 * `transactions` lives in OptimalOS, `stg_financials_raw` in ReturnPro.
 */
function getClientForTable(table) {
    const instance = table === 'transactions' ? 'optimal' : 'returnpro';
    return getSupabase(instance);
}
|
|
28
|
+
/**
|
|
29
|
+
* Apply the shared set of filters to a Supabase query builder.
|
|
30
|
+
* Works for both SELECT and DELETE queries because both are PostgREST filters.
|
|
31
|
+
*
|
|
32
|
+
* For `stg_financials_raw`:
|
|
33
|
+
* - dateFrom / dateTo are ignored (use `month` instead)
|
|
34
|
+
* - month is applied as an eq filter on the `month` column
|
|
35
|
+
* - accountCode is applied as an eq filter on `account_code`
|
|
36
|
+
*
|
|
37
|
+
* For `transactions`:
|
|
38
|
+
* - dateFrom / dateTo are applied as gte/lte on `date`
|
|
39
|
+
* - source is applied as an eq filter on `source`
|
|
40
|
+
* - category is applied as an eq filter on `category`
|
|
41
|
+
* - userId is applied as an eq filter on `user_id`
|
|
42
|
+
*/
|
|
43
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
44
|
+
function applyFilters(query, table, userId, filters) {
|
|
45
|
+
let q = query;
|
|
46
|
+
if (table === 'transactions') {
|
|
47
|
+
if (userId)
|
|
48
|
+
q = q.eq('user_id', userId);
|
|
49
|
+
if (filters.dateFrom)
|
|
50
|
+
q = q.gte('date', filters.dateFrom);
|
|
51
|
+
if (filters.dateTo)
|
|
52
|
+
q = q.lte('date', filters.dateTo);
|
|
53
|
+
if (filters.source)
|
|
54
|
+
q = q.eq('source', filters.source);
|
|
55
|
+
if (filters.category)
|
|
56
|
+
q = q.eq('category', filters.category);
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
// stg_financials_raw
|
|
60
|
+
if (userId)
|
|
61
|
+
q = q.eq('user_id', userId);
|
|
62
|
+
if (filters.month)
|
|
63
|
+
q = q.eq('month', filters.month);
|
|
64
|
+
if (filters.accountCode)
|
|
65
|
+
q = q.eq('account_code', filters.accountCode);
|
|
66
|
+
if (filters.source)
|
|
67
|
+
q = q.eq('source', filters.source);
|
|
68
|
+
}
|
|
69
|
+
return q;
|
|
70
|
+
}
|
|
71
|
+
/**
 * Serialize the active filters into a flat record for result reporting.
 * Only filters that are set (truthy) and relevant to the table appear;
 * userId is echoed as `user_id`.
 */
function serializeFilters(table, userId, filters) {
    const out = {};
    if (userId)
        out.user_id = userId;
    const relevantKeys = table === 'transactions'
        ? ['dateFrom', 'dateTo', 'source', 'category']
        : ['month', 'accountCode', 'source'];
    for (const key of relevantKeys) {
        if (filters[key])
            out[key] = filters[key];
    }
    return out;
}
|
|
98
|
+
// =============================================================================
|
|
99
|
+
// PUBLIC FUNCTIONS
|
|
100
|
+
// =============================================================================
|
|
101
|
+
/**
 * Preview what would be deleted without touching any data.
 *
 * Returns:
 * - matchCount: total rows matching the filters
 * - sample: first 10 matching rows
 * - groupedCounts: row counts grouped by `source` (transactions) or `account_code` (staging)
 *
 * The three read-only queries (count, sample, group) are independent,
 * so they are issued in parallel with Promise.all instead of serially
 * (3 round-trips -> 1). Supabase builders resolve with `{ data, error }`
 * rather than rejecting, so per-query error messages are preserved.
 *
 * @throws Error when any of the three queries reports a PostgREST error.
 */
export async function previewBatch(opts) {
    const { table, userId, filters } = opts;
    const supabase = getClientForTable(table);
    // Group by `source` for transactions, `account_code` for staging
    const groupCol = table === 'transactions' ? 'source' : 'account_code';
    const countQuery = applyFilters(supabase
        .from(table)
        .select('*', { count: 'exact', head: true }), table, userId, filters);
    const sampleQuery = applyFilters(supabase
        .from(table)
        .select('*')
        .limit(10), table, userId, filters);
    const groupQuery = applyFilters(supabase
        .from(table)
        .select(groupCol), table, userId, filters);
    const [countRes, sampleRes, groupRes] = await Promise.all([countQuery, sampleQuery, groupQuery]);
    if (countRes.error) {
        throw new Error(`previewBatch count error on ${table}: ${countRes.error.message}`);
    }
    if (sampleRes.error) {
        throw new Error(`previewBatch sample error on ${table}: ${sampleRes.error.message}`);
    }
    if (groupRes.error) {
        throw new Error(`previewBatch group error on ${table}: ${groupRes.error.message}`);
    }
    const matchCount = countRes.count ?? 0;
    const sample = (sampleRes.data ?? []);
    // Tally rows per value of the group column
    const groupedCounts = {};
    for (const row of (groupRes.data ?? [])) {
        const key = row[groupCol] ?? '(unknown)';
        groupedCounts[key] = (groupedCounts[key] ?? 0) + 1;
    }
    return {
        table,
        matchCount,
        sample,
        groupedCounts,
    };
}
|
|
156
|
+
/**
 * Delete matching rows in batch — or preview them without deleting (dryRun=true).
 *
 * Safety: dryRun defaults to TRUE. Caller must explicitly pass dryRun=false
 * to execute an actual deletion.
 *
 * In dryRun mode: counts matching rows and returns deletedCount=0.
 * In execute mode: issues a Supabase DELETE with the same filters and returns
 * the number of rows deleted.
 */
export async function deleteBatch(opts) {
    const { table, userId, filters } = opts;
    // `??` (not a destructuring default) so an explicit null also stays safe
    const dryRun = opts.dryRun ?? true;
    const supabase = getClientForTable(table);
    const serializedFilters = serializeFilters(table, userId, filters);
    if (dryRun) {
        // Preview path: count matching rows, delete nothing
        const preview = applyFilters(supabase
            .from(table)
            .select('*', { count: 'exact', head: true }), table, userId, filters);
        const { error } = await preview;
        if (error) {
            throw new Error(`deleteBatch dry-run count error on ${table}: ${error.message}`);
        }
        return { table, deletedCount: 0, dryRun: true, filters: serializedFilters };
    }
    // Execute path: same filters, real DELETE
    const removal = applyFilters(supabase
        .from(table)
        .delete({ count: 'exact' }), table, userId, filters);
    const { count, error } = await removal;
    if (error) {
        throw new Error(`deleteBatch execute error on ${table}: ${error.message}`);
    }
    return { table, deletedCount: count ?? 0, dryRun: false, filters: serializedFilters };
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transaction Ingestion — CSV Parsing & Deduplication
|
|
3
|
+
*
|
|
4
|
+
* Ported from OptimalOS:
|
|
5
|
+
* - /home/optimal/optimalos/app/api/csv/ingest/route.ts
|
|
6
|
+
* - /home/optimal/optimalos/lib/csv/upload.ts
|
|
7
|
+
* - /home/optimal/optimalos/lib/stamp-engine/normalizers/
|
|
8
|
+
* - /home/optimal/optimalos/lib/stamp-engine/format-detector.ts
|
|
9
|
+
*
|
|
10
|
+
* Reads a CSV file from disk, auto-detects bank format, parses into
|
|
11
|
+
* normalized transactions, deduplicates against existing rows in Supabase,
|
|
12
|
+
* and batch-inserts new records into the `transactions` table.
|
|
13
|
+
*/
|
|
14
|
+
export interface RawTransaction {
    // Transaction date string as parsed from the CSV — format presumably
    // normalized by the format-specific parser; verify in ingest.js.
    date: string;
    description: string;
    // Signed amount; sign convention per bank format — TODO confirm in ingest.js
    amount: number;
    // Category column as provided by the bank, when present
    originalCategory?: string;
    transactionType?: string;
    // Posting date, for banks that report it separately from the transaction date
    postDate?: string;
    balance?: number;
    extendedDetails?: string;
    merchantAddress?: string;
}
/** Supported CSV layouts; 'unknown' means auto-detection failed. */
export type BankFormat = 'chase_checking' | 'chase_credit' | 'discover' | 'amex' | 'generic' | 'unknown';
export interface IngestResult {
    // Rows inserted into `transactions`
    inserted: number;
    // Rows skipped as duplicates of existing data (see dedup step below)
    skipped: number;
    // Rows that failed to parse or insert
    failed: number;
    // Error messages accumulated for the failed rows
    errors: string[];
    // Bank format auto-detected for the file
    format: BankFormat;
}
/**
 * Ingest transactions from a CSV file.
 *
 * 1. Read & detect format
 * 2. Parse into normalized transactions
 * 3. Deduplicate against existing rows (by hash)
 * 4. Batch-insert new rows into `transactions`
 *
 * @returns count of inserted, skipped (duplicate), and failed rows
 */
export declare function ingestTransactions(filePath: string, userId: string): Promise<IngestResult>;
|