optimal-cli 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/dist/bin/optimal.d.ts +2 -0
  2. package/dist/bin/optimal.js +1590 -0
  3. package/dist/lib/assets/index.d.ts +79 -0
  4. package/dist/lib/assets/index.js +153 -0
  5. package/dist/lib/assets.d.ts +20 -0
  6. package/dist/lib/assets.js +112 -0
  7. package/dist/lib/auth/index.d.ts +83 -0
  8. package/dist/lib/auth/index.js +146 -0
  9. package/dist/lib/board/index.d.ts +39 -0
  10. package/dist/lib/board/index.js +285 -0
  11. package/dist/lib/board/types.d.ts +111 -0
  12. package/dist/lib/board/types.js +1 -0
  13. package/dist/lib/bot/claim.d.ts +3 -0
  14. package/dist/lib/bot/claim.js +20 -0
  15. package/dist/lib/bot/coordinator.d.ts +27 -0
  16. package/dist/lib/bot/coordinator.js +178 -0
  17. package/dist/lib/bot/heartbeat.d.ts +6 -0
  18. package/dist/lib/bot/heartbeat.js +30 -0
  19. package/dist/lib/bot/index.d.ts +9 -0
  20. package/dist/lib/bot/index.js +6 -0
  21. package/dist/lib/bot/protocol.d.ts +12 -0
  22. package/dist/lib/bot/protocol.js +74 -0
  23. package/dist/lib/bot/reporter.d.ts +3 -0
  24. package/dist/lib/bot/reporter.js +27 -0
  25. package/dist/lib/bot/skills.d.ts +26 -0
  26. package/dist/lib/bot/skills.js +69 -0
  27. package/dist/lib/budget/projections.d.ts +115 -0
  28. package/dist/lib/budget/projections.js +384 -0
  29. package/dist/lib/budget/scenarios.d.ts +93 -0
  30. package/dist/lib/budget/scenarios.js +214 -0
  31. package/dist/lib/cms/publish-blog.d.ts +62 -0
  32. package/dist/lib/cms/publish-blog.js +74 -0
  33. package/dist/lib/cms/strapi-client.d.ts +123 -0
  34. package/dist/lib/cms/strapi-client.js +213 -0
  35. package/dist/lib/config/registry.d.ts +17 -0
  36. package/dist/lib/config/registry.js +182 -0
  37. package/dist/lib/config/schema.d.ts +31 -0
  38. package/dist/lib/config/schema.js +25 -0
  39. package/dist/lib/config.d.ts +55 -0
  40. package/dist/lib/config.js +206 -0
  41. package/dist/lib/errors.d.ts +25 -0
  42. package/dist/lib/errors.js +91 -0
  43. package/dist/lib/format.d.ts +28 -0
  44. package/dist/lib/format.js +98 -0
  45. package/dist/lib/infra/deploy.d.ts +29 -0
  46. package/dist/lib/infra/deploy.js +58 -0
  47. package/dist/lib/infra/migrate.d.ts +34 -0
  48. package/dist/lib/infra/migrate.js +103 -0
  49. package/dist/lib/newsletter/distribute.d.ts +52 -0
  50. package/dist/lib/newsletter/distribute.js +193 -0
  51. package/{lib/newsletter/generate-insurance.ts → dist/lib/newsletter/generate-insurance.d.ts} +7 -24
  52. package/dist/lib/newsletter/generate-insurance.js +36 -0
  53. package/dist/lib/newsletter/generate.d.ts +104 -0
  54. package/dist/lib/newsletter/generate.js +571 -0
  55. package/dist/lib/returnpro/anomalies.d.ts +64 -0
  56. package/dist/lib/returnpro/anomalies.js +166 -0
  57. package/dist/lib/returnpro/audit.d.ts +32 -0
  58. package/dist/lib/returnpro/audit.js +147 -0
  59. package/dist/lib/returnpro/diagnose.d.ts +52 -0
  60. package/dist/lib/returnpro/diagnose.js +281 -0
  61. package/dist/lib/returnpro/kpis.d.ts +32 -0
  62. package/dist/lib/returnpro/kpis.js +192 -0
  63. package/dist/lib/returnpro/templates.d.ts +48 -0
  64. package/dist/lib/returnpro/templates.js +229 -0
  65. package/dist/lib/returnpro/upload-income.d.ts +25 -0
  66. package/dist/lib/returnpro/upload-income.js +235 -0
  67. package/dist/lib/returnpro/upload-netsuite.d.ts +37 -0
  68. package/dist/lib/returnpro/upload-netsuite.js +566 -0
  69. package/dist/lib/returnpro/upload-r1.d.ts +48 -0
  70. package/dist/lib/returnpro/upload-r1.js +398 -0
  71. package/dist/lib/returnpro/validate.d.ts +37 -0
  72. package/dist/lib/returnpro/validate.js +124 -0
  73. package/dist/lib/social/meta.d.ts +90 -0
  74. package/dist/lib/social/meta.js +160 -0
  75. package/dist/lib/social/post-generator.d.ts +83 -0
  76. package/dist/lib/social/post-generator.js +333 -0
  77. package/dist/lib/social/publish.d.ts +66 -0
  78. package/dist/lib/social/publish.js +226 -0
  79. package/dist/lib/social/scraper.d.ts +67 -0
  80. package/dist/lib/social/scraper.js +361 -0
  81. package/dist/lib/supabase.d.ts +4 -0
  82. package/dist/lib/supabase.js +20 -0
  83. package/dist/lib/transactions/delete-batch.d.ts +60 -0
  84. package/dist/lib/transactions/delete-batch.js +203 -0
  85. package/dist/lib/transactions/ingest.d.ts +43 -0
  86. package/dist/lib/transactions/ingest.js +555 -0
  87. package/dist/lib/transactions/stamp.d.ts +51 -0
  88. package/dist/lib/transactions/stamp.js +524 -0
  89. package/package.json +3 -4
  90. package/bin/optimal.ts +0 -1731
  91. package/lib/assets/index.ts +0 -225
  92. package/lib/assets.ts +0 -124
  93. package/lib/auth/index.ts +0 -189
  94. package/lib/board/index.ts +0 -309
  95. package/lib/board/types.ts +0 -124
  96. package/lib/bot/claim.ts +0 -43
  97. package/lib/bot/coordinator.ts +0 -254
  98. package/lib/bot/heartbeat.ts +0 -37
  99. package/lib/bot/index.ts +0 -9
  100. package/lib/bot/protocol.ts +0 -99
  101. package/lib/bot/reporter.ts +0 -42
  102. package/lib/bot/skills.ts +0 -81
  103. package/lib/budget/projections.ts +0 -561
  104. package/lib/budget/scenarios.ts +0 -312
  105. package/lib/cms/publish-blog.ts +0 -129
  106. package/lib/cms/strapi-client.ts +0 -302
  107. package/lib/config/registry.ts +0 -228
  108. package/lib/config/schema.ts +0 -58
  109. package/lib/config.ts +0 -247
  110. package/lib/errors.ts +0 -129
  111. package/lib/format.ts +0 -120
  112. package/lib/infra/.gitkeep +0 -0
  113. package/lib/infra/deploy.ts +0 -70
  114. package/lib/infra/migrate.ts +0 -141
  115. package/lib/newsletter/.gitkeep +0 -0
  116. package/lib/newsletter/distribute.ts +0 -256
  117. package/lib/newsletter/generate.ts +0 -735
  118. package/lib/returnpro/.gitkeep +0 -0
  119. package/lib/returnpro/anomalies.ts +0 -258
  120. package/lib/returnpro/audit.ts +0 -194
  121. package/lib/returnpro/diagnose.ts +0 -400
  122. package/lib/returnpro/kpis.ts +0 -255
  123. package/lib/returnpro/templates.ts +0 -323
  124. package/lib/returnpro/upload-income.ts +0 -311
  125. package/lib/returnpro/upload-netsuite.ts +0 -696
  126. package/lib/returnpro/upload-r1.ts +0 -563
  127. package/lib/returnpro/validate.ts +0 -154
  128. package/lib/social/meta.ts +0 -228
  129. package/lib/social/post-generator.ts +0 -468
  130. package/lib/social/publish.ts +0 -301
  131. package/lib/social/scraper.ts +0 -503
  132. package/lib/supabase.ts +0 -25
  133. package/lib/transactions/delete-batch.ts +0 -258
  134. package/lib/transactions/ingest.ts +0 -659
  135. package/lib/transactions/stamp.ts +0 -654
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Social Post Publisher
3
+ *
4
+ * Handles publishing social posts from Strapi to platforms via n8n webhooks,
5
+ * with delivery status tracking written back to Strapi.
6
+ *
7
+ * Functions:
8
+ * publishSocialPosts() — Main orchestrator: fetch pending posts, publish to Strapi,
9
+ * trigger n8n webhook, update delivery_status
10
+ * getPublishQueue() — List posts ready to publish (pending + has scheduled_date)
11
+ * retryFailed() — Re-attempt posts with delivery_status = 'failed'
12
+ */
13
+ import 'dotenv/config';
14
+ import { strapiGet, strapiPut, publish, } from '../cms/strapi-client.js';
15
+ // ── Config ────────────────────────────────────────────────────────────
16
+ function getN8nWebhookUrl() {
17
+ const url = process.env.N8N_WEBHOOK_URL;
18
+ if (!url) {
19
+ throw new Error('Missing env var: N8N_WEBHOOK_URL\n' +
20
+ 'Set it in your .env file, e.g.:\n' +
21
+ ' N8N_WEBHOOK_URL=https://n8n.op-hub.com');
22
+ }
23
+ return url.replace(/\/+$/, '');
24
+ }
25
+ // ── Internal helpers ──────────────────────────────────────────────────
26
+ /** Trigger n8n webhook for a single social post */
27
+ async function triggerN8nWebhook(documentId, platform, brand) {
28
+ const baseUrl = getN8nWebhookUrl();
29
+ const webhookUrl = `${baseUrl}/webhook/social-post-publish`;
30
+ const res = await fetch(webhookUrl, {
31
+ method: 'POST',
32
+ headers: { 'Content-Type': 'application/json' },
33
+ body: JSON.stringify({ documentId, platform, brand }),
34
+ });
35
+ if (!res.ok) {
36
+ let detail = `HTTP ${res.status}: ${res.statusText}`;
37
+ try {
38
+ const body = await res.text();
39
+ if (body)
40
+ detail += ` — ${body.slice(0, 200)}`;
41
+ }
42
+ catch {
43
+ // non-text body, ignore
44
+ }
45
+ throw new Error(`n8n webhook failed: ${detail}`);
46
+ }
47
+ }
48
+ /** Fetch social posts by brand + delivery_status from Strapi */
49
+ async function fetchPostsByStatus(brand, deliveryStatus) {
50
+ const result = await strapiGet('/api/social-posts', {
51
+ 'filters[brand][$eq]': brand,
52
+ 'filters[delivery_status][$eq]': deliveryStatus,
53
+ 'sort': 'scheduled_date:asc',
54
+ 'pagination[pageSize]': '250',
55
+ });
56
+ return result.data;
57
+ }
58
+ /** Process a single post: publish in Strapi, trigger n8n, update status */
59
+ async function processPost(post, dryRun) {
60
+ const documentId = post.documentId;
61
+ const headline = post.headline ?? '(no headline)';
62
+ const platform = post.platform ?? 'unknown';
63
+ const brand = post.brand ?? 'unknown';
64
+ if (dryRun) {
65
+ return { status: 'skipped' };
66
+ }
67
+ try {
68
+ // Step 1: Publish in Strapi (set publishedAt)
69
+ await publish('social-posts', documentId);
70
+ // Step 2: Trigger n8n webhook
71
+ try {
72
+ await triggerN8nWebhook(documentId, platform, brand);
73
+ }
74
+ catch (webhookErr) {
75
+ // Webhook failure: mark failed, but don't rethrow — continue to next post
76
+ const errMsg = webhookErr instanceof Error ? webhookErr.message : String(webhookErr);
77
+ await strapiPut('/api/social-posts', documentId, {
78
+ delivery_status: 'failed',
79
+ delivery_errors: [{ timestamp: new Date().toISOString(), error: errMsg }],
80
+ });
81
+ return { status: 'failed', error: errMsg };
82
+ }
83
+ // Step 3: Update delivery_status to 'scheduled' on success
84
+ await strapiPut('/api/social-posts', documentId, {
85
+ delivery_status: 'scheduled',
86
+ });
87
+ return { status: 'published' };
88
+ }
89
+ catch (err) {
90
+ const errMsg = err instanceof Error ? err.message : String(err);
91
+ // Best-effort status update on unexpected errors
92
+ try {
93
+ await strapiPut('/api/social-posts', documentId, {
94
+ delivery_status: 'failed',
95
+ delivery_errors: [{ timestamp: new Date().toISOString(), error: errMsg }],
96
+ });
97
+ }
98
+ catch {
99
+ // Ignore secondary failure — original error is more important
100
+ }
101
+ return { status: 'failed', error: errMsg };
102
+ }
103
+ }
104
+ // ── Core orchestrator ─────────────────────────────────────────────────
105
+ /**
106
+ * Fetch pending social posts for a brand and publish them:
107
+ * 1. Publish in Strapi (set publishedAt)
108
+ * 2. Trigger n8n webhook
109
+ * 3. Update delivery_status to 'scheduled' (or 'failed' on error)
110
+ *
111
+ * @example
112
+ * const result = await publishSocialPosts({ brand: 'LIFEINSUR', limit: 3 })
113
+ * console.log(`Published: ${result.published}, Failed: ${result.failed}`)
114
+ */
115
+ export async function publishSocialPosts(opts) {
116
+ const { brand, limit, dryRun = false } = opts;
117
+ // Validate n8n URL up front (unless dry run)
118
+ if (!dryRun) {
119
+ getN8nWebhookUrl();
120
+ }
121
+ const posts = await fetchPostsByStatus(brand, 'pending');
122
+ const postsToProcess = limit !== undefined ? posts.slice(0, limit) : posts;
123
+ const result = {
124
+ published: 0,
125
+ failed: 0,
126
+ skipped: 0,
127
+ details: [],
128
+ };
129
+ for (const post of postsToProcess) {
130
+ const documentId = post.documentId;
131
+ const headline = post.headline ?? '(no headline)';
132
+ const outcome = await processPost(post, dryRun);
133
+ if (outcome.status === 'published')
134
+ result.published++;
135
+ else if (outcome.status === 'failed')
136
+ result.failed++;
137
+ else
138
+ result.skipped++;
139
+ result.details.push({
140
+ documentId,
141
+ headline,
142
+ status: outcome.status,
143
+ ...(outcome.error !== undefined && { error: outcome.error }),
144
+ });
145
+ }
146
+ return result;
147
+ }
148
+ // ── Publish queue ─────────────────────────────────────────────────────
149
+ /**
150
+ * List posts ready to publish: delivery_status = 'pending' AND has a scheduled_date.
151
+ *
152
+ * @example
153
+ * const queue = await getPublishQueue('LIFEINSUR')
154
+ * queue.forEach(p => console.log(p.scheduled_date, p.headline))
155
+ */
156
+ export async function getPublishQueue(brand) {
157
+ const posts = await fetchPostsByStatus(brand, 'pending');
158
+ return posts
159
+ .filter((post) => {
160
+ const scheduledDate = post.scheduled_date;
161
+ return scheduledDate != null && scheduledDate !== '';
162
+ })
163
+ .map((post) => ({
164
+ documentId: post.documentId,
165
+ headline: post.headline ?? '(no headline)',
166
+ platform: post.platform ?? 'unknown',
167
+ brand: post.brand ?? brand,
168
+ scheduled_date: post.scheduled_date,
169
+ }));
170
+ }
171
+ // ── Retry failed ──────────────────────────────────────────────────────
172
+ /**
173
+ * Re-attempt publishing posts with delivery_status = 'failed'.
174
+ * Resets delivery_status to 'pending' on each post before re-processing.
175
+ *
176
+ * @example
177
+ * const result = await retryFailed('LIFEINSUR')
178
+ * console.log(`Re-published: ${result.published}, Still failing: ${result.failed}`)
179
+ */
180
+ export async function retryFailed(brand) {
181
+ // Validate n8n URL up front
182
+ getN8nWebhookUrl();
183
+ const posts = await fetchPostsByStatus(brand, 'failed');
184
+ const result = {
185
+ published: 0,
186
+ failed: 0,
187
+ skipped: 0,
188
+ details: [],
189
+ };
190
+ for (const post of posts) {
191
+ const documentId = post.documentId;
192
+ const headline = post.headline ?? '(no headline)';
193
+ // Reset to pending so processPost can re-publish cleanly
194
+ try {
195
+ await strapiPut('/api/social-posts', documentId, {
196
+ delivery_status: 'pending',
197
+ delivery_errors: null,
198
+ });
199
+ }
200
+ catch (err) {
201
+ const errMsg = err instanceof Error ? err.message : String(err);
202
+ result.failed++;
203
+ result.details.push({
204
+ documentId,
205
+ headline,
206
+ status: 'failed',
207
+ error: `Could not reset delivery_status: ${errMsg}`,
208
+ });
209
+ continue;
210
+ }
211
+ const outcome = await processPost(post, false);
212
+ if (outcome.status === 'published')
213
+ result.published++;
214
+ else if (outcome.status === 'failed')
215
+ result.failed++;
216
+ else
217
+ result.skipped++;
218
+ result.details.push({
219
+ documentId,
220
+ headline,
221
+ status: outcome.status,
222
+ ...(outcome.error !== undefined && { error: outcome.error }),
223
+ });
224
+ }
225
+ return result;
226
+ }
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Meta Ad Library Scraper
3
+ *
4
+ * Ported from Python: ~/projects/meta-ad-scraper/scripts/meta_ad_scraper_v2.py
5
+ *
6
+ * Scrapes Facebook Ad Library for competitor ad intelligence.
7
+ * Uses Playwright headless Chromium with anti-detection measures.
8
+ * Splits ads by Library ID pattern, extracts metadata via regex.
9
+ *
10
+ * Functions:
11
+ * buildUrl() — construct Facebook Ad Library URL for a company
12
+ * scrollAndLoad() — auto-scroll page to load all ads (max 15 scrolls)
13
+ * extractAds() — two-stage extraction: DOM containers, then text split fallback
14
+ * parseAdText() — regex extraction of ad metadata from text blocks
15
+ * extractLandingUrls() — find landing page URLs from DOM links
16
+ * scrapeCompany() — orchestrate single company scrape
17
+ * scrapeCompanies() — batch-scrape multiple companies in sequential batches of configurable size
18
+ * formatCsv() — convert ad records to CSV string
19
+ */
20
+ import { type Page } from 'playwright';
21
+ export interface AdRecord {
22
+ company_searched: string;
23
+ ad_id: string;
24
+ page_name: string;
25
+ ad_text: string;
26
+ status: string;
27
+ start_date: string;
28
+ impressions: string;
29
+ spend: string;
30
+ media_type: string;
31
+ platforms: string;
32
+ landing_page_url: string;
33
+ full_text_snippet: string;
34
+ }
35
+ export interface ScrapeOptions {
36
+ /** Companies to scrape */
37
+ companies: string[];
38
+ /** Output file path (if undefined, return results only) */
39
+ outputPath?: string;
40
+ /** Batch size for parallel processing (default: 6) */
41
+ batchSize?: number;
42
+ /** Maximum scrolls per page (default: 15) */
43
+ maxScrolls?: number;
44
+ /** Delay between companies in ms (default: 4000) */
45
+ companyDelay?: number;
46
+ /** Run headless (default: true) */
47
+ headless?: boolean;
48
+ }
49
+ export interface ScrapeResult {
50
+ ads: AdRecord[];
51
+ totalCompanies: number;
52
+ companiesScraped: number;
53
+ outputPath?: string;
54
+ }
55
+ export declare function buildUrl(companyName: string): string;
56
+ export declare function scrollAndLoad(page: Page, maxScrolls?: number): Promise<void>;
57
+ export declare function parseAdText(text: string, companyName: string): AdRecord | null;
58
+ export declare function extractAds(page: Page, companyName: string, maxScrolls?: number): Promise<AdRecord[]>;
59
+ export declare function extractLandingUrls(page: Page, adIds: string[]): Promise<Record<string, string>>;
60
+ export declare function scrapeCompany(page: Page, companyName: string, maxScrolls?: number): Promise<AdRecord[]>;
61
+ /**
62
+ * Scrape multiple companies in batches.
63
+ * Default: 6 companies per batch, 3 parallel batches (as documented in memory).
64
+ */
65
+ export declare function scrapeCompanies(opts: ScrapeOptions): Promise<ScrapeResult>;
66
+ /** Convert ad records to CSV string */
67
+ export declare function formatCsv(ads: AdRecord[]): string;
@@ -0,0 +1,361 @@
1
+ /**
2
+ * Meta Ad Library Scraper
3
+ *
4
+ * Ported from Python: ~/projects/meta-ad-scraper/scripts/meta_ad_scraper_v2.py
5
+ *
6
+ * Scrapes Facebook Ad Library for competitor ad intelligence.
7
+ * Uses Playwright headless Chromium with anti-detection measures.
8
+ * Splits ads by Library ID pattern, extracts metadata via regex.
9
+ *
10
+ * Functions:
11
+ * buildUrl() — construct Facebook Ad Library URL for a company
12
+ * scrollAndLoad() — auto-scroll page to load all ads (max 15 scrolls)
13
+ * extractAds() — two-stage extraction: DOM containers, then text split fallback
14
+ * parseAdText() — regex extraction of ad metadata from text blocks
15
+ * extractLandingUrls() — find landing page URLs from DOM links
16
+ * scrapeCompany() — orchestrate single company scrape
17
+ * scrapeCompanies() — batch-scrape multiple companies in sequential batches of configurable size
18
+ * formatCsv() — convert ad records to CSV string
19
+ */
20
+ import { chromium } from 'playwright';
21
+ import { writeFileSync } from 'node:fs';
22
+ // ── CSV Column Order ────────────────────────────────────────────────
23
+ const CSV_FIELDS = [
24
+ 'company_searched',
25
+ 'ad_id',
26
+ 'page_name',
27
+ 'ad_text',
28
+ 'status',
29
+ 'start_date',
30
+ 'impressions',
31
+ 'spend',
32
+ 'media_type',
33
+ 'platforms',
34
+ 'landing_page_url',
35
+ 'full_text_snippet',
36
+ ];
37
+ // ── URL Builder ─────────────────────────────────────────────────────
38
+ export function buildUrl(companyName) {
39
+ const base = 'https://www.facebook.com/ads/library/';
40
+ const params = `?active_status=active` +
41
+ `&ad_type=all` +
42
+ `&country=US` +
43
+ `&is_targeted_country=false` +
44
+ `&media_type=all` +
45
+ `&sort_data[mode]=total_impressions` +
46
+ `&sort_data[direction]=desc` +
47
+ `&q=${encodeURIComponent(companyName)}`;
48
+ return base + params;
49
+ }
50
+ // ── Scroll & Load ───────────────────────────────────────────────────
51
+ export async function scrollAndLoad(page, maxScrolls = 15) {
52
+ let prevHeight = 0;
53
+ for (let i = 0; i < maxScrolls; i++) {
54
+ await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
55
+ await page.waitForTimeout(2000);
56
+ const currHeight = await page.evaluate(() => document.body.scrollHeight);
57
+ if (currHeight === prevHeight && i > 1)
58
+ break;
59
+ prevHeight = currHeight;
60
+ }
61
+ }
62
+ // ── Parse Ad Text ───────────────────────────────────────────────────
63
+ export function parseAdText(text, companyName) {
64
+ if (!text || text.length < 20)
65
+ return null;
66
+ const ad = { company_searched: companyName };
67
+ // Library ID
68
+ const idMatch = text.match(/Library ID:\s*(\d+)/);
69
+ if (idMatch) {
70
+ ad.ad_id = idMatch[1];
71
+ }
72
+ else {
73
+ return null; // Skip blocks without a Library ID
74
+ }
75
+ // Start date
76
+ const dateMatch = text.match(/Started running on\s+(\w+ \d+,?\s*\d*)/);
77
+ if (dateMatch) {
78
+ ad.start_date = dateMatch[1].trim();
79
+ }
80
+ else {
81
+ ad.start_date = '';
82
+ }
83
+ // Status (Active/Inactive)
84
+ if (text.includes('Active')) {
85
+ ad.status = 'Active';
86
+ }
87
+ else if (text.includes('Inactive')) {
88
+ ad.status = 'Inactive';
89
+ }
90
+ else {
91
+ ad.status = 'Unknown';
92
+ }
93
+ // Page name - look for "Sponsored" text preceded by the page name
94
+ const sponsorMatch = text.match(/(?:^|\n)([^\n]+)\nSponsored/);
95
+ if (sponsorMatch) {
96
+ ad.page_name = sponsorMatch[1].trim();
97
+ }
98
+ else {
99
+ ad.page_name = '';
100
+ }
101
+ // Ad creative text - text after "Sponsored" and before common end markers
102
+ const creativeMatch = text.match(/Sponsored\n(.+?)(?:\n(?:Learn More|Sign Up|Shop Now|Get Offer|Download|Apply Now|Book Now|Contact Us|Send Message|Watch More|See Menu|Get Quote|Subscribe|Get Showtimes)|\Z)/s);
103
+ if (creativeMatch) {
104
+ ad.ad_text = creativeMatch[1].trim().slice(0, 500);
105
+ }
106
+ else {
107
+ ad.ad_text = '';
108
+ }
109
+ // Impressions
110
+ const impMatch = text.match(/(?:impressions?)\s*[:\s]*([\d,.]+\s*[-\u2013]\s*[\d,.]+)/i);
111
+ if (impMatch) {
112
+ ad.impressions = impMatch[1];
113
+ }
114
+ else {
115
+ ad.impressions = '';
116
+ }
117
+ // Spend
118
+ const spendMatch = text.match(/(?:spend|spent)\s*[:\s]*\$?([\d,.]+\s*[-\u2013]\s*\$?[\d,.]+)/i);
119
+ if (spendMatch) {
120
+ ad.spend = spendMatch[1];
121
+ }
122
+ else {
123
+ ad.spend = '';
124
+ }
125
+ // Media type
126
+ const textLower = text.toLowerCase();
127
+ if (['video', '0:00', 'play'].some((kw) => textLower.includes(kw))) {
128
+ ad.media_type = 'video';
129
+ }
130
+ else if (textLower.includes('carousel') ||
131
+ textLower.includes('multiple versions')) {
132
+ ad.media_type = 'carousel/multiple';
133
+ }
134
+ else {
135
+ ad.media_type = 'image';
136
+ }
137
+ // Platforms
138
+ const platformNames = ['Facebook', 'Instagram', 'Messenger', 'Audience Network'];
139
+ const platforms = platformNames.filter((p) => textLower.includes(p.toLowerCase()));
140
+ ad.platforms = platforms.join(', ');
141
+ // Landing page URL (not available from text, would need DOM links)
142
+ ad.landing_page_url = '';
143
+ // Full text snippet for reference
144
+ ad.full_text_snippet = text.slice(0, 500);
145
+ return ad;
146
+ }
147
+ // ── Extract Ads ─────────────────────────────────────────────────────
148
+ export async function extractAds(page, companyName, maxScrolls = 15) {
149
+ const ads = [];
150
+ // Wait for content
151
+ try {
152
+ await page.waitForLoadState('networkidle', { timeout: 15000 });
153
+ }
154
+ catch {
155
+ // Timeout is acceptable — continue with what loaded
156
+ }
157
+ await page.waitForTimeout(3000);
158
+ // Check for no results
159
+ const pageText = await page.evaluate(() => document.body.innerText);
160
+ if (!pageText ||
161
+ pageText.toLowerCase().includes('no results') ||
162
+ pageText.toLowerCase().includes('no ads match')) {
163
+ console.log(` [INFO] No ads found for ${companyName}`);
164
+ return ads;
165
+ }
166
+ // Scroll to load all ads
167
+ await scrollAndLoad(page, maxScrolls);
168
+ // Also try to extract structured data from the DOM
169
+ const domAds = await page.evaluate(() => {
170
+ const results = [];
171
+ // Find all Library ID occurrences via DOM containers
172
+ const allElements = document.querySelectorAll('div');
173
+ const adContainers = [];
174
+ allElements.forEach((el) => {
175
+ const text = el.innerText || '';
176
+ // An ad container typically has EXACTLY ONE Library ID
177
+ const idMatches = text.match(/Library ID:\s*\d+/g);
178
+ if (idMatches && idMatches.length === 1) {
179
+ // Check it's not too small (just a label) or too large (parent of multiple ads)
180
+ const textLen = text.length;
181
+ if (textLen > 50 && textLen < 5000) {
182
+ adContainers.push({
183
+ text,
184
+ textLen,
185
+ tag: el.tagName,
186
+ });
187
+ }
188
+ }
189
+ });
190
+ // Deduplicate - remove containers that are subsets of other containers
191
+ // Sort by text length (smallest first - these are the most specific)
192
+ adContainers.sort((a, b) => a.textLen - b.textLen);
193
+ const seen = new Set();
194
+ adContainers.forEach((container) => {
195
+ const idMatch = container.text.match(/Library ID:\s*(\d+)/);
196
+ if (idMatch && !seen.has(idMatch[1])) {
197
+ seen.add(idMatch[1]);
198
+ results.push(container);
199
+ }
200
+ });
201
+ return results;
202
+ });
203
+ if (domAds && domAds.length > 0) {
204
+ console.log(` [DOM] Found ${domAds.length} individual ad containers`);
205
+ for (const raw of domAds) {
206
+ const ad = parseAdText(raw.text, companyName);
207
+ if (ad)
208
+ ads.push(ad);
209
+ }
210
+ }
211
+ else {
212
+ // Fallback: split page text by "Library ID:" pattern
213
+ console.log(` [TEXT] Falling back to text-based splitting`);
214
+ const fullText = await page.evaluate(() => document.body.innerText);
215
+ const sections = fullText.split(/(?=Library ID:\s*\d+)/);
216
+ for (const section of sections) {
217
+ const trimmed = section.trim();
218
+ if (!trimmed || trimmed.length < 30)
219
+ continue;
220
+ const ad = parseAdText(trimmed, companyName);
221
+ if (ad)
222
+ ads.push(ad);
223
+ }
224
+ }
225
+ return ads;
226
+ }
227
+ // ── Extract Landing URLs ────────────────────────────────────────────
228
+ export async function extractLandingUrls(page, adIds) {
229
+ return page.evaluate((ids) => {
230
+ const result = {};
231
+ const links = document.querySelectorAll('a[href*="l.facebook.com"]');
232
+ links.forEach((link) => {
233
+ const href = link.href || '';
234
+ const parent = link.closest('div');
235
+ if (parent) {
236
+ const text = parent.innerText || '';
237
+ for (const id of ids) {
238
+ if (text.includes(id) && !result[id]) {
239
+ result[id] = href;
240
+ }
241
+ }
242
+ }
243
+ });
244
+ return result;
245
+ }, adIds);
246
+ }
247
+ // ── Scrape Single Company ───────────────────────────────────────────
248
+ export async function scrapeCompany(page, companyName, maxScrolls = 15) {
249
+ const url = buildUrl(companyName);
250
+ console.log(`\n${'='.repeat(60)}`);
251
+ console.log(`Scraping: ${companyName}`);
252
+ console.log(`URL: ${url}`);
253
+ console.log(`${'='.repeat(60)}`);
254
+ try {
255
+ await page.goto(url, { timeout: 30000, waitUntil: 'domcontentloaded' });
256
+ }
257
+ catch {
258
+ console.log(` [ERROR] Page load timeout for ${companyName}`);
259
+ return [];
260
+ }
261
+ const ads = await extractAds(page, companyName, maxScrolls);
262
+ // Try to get landing URLs
263
+ if (ads.length > 0) {
264
+ const adIds = ads.map((a) => a.ad_id).filter(Boolean);
265
+ if (adIds.length > 0) {
266
+ const urls = await extractLandingUrls(page, adIds);
267
+ for (const ad of ads) {
268
+ if (ad.ad_id in urls) {
269
+ ad.landing_page_url = urls[ad.ad_id];
270
+ }
271
+ }
272
+ }
273
+ }
274
+ console.log(` [DONE] Extracted ${ads.length} individual ads for ${companyName}`);
275
+ return ads;
276
+ }
277
+ // ── Batch Scraper ───────────────────────────────────────────────────
278
+ /**
279
+ * Scrape multiple companies in batches.
280
+ * Default: 6 companies per batch, 3 parallel batches (as documented in memory).
281
+ */
282
+ export async function scrapeCompanies(opts) {
283
+ const { companies, outputPath, batchSize = 6, maxScrolls = 15, companyDelay = 4000, headless = true, } = opts;
284
+ console.log(`Starting Meta Ad Library scraper for ${companies.length} companies`);
285
+ if (outputPath)
286
+ console.log(`Output: ${outputPath}`);
287
+ const allAds = [];
288
+ let companiesScraped = 0;
289
+ // Split into batches
290
+ const batches = [];
291
+ for (let i = 0; i < companies.length; i += batchSize) {
292
+ batches.push(companies.slice(i, i + batchSize));
293
+ }
294
+ console.log(`Processing ${batches.length} batch(es) of up to ${batchSize} companies each`);
295
+ let browser = null;
296
+ try {
297
+ browser = await chromium.launch({
298
+ headless,
299
+ args: [
300
+ '--no-sandbox',
301
+ '--disable-setuid-sandbox',
302
+ '--disable-dev-shm-usage',
303
+ '--disable-blink-features=AutomationControlled',
304
+ ],
305
+ });
306
+ for (let bi = 0; bi < batches.length; bi++) {
307
+ const batch = batches[bi];
308
+ console.log(`\nBatch ${bi + 1}/${batches.length}: ${batch.length} companies`);
309
+ const context = await browser.newContext({
310
+ viewport: { width: 1920, height: 1080 },
311
+ userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
312
+ });
313
+ const page = await context.newPage();
314
+ for (let ci = 0; ci < batch.length; ci++) {
315
+ const company = batch[ci];
316
+ if (ci > 0) {
317
+ console.log(`\n [WAIT] Waiting ${companyDelay / 1000}s before next company...`);
318
+ await page.waitForTimeout(companyDelay);
319
+ }
320
+ const ads = await scrapeCompany(page, company, maxScrolls);
321
+ allAds.push(...ads);
322
+ companiesScraped++;
323
+ }
324
+ await context.close();
325
+ }
326
+ }
327
+ finally {
328
+ if (browser)
329
+ await browser.close();
330
+ }
331
+ // Write CSV output if path specified
332
+ if (outputPath) {
333
+ const csv = formatCsv(allAds);
334
+ writeFileSync(outputPath, csv, 'utf-8');
335
+ console.log(`\nSaved ${allAds.length} ads to ${outputPath}`);
336
+ }
337
+ console.log(`\nBatch complete: ${allAds.length} total ads from ${companiesScraped} companies`);
338
+ return {
339
+ ads: allAds,
340
+ totalCompanies: companies.length,
341
+ companiesScraped,
342
+ outputPath,
343
+ };
344
+ }
345
+ // ── CSV Formatter ───────────────────────────────────────────────────
346
+ /** Escape a value for CSV (double-quote wrapping, escape inner quotes) */
347
+ function escapeCsvField(value) {
348
+ if (value.includes(',') ||
349
+ value.includes('"') ||
350
+ value.includes('\n') ||
351
+ value.includes('\r')) {
352
+ return `"${value.replace(/"/g, '""')}"`;
353
+ }
354
+ return value;
355
+ }
356
+ /** Convert ad records to CSV string */
357
+ export function formatCsv(ads) {
358
+ const header = CSV_FIELDS.join(',');
359
+ const rows = ads.map((ad) => CSV_FIELDS.map((field) => escapeCsvField(ad[field] ?? '')).join(','));
360
+ return [header, ...rows].join('\n') + '\n';
361
+ }
@@ -0,0 +1,4 @@
1
+ import { SupabaseClient } from '@supabase/supabase-js';
2
+ import 'dotenv/config';
3
+ export type SupabaseInstance = 'optimal' | 'returnpro';
4
+ export declare function getSupabase(instance: SupabaseInstance): SupabaseClient;
@@ -0,0 +1,20 @@
1
+ import { createClient } from '@supabase/supabase-js';
2
+ import 'dotenv/config';
3
+ const configs = {
4
+ optimal: { urlEnv: 'OPTIMAL_SUPABASE_URL', keyEnv: 'OPTIMAL_SUPABASE_SERVICE_KEY' },
5
+ returnpro: { urlEnv: 'RETURNPRO_SUPABASE_URL', keyEnv: 'RETURNPRO_SUPABASE_SERVICE_KEY' },
6
+ };
7
+ const clients = new Map();
8
+ export function getSupabase(instance) {
9
+ const existing = clients.get(instance);
10
+ if (existing)
11
+ return existing;
12
+ const config = configs[instance];
13
+ const url = process.env[config.urlEnv];
14
+ const key = process.env[config.keyEnv];
15
+ if (!url || !key)
16
+ throw new Error(`Missing env vars: ${config.urlEnv}, ${config.keyEnv}`);
17
+ const client = createClient(url, key);
18
+ clients.set(instance, client);
19
+ return client;
20
+ }