autokap 1.3.31 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,217 @@
1
/** User-Agent header value sent with every page fetch in this module. */
const CRAWLER_UA = 'AutoKap-Crawler/1.0 (+https://autokap.app/crawler)';

/** Global pattern that picks e-mail-address-shaped substrings out of free text. */
const EMAIL_RE = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;

/** Role mailbox local parts; the array index doubles as the rank (lower is preferred). */
const ROLE_PREFIXES = [
    'founder',
    'contact',
    'hello',
    'team',
    'support',
    'info',
];

/** Consumer mail providers; addresses on these domains rank below same-domain addresses. */
const FREE_MAIL_DOMAINS = new Set([
    'gmail.com',
    'outlook.com',
    'hotmail.com',
    'yahoo.com',
    'protonmail.com',
    'icloud.com',
    'proton.me',
]);

/** Per-hostname promise chain used to space out requests to the same host. */
const domainQueues = new Map();
6
/**
 * Crawls the BetaList launch page and then a handful of product-site pages
 * looking for a contact e-mail, a social handle and the page language.
 *
 * Reads `opts.betaListLaunchUrl`, `opts.productUrl` and `opts.logger`.
 * Crawling stops as soon as a high-rank e-mail and a handle are both known.
 *
 * @param opts - crawl options (see above).
 * @returns `{ email, handle, lang }`; each field is `null` when nothing was found.
 */
export async function findEmail(opts) {
    const productHostname = opts.productUrl ? hostnameOf(opts.productUrl) : null;
    const pages = [];
    const found = new Set();
    let handle = null;
    let lang = null;

    // True once there is nothing left worth crawling for.
    const isSatisfied = (candidate) => isHighRankEmail(candidate, productHostname) && handle;

    const crawl = async (url) => {
        const page = await fetchPage(url, opts.logger);
        if (!page) {
            return;
        }
        pages.push(page);
        const cheerio = await loadCheerio();
        const $ = cheerio.load(page.html);
        handle ??= extractHandle($);
        // Only trust the language of a page served (HTTP 200) by the product's own host.
        if (!lang && productHostname && hostnameOf(page.url) === productHostname && page.status === 200) {
            lang = extractLanguage($, page.text);
        }
        const sources = [extractMailtos($).join(' '), page.text];
        for (const source of sources) {
            for (const email of extractEmailsFromText(source)) {
                found.add(email);
            }
        }
    };

    await crawl(opts.betaListLaunchUrl);
    let best = pickBestEmail([...found], productHostname);
    if (isSatisfied(best)) {
        // Early exit: note that no text-based language fallback is applied here.
        return { email: best, handle, lang };
    }

    for (const url of buildProductUrls(opts.productUrl)) {
        if (isSatisfied(best)) {
            break;
        }
        await crawl(url);
        best = pickBestEmail([...found], productHostname);
    }

    // Last resort: guess the language from the text of every page fetched.
    lang ||= inferLanguageFromText(pages.map((page) => page.text).join(' '));

    return { email: best, handle, lang };
}
50
/**
 * Returns the distinct, lower-cased e-mail addresses found in `text`,
 * in order of first appearance, with junk matches removed.
 *
 * @param text - free text to scan.
 * @returns array of unique lower-cased addresses.
 */
export function extractEmailsFromText(text) {
    const unique = new Set();
    for (const raw of text.match(EMAIL_RE) ?? []) {
        const email = raw.toLowerCase();
        if (!isJunkEmail(email)) {
            unique.add(email);
        }
    }
    return [...unique];
}
54
/**
 * Picks the best-ranked address from `emails`.
 *
 * Addresses are trimmed, lower-cased and de-duplicated first; among equally
 * ranked candidates the one that appeared first wins.
 *
 * @param emails - candidate addresses.
 * @param productHostname - hostname of the product site, or `null`.
 * @returns the winning address, or `null` when there is no usable candidate.
 */
export function pickBestEmail(emails, productHostname) {
    if (emails.length === 0) {
        return null;
    }
    const candidates = new Set();
    for (const email of emails) {
        const cleaned = email.trim().toLowerCase();
        if (cleaned) {
            candidates.add(cleaned);
        }
    }
    let winner = null;
    let winnerRank = Infinity;
    for (const candidate of candidates) {
        const rank = rankEmail(candidate, productHostname);
        // Strict comparison keeps the earliest candidate on ties.
        if (rank < winnerRank) {
            winner = candidate;
            winnerRank = rank;
        }
    }
    return winner;
}
62
/** Domains whose addresses are placeholders or telemetry endpoints, never real contacts. */
const JUNK_EMAIL_DOMAINS = ['example.com', 'sentry.io', 'wixpress.com'];

/** Image file extensions that the e-mail pattern can mistake for a top-level domain. */
const IMAGE_EXTENSION_RE = /\.(?:png|jpe?g|svg)$/;

/**
 * Tells whether an e-mail-shaped match is not a real contact address.
 *
 * A match is junk when:
 *  - its domain is (a subdomain of) one of the blocked domains, or
 *  - it contains the retina-asset marker `@2x` (e.g. `logo@2x.png`), or
 *  - it ends in an image file extension (e.g. `hero@cdn.site.jpg`).
 *
 * Checks are anchored to the domain / end of string so that legitimate
 * addresses which merely contain "png", "jpg", "svg" or "example.com" as a
 * substring (e.g. `jpgarcia@acme.com`, `joe@myexample.com`) are kept.
 *
 * @param email - candidate address.
 * @returns `true` when the candidate should be discarded.
 */
function isJunkEmail(email) {
    const lower = email.toLowerCase();
    const domain = lower.slice(lower.lastIndexOf('@') + 1);
    const isBlockedDomain = JUNK_EMAIL_DOMAINS.some((blocked) => domain === blocked || domain.endsWith(`.${blocked}`));
    if (isBlockedDomain) {
        return true;
    }
    return lower.includes('@2x') || IMAGE_EXTENSION_RE.test(lower);
}
72
/**
 * Scores an address; lower is better.
 *
 *   0..5     role mailbox on the product's own domain (index in ROLE_PREFIXES)
 *   100      any other mailbox on the product's own domain
 *   200..205 role mailbox on a free-mail provider
 *   300      any other mailbox on a free-mail provider
 *   400      everything else
 *
 * @param email - address to score.
 * @param productHostname - hostname of the product site, or `null`.
 * @returns numeric rank.
 */
function rankEmail(email, productHostname) {
    const [rawLocal, rawDomain] = email.split('@');
    const local = rawLocal?.toLowerCase() ?? '';
    const domain = rawDomain?.toLowerCase() ?? '';
    const roleRank = ROLE_PREFIXES.indexOf(local);
    const isRole = roleRank !== -1;
    if (productHostname && domainsMatch(domain, productHostname)) {
        return isRole ? roleRank : 100;
    }
    if (FREE_MAIL_DOMAINS.has(domain)) {
        return isRole ? 200 + roleRank : 300;
    }
    return 400;
}
89
/**
 * Tells whether `email` is good enough to stop crawling: it must be non-null
 * and rank below 200, i.e. live on the product's own domain.
 *
 * @param email - candidate address or `null`.
 * @param productHostname - hostname of the product site, or `null`.
 */
function isHighRankEmail(email, productHostname) {
    if (email === null) {
        return false;
    }
    return rankEmail(email, productHostname) < 200;
}
92
/**
 * Tells whether an e-mail domain belongs to the same site as the product
 * hostname: either the hosts are equal once a leading `www.` is dropped, or
 * they share the same registrable domain as computed by `etldOne`.
 *
 * @param emailDomain - domain part of an address.
 * @param productHostname - hostname of the product site.
 */
function domainsMatch(emailDomain, productHostname) {
    const host = stripWww(productHostname);
    const mailHost = stripWww(emailDomain);
    if (mailHost === host) {
        return true;
    }
    return etldOne(mailHost) === etldOne(host);
}
97
/**
 * Second-level labels that act as public suffixes under two-letter country
 * TLDs (`co.uk`, `com.au`, `org.nz`, ...). This is a small heuristic, not the
 * full Public Suffix List.
 */
const SECOND_LEVEL_SUFFIX_LABELS = new Set(['ac', 'co', 'com', 'edu', 'gov', 'net', 'org']);

/**
 * Approximates the registrable domain ("eTLD+1") of a hostname.
 *
 * The hostname is lower-cased and a leading `www.` is dropped. Hosts with at
 * most two labels are returned as-is. Otherwise the last two labels are kept,
 * or the last three when the host ends in a country-code second-level suffix
 * such as `.co.uk` — without that, every `*.co.uk` host would collapse to
 * `co.uk` and unrelated domains would compare equal.
 *
 * @param hostname - hostname to reduce.
 * @returns the approximated registrable domain.
 */
function etldOne(hostname) {
    // Same normalization as stripWww, inlined so this helper stands alone.
    const labels = hostname.toLowerCase().replace(/^www\./, '').split('.').filter(Boolean);
    if (labels.length <= 2) {
        return labels.join('.');
    }
    const tld = labels[labels.length - 1];
    const secondLevel = labels[labels.length - 2];
    const hasCompoundSuffix = tld.length === 2 && SECOND_LEVEL_SUFFIX_LABELS.has(secondLevel);
    return labels.slice(hasCompoundSuffix ? -3 : -2).join('.');
}
101
/**
 * Lower-cases a hostname and removes a single leading `www.` label.
 *
 * @param hostname - hostname to normalize.
 */
function stripWww(hostname) {
    const lowered = hostname.toLowerCase();
    return lowered.startsWith('www.') ? lowered.slice('www.'.length) : lowered;
}
104
/**
 * Lists the product-site URLs worth crawling for contact details: the
 * product URL itself followed by its `/contact`, `/about`, `/legal` and
 * `/mentions-legales` pages (query string and fragment removed), without
 * duplicates.
 *
 * @param productUrl - product URL, possibly null/empty.
 * @returns ordered list of absolute URLs; empty when the input is missing or unparsable.
 */
function buildProductUrls(productUrl) {
    if (!productUrl) {
        return [];
    }
    try {
        const base = new URL(productUrl);
        const wellKnown = ['/contact', '/about', '/legal', '/mentions-legales'].map((pathname) => {
            const candidate = new URL(base.href);
            candidate.pathname = pathname;
            candidate.search = '';
            candidate.hash = '';
            return candidate.href;
        });
        return [...new Set([base.href, ...wellKnown])];
    }
    catch {
        return [];
    }
}
123
/**
 * Fetches one HTML page, waiting for this hostname's turn in the
 * per-hostname queue first.
 *
 * Only `http:` and `https:` URLs are fetched. The request is aborted after
 * 15 seconds. Non-2xx responses and network errors are logged through
 * `logger.warn` and reported as `null`.
 *
 * @param url - URL to fetch.
 * @param logger - object with a `warn(message)` method.
 * @returns `{ html, text, url, status }` where `text` is the whitespace-collapsed
 *          body text with script/style/noscript/svg removed, or `null` on failure.
 */
async function fetchPage(url, logger) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return null;
    }
    const isHttp = target.protocol === 'http:' || target.protocol === 'https:';
    if (!isHttp) {
        return null;
    }
    const href = target.toString();
    await waitForDomainTurn(target.hostname);
    try {
        const response = await fetch(href, {
            headers: {
                Accept: 'text/html',
                'User-Agent': CRAWLER_UA,
            },
            signal: AbortSignal.timeout(15_000),
        });
        if (!response.ok) {
            logger.warn(`[crm-email] Fetch returned HTTP ${response.status} for ${href}`);
            return null;
        }
        const html = await response.text();
        const cheerio = await loadCheerio();
        const $ = cheerio.load(html);
        // Drop non-content elements so their source does not pollute the text.
        $('script, style, noscript, svg').remove();
        const text = $('body').text().replace(/\s+/g, ' ').trim();
        return {
            html,
            text,
            // Prefer the final URL reported by the response; fall back to the requested one.
            url: response.url || href,
            status: response.status,
        };
    }
    catch (error) {
        logger.warn(`[crm-email] Fetch failed for ${href}: ${error.message}`);
        return null;
    }
}
162
/**
 * Per-hostname rate limiter: resolves when it is the caller's turn to hit
 * `hostname`.
 *
 * Each caller appends a "gate" promise to the hostname's chain in
 * `domainQueues`. The gate opens one second after the caller's own turn
 * starts, which is what lets the next caller through — so consecutive
 * requests to one hostname start at least one second apart, and the first
 * caller proceeds immediately.
 *
 * Once the gate opens and nobody else has queued behind it, the hostname's
 * entry is removed so the map does not grow by one entry per hostname ever
 * visited.
 *
 * @param hostname - hostname about to be requested.
 */
async function waitForDomainTurn(hostname) {
    const previous = domainQueues.get(hostname) ?? Promise.resolve();
    let release = () => { };
    // The executor runs synchronously, so `release` is set before any timer can fire.
    const gate = new Promise((resolve) => {
        release = resolve;
    });
    const current = previous.then(() => gate);
    domainQueues.set(hostname, current);
    await previous;
    setTimeout(() => {
        release();
        // Still the tail of the chain: a later caller would not have to wait, so drop the entry.
        if (domainQueues.get(hostname) === current) {
            domainQueues.delete(hostname);
        }
    }, 1_000);
}
172
/**
 * Loads the `cheerio` module with a dynamic `import()` issued from a function
 * built at runtime.
 *
 * NOTE(review): going through `new Function` presumably keeps the compiler or
 * bundler from rewriting the `import()` call — confirm against the build setup.
 *
 * @returns the cheerio module namespace.
 */
async function loadCheerio() {
    const dynamicImport = new Function('specifier', 'return import(specifier)');
    const cheerioModule = await dynamicImport('cheerio');
    return cheerioModule;
}
176
/**
 * Collects the address part of every `mailto:` link on the page.
 *
 * The `mailto:` scheme and any `?subject=...` query are stripped and the
 * remainder is percent-decoded. If the remainder contains a malformed
 * percent escape it is returned undecoded instead of throwing, so one broken
 * link cannot abort the whole crawl.
 *
 * @param $ - cheerio root function for the page.
 * @returns one string per `mailto:` anchor, in document order.
 */
function extractMailtos($) {
    return $('a[href^="mailto:"]').toArray().map((anchor) => {
        const href = $(anchor).attr('href') ?? '';
        const address = href.replace(/^mailto:/i, '').split('?')[0] ?? '';
        try {
            return decodeURIComponent(address);
        }
        catch {
            // decodeURIComponent throws URIError on input such as "100%@acme.com".
            return address;
        }
    });
}
182
/**
 * First path segments on twitter.com / x.com that are site features rather
 * than user profiles (share buttons, login links, ...).
 */
const TWITTER_NON_PROFILE_PATHS = new Set([
    'compose', 'explore', 'hashtag', 'home', 'i', 'intent', 'login',
    'messages', 'notifications', 'search', 'settings', 'share', 'signup',
]);

/**
 * Finds the first social handle linked from the page.
 *
 * Anchors are scanned in document order. A Twitter/X profile link yields
 * `@name`; a LinkedIn `/in/<slug>` link yields the slug as-is.
 *
 * The Twitter/X host must start the href or follow `/` or `.`, so that
 * unrelated hosts ending in "x.com" (dropbox.com, netflix.com, ...) are not
 * mistaken for x.com. Share/intent and other non-profile links are skipped.
 *
 * @param $ - cheerio root function for the page.
 * @returns the handle, or `null` when no anchor matches.
 */
function extractHandle($) {
    for (const anchor of $('a[href]').toArray()) {
        const href = $(anchor).attr('href') ?? '';
        const twitter = href.match(/(?:^|[/.])(?:twitter|x)\.com\/([A-Za-z0-9_]{1,20})(?:[/?#]|$)/i);
        if (twitter && !TWITTER_NON_PROFILE_PATHS.has(twitter[1].toLowerCase()))
            return `@${twitter[1]}`;
        const linkedin = href.match(/linkedin\.com\/in\/([^/?#]+)/i);
        if (linkedin)
            return linkedin[1] ?? null;
    }
    return null;
}
194
/**
 * Determines the page language: the primary subtag of `<html lang="...">`
 * (lower-cased, region dropped) when present, otherwise a guess from the
 * page text.
 *
 * @param $ - cheerio root function for the page.
 * @param text - visible page text used for the fallback guess.
 * @returns a language code, or `null` when it cannot be determined.
 */
function extractLanguage($, text) {
    const declared = $('html').attr('lang');
    const primary = declared?.trim().split(/[-_]/)[0]?.toLowerCase();
    return primary ? primary : inferLanguageFromText(text);
}
200
/**
 * French marker phrases, matched as whole words.
 *
 * `\b` only knows ASCII word characters, so it can never match between a
 * space and "à" — which made "à propos" undetectable. Unicode-aware
 * lookarounds are used instead.
 */
const FRENCH_HINT_RE = /(?<![\p{L}\p{N}_])(?:bonjour|merci|à propos|mentions légales)(?![\p{L}\p{N}_])/iu;

/** English marker words, matched as whole words. */
const ENGLISH_HINT_RE = /\b(the|and|contact|about|privacy|terms|login|sign in)\b/i;

/**
 * Guesses whether `text` is French or English from a few marker words.
 * French markers are checked first.
 *
 * @param text - text to inspect; may be empty.
 * @returns `'fr'`, `'en'`, or `null` when the text is empty or no marker is found.
 */
function inferLanguageFromText(text) {
    if (!text)
        return null;
    if (FRENCH_HINT_RE.test(text))
        return 'fr';
    if (ENGLISH_HINT_RE.test(text))
        return 'en';
    return null;
}
209
/**
 * Extracts the hostname from an absolute URL string.
 *
 * @param value - URL string.
 * @returns the hostname, or `null` when `value` is not a parsable URL.
 */
function hostnameOf(value) {
    let parsed;
    try {
        parsed = new URL(value);
    }
    catch {
        return null;
    }
    return parsed.hostname;
}
217
+ //# sourceMappingURL=email-fallback.js.map
@@ -0,0 +1,28 @@
1
+ import { type Browser } from 'playwright';
2
+ import { findEmail } from './email-fallback.js';
3
+ import { scrapeBetaListLaunches } from './scrape-betalist.js';
4
+ import { scrapeLanding } from './scrape-landing.js';
5
/** Options for a single campaign run. */
export interface RunCampaignOptions {
    /** Identifier of the run; part of every `/api/cli/crm/runs/<runId>/...` request path. */
    runId: string;
    /** Look-back window, in days, forwarded to the launch scraper. */
    lookbackDays: number;
    /** Base URL of the API that receives checkpoints, prospects and the finish call. */
    apiBaseUrl: string;
    /** Token sent as the `Authorization: Bearer` credential on API requests. */
    runToken: string;
    /** Sink for diagnostic messages. */
    logger: {
        info(msg: string): void;
        warn(msg: string): void;
        error(msg: string): void;
    };
}
16
/**
 * Optional replacements for the collaborators a campaign run uses.
 * Any member left out falls back to the real implementation.
 */
export interface RunCampaignDeps {
    /** Replacement for the BetaList launch scraper. */
    scrapeLaunches?: typeof scrapeBetaListLaunches;
    /** Replacement for the landing-page scraper. */
    scrapeLanding?: typeof scrapeLanding;
    /** Replacement for the contact e-mail finder. */
    findEmail?: typeof findEmail;
    /** Replacement for the global `fetch`. */
    fetch?: typeof fetch;
    /** Factory used instead of launching Chromium directly. */
    launchBrowser?: () => Promise<Browser>;
}
23
/**
 * Runs one campaign: scrapes launches, processes each of them and reports
 * progress to the API.
 *
 * @param opts - run configuration.
 * @param deps - optional collaborator overrides.
 * @returns counts of launches scraped, and of prospects inserted, disqualified and skipped.
 */
export declare function runCampaign(opts: RunCampaignOptions, deps?: RunCampaignDeps): Promise<{
    scraped: number;
    inserted: number;
    disqualified: number;
    skipped: number;
}>;
@@ -0,0 +1,405 @@
1
+ import fs from 'node:fs/promises';
2
+ import { chromium } from 'playwright';
3
+ import { findEmail } from './email-fallback.js';
4
+ import { scrapeBetaListLaunches } from './scrape-betalist.js';
5
+ import { scrapeLanding } from './scrape-landing.js';
6
+ import { LandingShotEndpointMissingError, uploadLandingShot } from './storage-upload.js';
7
/** User-Agent string handed to the launch scraper and the landing-page scraper. */
const CRAWLER_UA = 'AutoKap-Crawler/1.0 (+https://autokap.app/crawler)';

// TODO(parent): read CRM vision model from a config slot after Phase B settles.
/** Model identifier sent with landing-page vision analysis requests. */
const DEFAULT_VISION_MODEL = 'openai/gpt-5.2-mini';
10
/**
 * Runs one campaign end to end.
 *
 * Steps: announce the run, scrape launches, launch a browser, process the
 * launches two at a time while posting throttled progress checkpoints, then
 * mark the run completed. Any error is reported through an `error`
 * checkpoint and a `failed` finish call before being rethrown. The browser,
 * if one was started, is always closed.
 *
 * @param opts - run configuration (`runId`, `lookbackDays`, `apiBaseUrl`, `runToken`, `logger`).
 * @param deps - optional overrides for `fetch`, the scrapers, the e-mail finder and the browser launcher.
 * @returns `{ scraped, inserted, disqualified, skipped }` counters.
 */
export async function runCampaign(opts, deps = {}) {
    const { logger } = opts;
    const context = {
        // Trailing slashes are dropped so endpoint paths can be appended directly.
        apiBaseUrl: opts.apiBaseUrl.replace(/\/+$/, ''),
        runId: opts.runId,
        runToken: opts.runToken,
        logger,
        fetchImpl: deps.fetch ?? globalThis.fetch,
    };
    const scrapeLaunches = deps.scrapeLaunches ?? scrapeBetaListLaunches;
    const scrapeLandingFn = deps.scrapeLanding ?? scrapeLanding;
    const findEmailFn = deps.findEmail ?? findEmail;
    let browser = null;
    try {
        await postCheckpoint(context, { type: 'run_start', message: 'CLI booted, fetching launches' });
        const launches = await scrapeLaunches({
            lookbackDays: opts.lookbackDays,
            userAgent: CRAWLER_UA,
            logger,
        });

        if (launches.length === 0) {
            await postCheckpoint(context, { type: 'page_fetched', scraped_count: 0 });
            await postFinish(context, 'completed');
            return { scraped: 0, inserted: 0, disqualified: 0, skipped: 0 };
        }

        await postCheckpoint(context, {
            type: 'page_fetched',
            scraped_count: launches.length,
            message: `Found ${launches.length} launches`,
        });

        if (deps.launchBrowser) {
            browser = await deps.launchBrowser();
        }
        else {
            const sandboxFlags = ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'];
            browser = await chromium.launch({
                args: process.platform === 'linux' ? sandboxFlags : [],
                headless: true,
            });
        }

        const counters = { inserted: 0, disqualified: 0, skipped: 0 };
        let lastProgressAt = 0;
        // Posts a progress checkpoint at most once every 5 seconds unless forced.
        const sendProgress = async (force = false) => {
            const now = Date.now();
            const throttled = !force && now - lastProgressAt < 5_000;
            if (throttled) {
                return;
            }
            lastProgressAt = now;
            await postCheckpoint(context, {
                type: 'progress',
                scraped_count: launches.length,
                inserted_count: counters.inserted,
                disqualified_count: counters.disqualified,
                skipped_count: counters.skipped,
            });
        };

        await processWithConcurrency(launches, 2, async (launch) => {
            await processLaunch({
                launch,
                browser,
                counters,
                context,
                scrapeLandingFn,
                findEmailFn,
            });
            await sendProgress();
        });

        await sendProgress(true);
        await postFinish(context, 'completed');
        return {
            scraped: launches.length,
            inserted: counters.inserted,
            disqualified: counters.disqualified,
            skipped: counters.skipped,
        };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        await postCheckpoint(context, { type: 'error', errorMessage: message, message });
        await postFinish(context, 'failed', message);
        throw error;
    }
    finally {
        if (browser) {
            await browser.close().catch((error) => {
                logger.warn(`[crm-run] Browser close failed: ${error.message}`);
            });
        }
    }
}
96
/**
 * Processes a single launch: disqualify it, or scrape its landing page, look
 * up contact details and post the resulting prospect.
 *
 * Three outcomes:
 *  - disqualified (no product URL / not a web app): posted as `closed_no_fit`;
 *  - scrape blocked (robots disallowed or no screenshot): posted without
 *    vision signals and flagged `scrape_blocked`;
 *  - scraped: posted with merged HTML + vision signals.
 *
 * In both non-disqualified outcomes the status is `to_contact` only when an
 * e-mail is actually known, and `email_missing` otherwise. The scrape-blocked
 * path used to post `to_contact` unconditionally, which disagreed with the
 * regular path.
 *
 * `counters` is mutated in place according to the API's response status
 * (201 = created, 200 = already known).
 *
 * @param args - `{ launch, browser, counters, context, scrapeLandingFn, findEmailFn }`.
 */
async function processLaunch(args) {
    const { launch, counters, context, scrapeLandingFn, findEmailFn } = args;
    const noFitReason = disqualifyReason(launch);
    if (noFitReason) {
        const response = await postProspect(context, {
            ...baseProspect(launch),
            status: 'closed_no_fit',
            signals: { is_web_app: false },
            free_signals: [{ label: noFitReason, weight: 0 }],
        });
        if (response?.status === 201)
            counters.disqualified += 1;
        else if (response?.status === 200)
            counters.skipped += 1;
        await postCheckpoint(context, {
            type: 'launch_disqualified',
            disqualified_count: counters.disqualified,
            skipped_count: counters.skipped,
            message: `${launch.productName} disqualified: ${noFitReason}`,
        });
        return;
    }
    const scrape = await scrapeLandingFn({
        productUrl: launch.productUrl,
        userAgent: CRAWLER_UA,
        logger: context.logger,
        browser: args.browser,
    });
    const scrapeBlocked = scrape.error === 'robots_disallowed' || scrape.screenshotPath === null;
    // Vision analysis needs a screenshot, so it is skipped when the scrape was blocked.
    const vision = scrapeBlocked ? null : await analyzeWithOptionalVision(context, launch, scrape);
    const contact = await findEmailFn({
        betaListLaunchUrl: launch.sourceUrl,
        productUrl: launch.productUrl,
        logger: context.logger,
    });
    const email = contact.email ?? launch.creatorEmail;
    const response = await postProspect(context, {
        ...baseProspect(launch),
        creator_email: email,
        creator_handle: contact.handle ?? launch.creatorHandle,
        creator_lang: contact.lang ?? vision?.creator_lang_guess ?? launch.creatorLang,
        status: email ? 'to_contact' : 'email_missing',
        signals: scrapeBlocked
            ? { is_web_app: scrape.isWebApp, scrape_blocked: true }
            : mergeSignals(scrape, vision),
        free_signals: scrapeBlocked
            ? [{ label: 'scrape_blocked', weight: 0 }]
            : sanitizeFreeSignals(vision?.free_signals ?? []),
    });
    countInsertedOrSkipped(response, counters);
    await postCheckpoint(context, {
        type: 'launch_inserted',
        inserted_count: counters.inserted,
        skipped_count: counters.skipped,
        message: scrapeBlocked
            ? `${launch.productName} inserted with scrape blocked`
            : `${launch.productName} inserted`,
    });
}
175
/**
 * Decides whether a launch should be discarded before any scraping.
 *
 * @param launch - launch with `productUrl`, `productName` and optional `tagline`.
 * @returns `'no_product_url'` when there is no product URL, `'not_web_app'`
 *          when name or tagline mention a mobile-only or hardware product,
 *          otherwise `null`.
 */
function disqualifyReason(launch) {
    if (!launch.productUrl) {
        return 'no_product_url';
    }
    const nonWebAppPattern = /\b(iOS only|App Store only|iPhone only|iPad only|Android only|Google Play only|hardware|physical product)\b/i;
    const tagline = launch.tagline ?? '';
    const description = `${launch.productName} ${tagline}`;
    return nonWebAppPattern.test(description) ? 'not_web_app' : null;
}
184
/**
 * Maps a scraped launch onto the snake_case prospect fields every prospect
 * payload starts from.
 *
 * @param launch - scraped launch.
 * @returns prospect fields copied from the launch.
 */
function baseProspect(launch) {
    const { sourceUrl, productName, productUrl, creatorName, creatorEmail, creatorLang, creatorHandle } = launch;
    return {
        source_url: sourceUrl,
        product_name: productName,
        product_url: productUrl,
        creator_name: creatorName,
        creator_email: creatorEmail,
        creator_lang: creatorLang,
        creator_handle: creatorHandle,
    };
}
195
/**
 * Updates `counters` from a prospect POST result: status 201 counts as
 * inserted, status 200 as skipped; anything else (including a missing
 * response) changes nothing.
 *
 * @param response - result of the prospect POST, or `null`.
 * @param counters - mutable `{ inserted, skipped, ... }` tally.
 */
function countInsertedOrSkipped(response, counters) {
    switch (response?.status) {
        case 201:
            counters.inserted += 1;
            break;
        case 200:
            counters.skipped += 1;
            break;
        default:
            break;
    }
}
201
/**
 * Best-effort vision analysis of a landing-page screenshot.
 *
 * Reads the screenshot from disk, uploads it, and asks the vision model to
 * analyze the signed URL together with the scraped text. Every failure is
 * logged as a warning and turned into `null`; this function never throws.
 *
 * @param context - run context (`runId`, `apiBaseUrl`, `runToken`, `fetchImpl`, `logger`).
 * @param launch - launch being processed (its `sourceUrl` identifies the upload).
 * @param scrape - landing scrape result (`screenshotPath`, `rawText`).
 * @returns the validated analysis, or `null` when there is no screenshot or anything fails.
 */
async function analyzeWithOptionalVision(context, launch, scrape) {
    if (!scrape.screenshotPath) {
        return null;
    }
    try {
        const pngBuffer = await fs.readFile(scrape.screenshotPath);
        const upload = await uploadLandingShot({
            runId: context.runId,
            sourceUrl: launch.sourceUrl,
            pngBuffer,
            apiBaseUrl: context.apiBaseUrl,
            runToken: context.runToken,
        });
        const analysis = await analyzeLandingWithVision({
            signedUrl: upload.signedUrl,
            rawText: scrape.rawText,
            model: DEFAULT_VISION_MODEL,
            fetchImpl: context.fetchImpl,
        });
        return analysis;
    }
    catch (error) {
        const endpointMissing = error instanceof LandingShotEndpointMissingError
            || error.message === 'LANDING_SHOT_ENDPOINT_MISSING';
        context.logger.warn(endpointMissing
            ? '[crm-run] Landing-shot endpoint missing; vision skipped'
            : `[crm-run] Vision analysis skipped for ${launch.sourceUrl}: ${error.message}`);
        return null;
    }
}
229
/**
 * Builds the prospect `signals` object, preferring each value from the vision
 * analysis and falling back to the HTML-derived one (then to a default).
 *
 * @param scrape - landing scrape result (`htmlSignals`, `isWebApp`).
 * @param vision - vision analysis, or `null`/`undefined`.
 * @returns merged signals; `beta_list_quality` is `undefined` without vision data.
 */
function mergeSignals(scrape, vision) {
    const html = scrape.htmlSignals;
    const screenshotCount = vision?.n_screenshots ?? html.n_screenshots_html ?? 0;
    return {
        n_screenshots: clampInt(screenshotCount, 0, 50),
        has_video: vision?.has_video ?? html.has_video_html ?? false,
        has_docs: vision?.has_docs ?? html.has_docs_html ?? false,
        has_dashboard: vision?.has_dashboard ?? html.has_dashboard_html ?? false,
        beta_list_quality: vision?.beta_list_quality_inferred,
        is_web_app: vision?.is_web_app ?? scrape.isWebApp,
    };
}
239
/**
 * Bounds a list of free-form signals: at most the first 8 entries are
 * considered, labels are cut to 120 characters, weights are clamped to the
 * integer range -5..5, and entries whose label is blank are dropped.
 *
 * @param value - array of `{ label, weight }`.
 * @returns the sanitized array.
 */
function sanitizeFreeSignals(value) {
    const sanitized = [];
    for (const signal of value.slice(0, 8)) {
        const label = signal.label.slice(0, 120);
        const weight = clampInt(signal.weight, -5, 5);
        if (label.trim().length > 0) {
            sanitized.push({ label, weight });
        }
    }
    return sanitized;
}
245
/**
 * Truncates `value` toward zero and clamps it to `[min, max]`.
 * Anything that is not a finite number is treated as `min`.
 *
 * @param value - number to clamp.
 * @param min - lower bound.
 * @param max - upper bound.
 */
function clampInt(value, min, max) {
    const finite = Number.isFinite(value) ? value : min;
    const truncated = Math.trunc(finite);
    const capped = Math.min(max, truncated);
    return Math.max(min, capped);
}
249
/**
 * Posts a checkpoint event for the run. Best effort: a failed or errored
 * request is only logged as a warning, never thrown.
 *
 * @param context - run context (`apiBaseUrl`, `runId`, `runToken`, `fetchImpl`, `logger`).
 * @param body - checkpoint payload, sent as JSON.
 */
async function postCheckpoint(context, body) {
    try {
        const endpoint = `${context.apiBaseUrl}/api/cli/crm/runs/${context.runId}/checkpoint`;
        const response = await context.fetchImpl(endpoint, {
            method: 'POST',
            headers: jsonAuthHeaders(context.runToken),
            body: JSON.stringify(body),
            signal: AbortSignal.timeout(15_000),
        });
        if (response.ok) {
            return;
        }
        // Fall back to the status text when the body cannot be read.
        const responseBody = await response.text().catch(() => response.statusText);
        context.logger.warn(`[crm-run] checkpoint POST failed: status=${response.status} body=${responseBody.slice(0, 300)}`);
    }
    catch (error) {
        context.logger.warn(`[crm-run] checkpoint POST errored: ${error.message}`);
    }
}
266
/**
 * Posts one prospect for the run.
 *
 * The HTTP status is checked before the body is parsed, and parsing is
 * tolerant: a successful response with an empty or non-JSON body still
 * counts as success (with `json: null`) instead of being reported as a
 * failure, and an error response with an HTML body is logged with its real
 * status rather than as a JSON syntax error.
 *
 * @param context - run context (`apiBaseUrl`, `runId`, `runToken`, `fetchImpl`, `logger`).
 * @param body - prospect payload, sent as JSON.
 * @returns `{ status, json }` on a 2xx response, otherwise `null` (after logging a warning).
 */
async function postProspect(context, body) {
    try {
        const response = await context.fetchImpl(`${context.apiBaseUrl}/api/cli/crm/runs/${context.runId}/prospect`, {
            method: 'POST',
            headers: jsonAuthHeaders(context.runToken),
            body: JSON.stringify(body),
            signal: AbortSignal.timeout(15_000),
        });
        const text = await response.text().catch(() => '');
        if (!response.ok) {
            context.logger.warn(`[crm-run] prospect POST failed: status=${response.status} body=${text.slice(0, 500)}`);
            return null;
        }
        let json = null;
        if (text) {
            try {
                json = JSON.parse(text);
            }
            catch {
                // The request itself succeeded; only the body is unusable.
                context.logger.warn(`[crm-run] prospect POST returned a non-JSON body: status=${response.status}`);
            }
        }
        return { status: response.status, json };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        context.logger.warn(`[crm-run] prospect POST errored: ${message}`);
        return null;
    }
}
287
/**
 * Reports the final status of the run. Best effort: a failed or errored
 * request is only logged as a warning, never thrown.
 *
 * @param context - run context (`apiBaseUrl`, `runId`, `runToken`, `fetchImpl`, `logger`).
 * @param status - final status string.
 * @param errorMessage - optional error description; included in the payload only when truthy.
 */
async function postFinish(context, status, errorMessage) {
    try {
        const payload = errorMessage ? { status, errorMessage } : { status };
        const endpoint = `${context.apiBaseUrl}/api/cli/crm/runs/${context.runId}/finish`;
        const response = await context.fetchImpl(endpoint, {
            method: 'POST',
            headers: jsonAuthHeaders(context.runToken),
            body: JSON.stringify(payload),
            signal: AbortSignal.timeout(15_000),
        });
        if (response.ok) {
            return;
        }
        // Fall back to the status text when the body cannot be read.
        const body = await response.text().catch(() => response.statusText);
        context.logger.warn(`[crm-run] finish POST failed: status=${response.status} body=${body.slice(0, 300)}`);
    }
    catch (error) {
        context.logger.warn(`[crm-run] finish POST errored: ${error.message}`);
    }
}
304
/**
 * Builds the headers for an authenticated JSON request.
 *
 * @param runToken - token sent as the Bearer credential.
 * @returns `Authorization` and `Content-Type` headers.
 */
function jsonAuthHeaders(runToken) {
    const authorization = `Bearer ${runToken}`;
    return { Authorization: authorization, 'Content-Type': 'application/json' };
}
310
/**
 * Runs `worker` over `items` with at most `concurrency` calls in flight.
 *
 * A fixed pool of loops pulls items from a shared cursor, so items are
 * started in order. `undefined` entries are skipped. The returned promise
 * rejects as soon as any worker call rejects.
 *
 * @param items - items to process.
 * @param concurrency - maximum number of simultaneous worker calls.
 * @param worker - async function invoked once per item.
 */
async function processWithConcurrency(items, concurrency, worker) {
    let cursor = 0;
    const drain = async () => {
        for (let index = cursor++; index < items.length; index = cursor++) {
            const item = items[index];
            if (item === undefined) {
                continue;
            }
            await worker(item);
        }
    };
    const poolSize = Math.min(concurrency, items.length);
    await Promise.all(Array.from({ length: poolSize }, () => drain()));
}
322
/**
 * Asks the vision model (through OpenRouter's chat-completions endpoint) to
 * analyze a landing-page screenshot plus the first 4000 characters of its text.
 *
 * @param args - `{ signedUrl, rawText, model, fetchImpl? }`.
 * @returns the validated analysis object, or `null` when the reply has no
 *          string content or that content is not valid JSON.
 * @throws Error when `OPENROUTER_API_KEY` is not set or the API answers with a non-2xx status.
 */
async function analyzeLandingWithVision(args) {
    const apiKey = process.env.OPENROUTER_API_KEY;
    if (!apiKey) {
        throw new Error('OPENROUTER_API_KEY missing');
    }
    const fetchImpl = args.fetchImpl ?? globalThis.fetch;
    const systemPrompt = 'Analyze a SaaS product landing page screenshot and text. Return one JSON object with EXACT keys: n_screenshots (int 0..50, count product-screenshot-like images visible), has_video (bool, demo or product video), has_docs (bool, link to docs/api docs visible), has_dashboard (bool, mentions or shows a dashboard/app interface), is_web_app (bool, false if iOS/Android/hardware-only), beta_list_quality_inferred ("clean" | "sloppy", based on visual polish), free_signals (array of { label: string<=120, weight: int -5..5 } for notable strengths or weaknesses, max 6), creator_lang_guess ("fr" | "en" | null). Be terse and respond ONLY with JSON.';
    const userContent = [
        { type: 'text', text: args.rawText.slice(0, 4000) },
        { type: 'image_url', image_url: { url: args.signedUrl } },
    ];
    const payload = {
        model: args.model,
        messages: [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userContent },
        ],
        response_format: { type: 'json_object' },
        temperature: 0,
    };
    const response = await fetchImpl('https://openrouter.ai/api/v1/chat/completions', {
        method: 'POST',
        headers: {
            Authorization: `Bearer ${apiKey}`,
            'Content-Type': 'application/json',
            'HTTP-Referer': 'https://autokap.app',
            'X-Title': 'AutoKap CRM Scraper',
        },
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(60_000),
    });
    if (!response.ok) {
        const body = await response.text().catch(() => response.statusText);
        throw new Error(`OpenRouter HTTP ${response.status}: ${body.slice(0, 300)}`);
    }
    const json = await response.json().catch(() => null);
    const content = json?.choices?.[0]?.message?.content;
    if (typeof content !== 'string') {
        return null;
    }
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch {
        // The model did not return valid JSON.
        return null;
    }
    return validateVisionAnalysis(parsed);
}
372
/**
 * Copies only the well-typed fields of a model reply into a fresh object.
 *
 * Fields with an unexpected type or value are silently omitted;
 * `n_screenshots` is clamped to 0..50 and `free_signals` is reduced to at
 * most 6 well-formed entries before being sanitized.
 *
 * @param value - parsed JSON from the model.
 * @returns the validated analysis, or `null` when `value` is not an object.
 */
function validateVisionAnalysis(value) {
    if (!value || typeof value !== 'object') {
        return null;
    }
    const input = value;
    const output = {};
    if (typeof input.n_screenshots === 'number') {
        output.n_screenshots = clampInt(input.n_screenshots, 0, 50);
    }
    for (const flag of ['has_video', 'has_docs', 'has_dashboard', 'is_web_app']) {
        if (typeof input[flag] === 'boolean') {
            output[flag] = input[flag];
        }
    }
    const quality = input.beta_list_quality_inferred;
    if (quality === 'clean' || quality === 'sloppy') {
        output.beta_list_quality_inferred = quality;
    }
    const langGuess = input.creator_lang_guess;
    // An explicit null is a valid answer and is kept; a missing key is not copied.
    if (langGuess === 'fr' || langGuess === 'en' || langGuess === null) {
        output.creator_lang_guess = langGuess;
    }
    if (Array.isArray(input.free_signals)) {
        const wellFormed = input.free_signals
            .filter((item) => item && typeof item === 'object'
            && typeof item.label === 'string' && typeof item.weight === 'number')
            .map((item) => ({ label: item.label, weight: item.weight }))
            .slice(0, 6);
        output.free_signals = sanitizeFreeSignals(wellFormed);
    }
    return output;
}
405
+ //# sourceMappingURL=run-campaign.js.map
@@ -0,0 +1,20 @@
1
/** One launch scraped from BetaList. */
export type BetaListLaunch = {
    /** URL of the launch's page on BetaList. */
    sourceUrl: string;
    /** Name of the launched product. */
    productName: string;
    /** URL of the product's own site, when known. */
    productUrl: string | null;
    /** Creator's name, when known. */
    creatorName: string | null;
    /** Creator's e-mail address, when known. */
    creatorEmail: string | null;
    /** Creator's social handle, when known. */
    creatorHandle: string | null;
    /** Creator's language code, when known. */
    creatorLang: string | null;
    /** Short product description, when present. */
    tagline?: string | null;
};
11
/** Options for {@link scrapeBetaListLaunches}. */
export interface ScrapeBetaListOptions {
    /** Look-back window, in days. */
    lookbackDays: number;
    /** User-Agent string to identify the scraper with. */
    userAgent: string;
    /** Sink for diagnostic messages. */
    logger: {
        info(msg: string): void;
        warn(msg: string): void;
        error(msg: string): void;
    };
}
20
/**
 * Scrapes BetaList launches.
 *
 * @param opts - look-back window, User-Agent and logger.
 * @returns the launches found.
 */
export declare function scrapeBetaListLaunches(opts: ScrapeBetaListOptions): Promise<BetaListLaunch[]>;