autokap 1.4.3 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { logger } from './logger.js';
4
+ import { getConfigDir } from './cli-config.js';
5
/** Registry endpoint that serves the metadata document of the autokap package. */
const NPM_REGISTRY_URL = 'https://registry.npmjs.org/autokap';

/** Maximum age, in milliseconds, of a cached registry answer (24 hours). */
const CACHE_TTL_MS = 1000 * 60 * 60 * 24;

/** A single registry request is aborted after this many milliseconds. */
const FETCH_TIMEOUT_MS = 2500;

/** Longest time, in milliseconds, the startup notice waits for a version answer. */
const TOTAL_BUDGET_MS = 1000;
9
/**
 * Resolves the location of the version-check cache file.
 *
 * @returns {string} Path of `version-check.json` inside the directory
 *   returned by `getConfigDir()`.
 */
function getCachePath() {
    const configDir = getConfigDir();
    return path.join(configDir, 'version-check.json');
}
12
/**
 * Loads the cached version-check entry from disk.
 *
 * @returns {Promise<{ latestVersion: string, checkedAt: number } | null>}
 *   The cached entry, or `null` when the file cannot be read, is not valid
 *   JSON, or lacks a string `latestVersion` / numeric `checkedAt`.
 */
async function readCache() {
    let entry;
    try {
        const contents = await fs.readFile(getCachePath(), 'utf-8');
        entry = JSON.parse(contents);
    }
    catch {
        // Missing or corrupt cache is treated the same as "no cache".
        return null;
    }
    // Optional chaining covers a file whose JSON value is `null`.
    const latestVersion = entry?.latestVersion;
    const checkedAt = entry?.checkedAt;
    const wellFormed = typeof latestVersion === 'string' && typeof checkedAt === 'number';
    return wellFormed ? { latestVersion, checkedAt } : null;
}
25
/**
 * Persists a version-check entry as pretty-printed JSON, creating the
 * configuration directory first when it does not exist.
 *
 * @param entry Value to serialize into the cache file.
 * @returns {Promise<void>} Always resolves; every failure is swallowed.
 */
async function writeCache(entry) {
    try {
        const configDir = getConfigDir();
        await fs.mkdir(configDir, { recursive: true });
        const payload = JSON.stringify(entry, null, 2);
        await fs.writeFile(getCachePath(), payload, 'utf-8');
    }
    catch {
        // Best effort: without a cache the next run simply asks the registry again.
    }
}
34
/**
 * Asks the npm registry for the version currently tagged `latest`.
 *
 * The request is aborted after FETCH_TIMEOUT_MS. The abbreviated metadata
 * format is requested because only `dist-tags` is read here and the full
 * document carries the complete manifest of every published version.
 *
 * @returns {Promise<string | null>} The `latest` dist-tag, or `null` when the
 *   request fails, times out, returns a non-2xx status, or the body does not
 *   contain a non-empty string under `dist-tags.latest`.
 */
export async function fetchLatestVersionFromRegistry() {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
    try {
        const res = await fetch(NPM_REGISTRY_URL, {
            signal: controller.signal,
            headers: {
                // Prefer the abbreviated document, fall back to the full one.
                Accept: 'application/vnd.npm.install-v1+json; q=1.0, application/json; q=0.8, */*',
            },
        });
        if (!res.ok)
            return null;
        const data = await res.json();
        const latest = data?.['dist-tags']?.latest;
        // Registry output is external input: only hand back a real version string.
        return typeof latest === 'string' && latest.length > 0 ? latest : null;
    }
    catch {
        // Network errors, aborts and malformed JSON all mean "unknown".
        return null;
    }
    finally {
        clearTimeout(timeout);
    }
}
51
/**
 * Extracts the numeric `major.minor.patch` core of a version string.
 *
 * Any pre-release (`-...`) or build-metadata (`+...`) suffix is ignored.
 * Each component must consist of decimal digits only, so empty segments
 * ("1..3"), hexadecimal or exponent notation and signs are rejected rather
 * than being coerced by `Number()`.
 *
 * @param version Version string such as "1.5.1" or "2.0.0-beta.1".
 * @returns {[number, number, number] | null} The three components, or `null`
 *   when the input is not a string or does not have that shape.
 */
function parseSemver(version) {
    if (typeof version !== 'string')
        return null;
    const match = /^(\d+)\.(\d+)\.(\d+)(?:[-+].*)?$/.exec(version.trim());
    if (!match)
        return null;
    const parts = [Number(match[1]), Number(match[2]), Number(match[3])];
    // Absurdly long digit runs would lose precision; treat them as unparseable.
    return parts.every((part) => Number.isSafeInteger(part)) ? parts : null;
}
58
/**
 * Tells whether `latest` is a strictly higher version than `current`.
 *
 * Only the three numeric components produced by `parseSemver` are compared,
 * most significant first.
 *
 * @param latest Candidate version string.
 * @param current Version string to compare against.
 * @returns {boolean} `true` when `latest` is higher; `false` when it is equal,
 *   lower, or either string cannot be parsed.
 */
export function isNewerVersion(latest, current) {
    const latestParts = parseSemver(latest);
    const currentParts = parseSemver(current);
    if (!latestParts || !currentParts)
        return false;
    for (let index = 0; index < 3; index += 1) {
        // The first differing component decides the outcome.
        if (latestParts[index] !== currentParts[index])
            return latestParts[index] > currentParts[index];
    }
    return false;
}
69
/**
 * Tells whether a version string carries a pre-release suffix.
 *
 * Build metadata (everything from the first `+`) is removed before looking
 * for the hyphen, because build identifiers may themselves contain hyphens
 * and must not be mistaken for a pre-release tag.
 *
 * @param version Version string such as "1.5.1" or "1.6.0-beta.2".
 * @returns {boolean} `true` when a `-` appears before any build metadata.
 */
function isPreRelease(version) {
    const plusIndex = version.indexOf('+');
    const withoutBuild = plusIndex === -1 ? version : version.slice(0, plusIndex);
    return withoutBuild.includes('-');
}
72
/**
 * Returns the latest published version, preferring a fresh cache entry over
 * a registry round-trip.
 *
 * Order of preference: a cache entry younger than CACHE_TTL_MS, then a fresh
 * registry answer (which is written back to the cache), then a stale cache
 * entry when the registry could not be reached.
 *
 * @returns {Promise<string | null>} The version string, or `null` when
 *   neither the cache nor the registry yields one.
 */
export async function getCachedOrFetchLatest() {
    const cache = await readCache();
    if (cache) {
        const ageMs = Date.now() - cache.checkedAt;
        // A negative age means `checkedAt` lies in the future (clock moved back
        // or file edited); treating that as fresh would pin the cache until the
        // clock catches up, so it is handled as expired instead.
        if (ageMs >= 0 && ageMs < CACHE_TTL_MS)
            return cache.latestVersion;
    }
    const latest = await fetchLatestVersionFromRegistry();
    if (latest) {
        await writeCache({ latestVersion: latest, checkedAt: Date.now() });
        return latest;
    }
    // Registry unavailable: a stale answer is still better than none.
    return cache?.latestVersion ?? null;
}
84
/**
 * Logs an upgrade hint when the registry advertises a newer version than the
 * one currently running.
 *
 * Nothing is logged for pre-release builds, when no version could be obtained
 * within TOTAL_BUDGET_MS, or when the obtained version is not newer. The
 * function never rejects.
 *
 * @param currentVersion Version string of the running CLI.
 * @returns {Promise<void>}
 */
export async function displayNewVersionNoticeIfAvailable(currentVersion) {
    let budgetTimer;
    try {
        // Inside the try so that an unexpected argument cannot break the caller.
        if (isPreRelease(currentVersion))
            return;
        const budget = new Promise(resolve => {
            budgetTimer = setTimeout(() => resolve(null), TOTAL_BUDGET_MS);
        });
        const latest = await Promise.race([getCachedOrFetchLatest(), budget]);
        if (!latest)
            return;
        if (!isNewerVersion(latest, currentVersion))
            return;
        logger.info(`A new version of autokap (${latest}) is available, run npm install -g autokap@latest to update`);
    }
    catch {
        // Silent failure — the version check must never block or break the CLI
    }
    finally {
        // Without this the pending timer keeps the event loop alive for the
        // whole budget even when the answer came straight from the cache.
        clearTimeout(budgetTimer);
    }
}
102
+ //# sourceMappingURL=version-check.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.4.3",
3
+ "version": "1.5.1",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -231,8 +231,7 @@
231
231
  "satori": "^0.26.0",
232
232
  "wawoff2": "^2.0.1",
233
233
  "ws": "^8.20.0",
234
- "zod": "^4.3.6",
235
- "cheerio": "^1.1.2"
234
+ "zod": "^4.3.6"
236
235
  },
237
236
  "devDependencies": {
238
237
  "@types/node": "^25.3.3",
@@ -1,16 +0,0 @@
1
/** Input accepted by {@link findEmail}. */
export interface EmailFallbackOptions {
    /** Launch page URL; it is the first page fetched. */
    betaListLaunchUrl: string;
    /** Product site URL, or `null` when unknown; its hostname is used to rank addresses. */
    productUrl: string | null;
    /** Sink for diagnostics emitted while pages are fetched. */
    logger: {
        info(msg: string): void;
        warn(msg: string): void;
        error(msg: string): void;
    };
}
10
/**
 * Crawls the launch page and, when needed, pages of the product site to find
 * a contact address, a social handle and a language code.
 *
 * @param opts URLs to crawl and the logger to report fetch problems to.
 * @returns The best-ranked email, the first handle found and the language;
 *   each field is `null` when nothing was found.
 */
export declare function findEmail(opts: EmailFallbackOptions): Promise<{
    email: string | null;
    handle: string | null;
    lang: string | null;
}>;
15
/**
 * Collects the email addresses that occur in a piece of text.
 *
 * @param text Text to scan.
 * @returns Lower-cased, de-duplicated addresses with junk matches removed.
 */
export declare function extractEmailsFromText(text: string): string[];
16
/**
 * Chooses the most promising contact address from a list of candidates.
 *
 * @param emails Candidate addresses.
 * @param productHostname Hostname of the product site, or `null` when unknown.
 * @returns The best-ranked address, or `null` when there is no candidate.
 */
export declare function pickBestEmail(emails: string[], productHostname: string | null): string | null;
@@ -1,217 +0,0 @@
1
/** User-Agent header sent with every page request. */
const CRAWLER_UA = 'AutoKap-Crawler/1.0 (+https://autokap.app/crawler)';

/** Global pattern used to pull email-shaped tokens out of free text. */
const EMAIL_RE = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;

/** Role mailboxes in order of preference; the array index is the rank. */
const ROLE_PREFIXES = [
    'founder',
    'contact',
    'hello',
    'team',
    'support',
    'info',
];

/** Consumer mail providers; addresses on these domains rank below product-domain ones. */
const FREE_MAIL_DOMAINS = new Set([
    'gmail.com',
    'outlook.com',
    'hotmail.com',
    'yahoo.com',
    'protonmail.com',
    'icloud.com',
    'proton.me',
]);

/** Per-hostname promise chain used to space out requests to the same host. */
const domainQueues = new Map();
6
/**
 * Crawls the launch page and, when needed, pages of the product site to find
 * a contact address, a social handle and a language code.
 *
 * The launch page is visited first. If that already yields a high-rank
 * address and a handle, the result is returned immediately. Otherwise the
 * URLs from `buildProductUrls` are visited in order until both are available
 * or the list is exhausted. When no language was detected on the product
 * host, one is inferred from the combined text of all visited pages.
 *
 * @param opts Object with `betaListLaunchUrl`, `productUrl` and `logger`.
 * @returns {Promise<{ email: string | null, handle: string | null, lang: string | null }>}
 */
export async function findEmail(opts) {
    const productHostname = opts.productUrl ? hostnameOf(opts.productUrl) : null;
    const pages = [];
    const found = new Set();
    let handle = null;
    let lang = null;
    // True once there is nothing left worth crawling for.
    const satisfied = (candidate) => isHighRankEmail(candidate, productHostname) && handle;
    async function crawl(url) {
        const page = await fetchPage(url, opts.logger);
        if (!page)
            return;
        pages.push(page);
        const $ = (await loadCheerio()).load(page.html);
        handle = handle ?? extractHandle($);
        // Only a successful page on the product's own host may set the language.
        if (!lang && productHostname && hostnameOf(page.url) === productHostname && page.status === 200) {
            lang = extractLanguage($, page.text);
        }
        // mailto: links first, then addresses that appear in the visible text.
        const sources = [extractMailtos($).join(' '), page.text];
        for (const source of sources) {
            for (const address of extractEmailsFromText(source))
                found.add(address);
        }
    }
    await crawl(opts.betaListLaunchUrl);
    let best = pickBestEmail([...found], productHostname);
    if (satisfied(best)) {
        return { email: best, handle, lang };
    }
    for (const url of buildProductUrls(opts.productUrl)) {
        if (satisfied(best))
            break;
        await crawl(url);
        best = pickBestEmail([...found], productHostname);
    }
    if (!lang) {
        lang = inferLanguageFromText(pages.map((page) => page.text).join(' '));
    }
    return { email: best, handle, lang };
}
50
/**
 * Collects the email addresses that occur in a piece of text.
 *
 * @param text Text to scan with EMAIL_RE.
 * @returns {string[]} Lower-cased, de-duplicated addresses in order of first
 *   appearance, without those rejected by `isJunkEmail`.
 */
export function extractEmailsFromText(text) {
    const unique = new Set();
    for (const raw of text.match(EMAIL_RE) ?? []) {
        const address = raw.toLowerCase();
        if (!isJunkEmail(address))
            unique.add(address);
    }
    return [...unique];
}
54
/**
 * Chooses the address with the lowest `rankEmail` score.
 *
 * Candidates are trimmed and lower-cased, blanks and duplicates are dropped,
 * and on equal rank the candidate that appeared first wins.
 *
 * @param emails Candidate addresses.
 * @param productHostname Hostname of the product site, or `null` when unknown.
 * @returns {string | null} The best candidate, or `null` when none remains.
 */
export function pickBestEmail(emails, productHostname) {
    const seen = new Set();
    let winner = null;
    let winnerRank = Infinity;
    for (const raw of emails) {
        const candidate = raw.trim().toLowerCase();
        if (!candidate || seen.has(candidate))
            continue;
        seen.add(candidate);
        const rank = rankEmail(candidate, productHostname);
        // Strict comparison keeps the earliest candidate on ties.
        if (rank < winnerRank) {
            winner = candidate;
            winnerRank = rank;
        }
    }
    return winner;
}
62
/**
 * Recognizes email-shaped strings that are not real contact addresses:
 * placeholder or telemetry domains, and image file names such as
 * "logo@2x.png" that merely look like an address.
 *
 * Domains are compared as whole names (or sub-domains) and image extensions
 * are only checked at the end of the string, so legitimate addresses whose
 * domain merely contains "png", "svg" or "example.com" are kept.
 *
 * @param email Candidate address.
 * @returns {boolean} `true` when the candidate should be discarded.
 */
function isJunkEmail(email) {
    const lower = email.toLowerCase();
    const domain = lower.slice(lower.lastIndexOf('@') + 1);
    const junkDomains = ['example.com', 'sentry.io', 'wixpress.com'];
    if (junkDomains.some((junk) => domain === junk || domain.endsWith(`.${junk}`)))
        return true;
    // Retina asset naming, e.g. "icon@2x.png".
    if (lower.includes('@2x'))
        return true;
    return /\.(?:png|jpe?g|gif|svg|webp)$/.test(domain);
}
72
/**
 * Scores an address; lower is better.
 *
 * - 0..5: role mailbox on the product's domain (index in ROLE_PREFIXES)
 * - 100: any other mailbox on the product's domain
 * - 200..205: role mailbox on a free-mail domain
 * - 300: any other mailbox on a free-mail domain
 * - 400: everything else
 *
 * @param email Address to score.
 * @param productHostname Hostname of the product site, or `null` when unknown.
 * @returns {number} The score.
 */
function rankEmail(email, productHostname) {
    const [localPart, domainPart] = email.split('@');
    const local = localPart?.toLowerCase() ?? '';
    const domain = domainPart?.toLowerCase() ?? '';
    const roleRank = ROLE_PREFIXES.indexOf(local);
    const isRole = roleRank >= 0;
    if (productHostname && domainsMatch(domain, productHostname))
        return isRole ? roleRank : 100;
    if (FREE_MAIL_DOMAINS.has(domain))
        return isRole ? 200 + roleRank : 300;
    return 400;
}
89
/**
 * Tells whether an address scores below 200 in `rankEmail`.
 *
 * @param email Address to check, or `null`.
 * @param productHostname Hostname of the product site, or `null` when unknown.
 * @returns {boolean} `false` for `null`, otherwise whether the score is below 200.
 */
function isHighRankEmail(email, productHostname) {
    if (email === null)
        return false;
    return rankEmail(email, productHostname) < 200;
}
92
/**
 * Decides whether an email domain belongs to the product's site.
 *
 * @param emailDomain Domain part of an email address.
 * @param productHostname Hostname of the product site.
 * @returns {boolean} `true` when both are equal after removing a leading
 *   "www.", or when their `etldOne` values are equal.
 */
function domainsMatch(emailDomain, productHostname) {
    const host = stripWww(productHostname);
    const mailHost = stripWww(emailDomain);
    if (mailHost === host)
        return true;
    return etldOne(mailHost) === etldOne(host);
}
97
/**
 * Second-level labels that commonly form a two-label public suffix together
 * with a two-letter country code ("co.uk", "com.au", "ac.nz", ...).
 */
const TWO_LABEL_SUFFIX_SECOND_LEVELS = new Set(['co', 'com', 'org', 'net', 'gov', 'edu', 'ac']);
/**
 * Approximates the registrable domain of a hostname.
 *
 * Normally the last two labels are kept. When the hostname ends in a generic
 * second-level label followed by a two-letter country code, three labels are
 * kept, so that unrelated sites such as "a.co.uk" and "b.co.uk" do not both
 * collapse to "co.uk". This is a heuristic, not a Public Suffix List lookup.
 *
 * @param hostname Hostname to reduce.
 * @returns {string} The reduced, lower-cased domain.
 */
function etldOne(hostname) {
    const labels = stripWww(hostname).split('.').filter(Boolean);
    if (labels.length <= 2)
        return labels.join('.');
    const [secondLevel, topLevel] = labels.slice(-2);
    const hasTwoLabelSuffix = topLevel.length === 2 && TWO_LABEL_SUFFIX_SECOND_LEVELS.has(secondLevel);
    return labels.slice(hasTwoLabelSuffix ? -3 : -2).join('.');
}
101
/**
 * Lower-cases a hostname and removes one leading "www." label.
 *
 * @param hostname Hostname to normalize.
 * @returns {string} The normalized hostname.
 */
function stripWww(hostname) {
    const lowered = hostname.toLowerCase();
    return lowered.startsWith('www.') ? lowered.slice('www.'.length) : lowered;
}
104
/**
 * Lists the product-site pages worth crawling: the given URL itself followed
 * by its /contact, /about, /legal and /mentions-legales pages (query string
 * and fragment removed), without duplicates.
 *
 * @param productUrl Product URL, or a falsy value when unknown.
 * @returns {string[]} The URLs in crawl order; empty when `productUrl` is
 *   falsy or cannot be parsed.
 */
function buildProductUrls(productUrl) {
    if (!productUrl)
        return [];
    try {
        const root = new URL(productUrl).toString();
        const wellKnown = ['/contact', '/about', '/legal', '/mentions-legales'].map((pathname) => {
            const page = new URL(root);
            page.pathname = pathname;
            page.search = '';
            page.hash = '';
            return page.toString();
        });
        // The Set drops a well-known page that equals the product URL itself.
        return [...new Set([root, ...wellKnown])];
    }
    catch {
        return [];
    }
}
123
/**
 * Downloads one HTML page and extracts its visible text.
 *
 * Only http(s) URLs are fetched. The call first waits for the hostname's
 * turn in `waitForDomainTurn`, and the request is aborted after 15 seconds.
 *
 * @param url Address of the page.
 * @param logger Object whose `warn` method receives fetch problems.
 * @returns {Promise<{ html: string, text: string, url: string, status: number } | null>}
 *   The raw HTML, the whitespace-collapsed body text (script, style, noscript
 *   and svg elements removed), the final URL and the status code; `null` when
 *   the URL is invalid or not http(s), the response is not OK, or the request
 *   throws.
 */
async function fetchPage(url, logger) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return null;
    }
    if (!['http:', 'https:'].includes(target.protocol))
        return null;
    await waitForDomainTurn(target.hostname);
    const href = target.toString();
    try {
        const response = await fetch(href, {
            headers: {
                Accept: 'text/html',
                'User-Agent': CRAWLER_UA,
            },
            signal: AbortSignal.timeout(15000),
        });
        if (!response.ok) {
            logger.warn(`[crm-email] Fetch returned HTTP ${response.status} for ${href}`);
            return null;
        }
        const html = await response.text();
        const cheerio = await loadCheerio();
        const $ = cheerio.load(html);
        // Drop elements whose content is not human-readable text.
        $('script, style, noscript, svg').remove();
        const text = $('body').text().replace(/\s+/g, ' ').trim();
        return { html, text, url: response.url || href, status: response.status };
    }
    catch (error) {
        logger.warn(`[crm-email] Fetch failed for ${href}: ${error.message}`);
        return null;
    }
}
162
/**
 * Serializes callers per hostname so that consecutive turns for the same
 * host are at least one second apart.
 *
 * Each call appends a one-second delay to the host's promise chain in
 * `domainQueues` and resolves as soon as the previous link of that chain has
 * settled. Once a host's chain has drained, its entry is removed so the map
 * does not grow with every hostname ever seen.
 *
 * @param hostname Host about to be contacted.
 * @returns {Promise<void>} Resolves when the caller may send its request.
 */
async function waitForDomainTurn(hostname) {
    const previous = domainQueues.get(hostname) ?? Promise.resolve();
    // The next caller for this host may start one second after this one did.
    const current = previous.then(() => new Promise((resolve) => {
        setTimeout(resolve, 1000);
    }));
    domainQueues.set(hostname, current);
    void current.then(() => {
        // Only remove the entry when nobody queued up behind this call.
        if (domainQueues.get(hostname) === current)
            domainQueues.delete(hostname);
    });
    await previous;
}
172
/**
 * Loads the `cheerio` module at run time.
 *
 * The `import()` call is built through the `Function` constructor, so the
 * specifier never appears as a static import in this file.
 *
 * @returns {Promise<any>} The module namespace of `cheerio`.
 */
async function loadCheerio() {
    const dynamicImport = new Function('specifier', 'return import(specifier)');
    const cheerioModule = await dynamicImport('cheerio');
    return cheerioModule;
}
176
/**
 * Reads the address part of every `mailto:` link in a document.
 *
 * The scheme and any `?query` are removed and percent-escapes are decoded.
 * A link with a malformed escape sequence is returned undecoded instead of
 * throwing, so one broken link cannot abort the whole crawl.
 *
 * @param $ Cheerio-style selector function bound to the document.
 * @returns {string[]} One entry per `mailto:` link, in document order.
 */
function extractMailtos($) {
    return $('a[href^="mailto:"]').toArray().map((anchor) => {
        const href = $(anchor).attr('href') ?? '';
        const address = href.replace(/^mailto:/i, '').split('?')[0] ?? '';
        try {
            return decodeURIComponent(address);
        }
        catch {
            // URIError on invalid percent-encoding: keep the raw text.
            return address;
        }
    });
}
182
/**
 * Finds the first social profile linked from a document.
 *
 * Links are scanned in document order. A twitter.com / x.com profile link
 * yields "@name"; a linkedin.com/in/ link yields the profile slug. The host
 * must start the URL or follow "//" or ".", so hosts that merely end in the
 * same letters (for example "dropbox.com") are not mistaken for x.com, and
 * non-profile paths such as share or intent links are skipped.
 *
 * @param $ Cheerio-style selector function bound to the document.
 * @returns {string | null} The handle, or `null` when no profile link exists.
 */
function extractHandle($) {
    const twitterProfile = /(?:^|\/\/|\.)(?:twitter\.com|x\.com)\/([A-Za-z0-9_]{1,20})(?:[/?#]|$)/i;
    const linkedinProfile = /(?:^|\/\/|\.)linkedin\.com\/in\/([^/?#]+)/i;
    // First path segments that are site features, not account names.
    const reservedPaths = new Set(['intent', 'share', 'home', 'search', 'hashtag', 'i']);
    for (const anchor of $('a[href]').toArray()) {
        const href = $(anchor).attr('href') ?? '';
        const twitter = href.match(twitterProfile);
        if (twitter && !reservedPaths.has(twitter[1].toLowerCase()))
            return `@${twitter[1]}`;
        const linkedin = href.match(linkedinProfile);
        if (linkedin)
            return linkedin[1] ?? null;
    }
    return null;
}
194
/**
 * Determines the language of a page.
 *
 * The primary subtag of the `<html lang>` attribute wins (for example "fr"
 * from "fr-CA"); when the attribute is missing or empty the language is
 * inferred from the page text.
 *
 * @param $ Cheerio-style selector function bound to the document.
 * @param text Visible text of the page, used for the fallback.
 * @returns {string | null} Lower-cased language code, or `null` when unknown.
 */
function extractLanguage($, text) {
    const declared = $('html').attr('lang');
    const primarySubtag = declared?.trim().split(/[-_]/)[0]?.toLowerCase();
    return primarySubtag ? primarySubtag : inferLanguageFromText(text);
}
200
/**
 * Guesses the language of a text from a handful of marker words.
 *
 * French markers are tested before English ones. The French pattern uses
 * Unicode-aware look-arounds instead of `\b`, because `\b` only knows ASCII
 * word characters and therefore never matched a phrase that starts with an
 * accented letter such as "à propos".
 *
 * @param text Text to inspect.
 * @returns {'fr' | 'en' | null} The guessed language, or `null` when the text
 *   is empty or contains no marker.
 */
function inferLanguageFromText(text) {
    if (!text)
        return null;
    if (/(?<![\p{L}\p{N}_])(?:bonjour|merci|à propos|mentions légales)(?![\p{L}\p{N}_])/iu.test(text))
        return 'fr';
    if (/\b(the|and|contact|about|privacy|terms|login|sign in)\b/i.test(text))
        return 'en';
    return null;
}
209
/**
 * Extracts the hostname of a URL string.
 *
 * @param value URL to parse.
 * @returns {string | null} The hostname, or `null` when `value` is not a
 *   parseable absolute URL.
 */
function hostnameOf(value) {
    let parsed;
    try {
        parsed = new URL(value);
    }
    catch {
        return null;
    }
    return parsed.hostname;
}
217
- //# sourceMappingURL=email-fallback.js.map
@@ -1,28 +0,0 @@
1
- import { type Browser } from 'playwright';
2
- import { findEmail } from './email-fallback.js';
3
- import { scrapeBetaListLaunches } from './scrape-betalist.js';
4
- import { scrapeLanding } from './scrape-landing.js';
5
/**
 * Parameters of a single {@link runCampaign} invocation.
 *
 * NOTE(review): the implementation is not part of this declaration file; the
 * field notes below are read off the names and types only — confirm against
 * the implementation.
 */
export interface RunCampaignOptions {
    /** Identifier of the run. */
    runId: string;
    /** Number of days to look back — presumably bounds which launches are scraped. */
    lookbackDays: number;
    /** Base URL of the API — presumably where results are reported. */
    apiBaseUrl: string;
    /** Token for the run — presumably used to authenticate against the API. */
    runToken: string;
    /** Sink for progress and diagnostic messages. */
    logger: {
        info(msg: string): void;
        warn(msg: string): void;
        error(msg: string): void;
    };
}
16
/**
 * Optional replacements for the collaborators of {@link runCampaign}.
 *
 * Every field is optional and typed after the function it stands in for.
 * NOTE(review): presumably the real implementation is used when a field is
 * omitted — confirm against the implementation.
 */
export interface RunCampaignDeps {
    /** Stand-in with the signature of `scrapeBetaListLaunches`. */
    scrapeLaunches?: typeof scrapeBetaListLaunches;
    /** Stand-in with the signature of `scrapeLanding`. */
    scrapeLanding?: typeof scrapeLanding;
    /** Stand-in with the signature of `findEmail`. */
    findEmail?: typeof findEmail;
    /** Stand-in with the signature of the global `fetch`. */
    fetch?: typeof fetch;
    /** Factory that resolves to a Playwright `Browser`. */
    launchBrowser?: () => Promise<Browser>;
}
23
/**
 * Runs one campaign and reports four counters.
 *
 * NOTE(review): the implementation is not part of this declaration file; the
 * meaning of each counter is inferred from its name — confirm.
 *
 * @param opts Parameters of the run.
 * @param deps Optional collaborator overrides.
 * @returns Counters named `scraped`, `inserted`, `disqualified` and `skipped`.
 */
export declare function runCampaign(opts: RunCampaignOptions, deps?: RunCampaignDeps): Promise<{
    scraped: number;
    inserted: number;
    disqualified: number;
    skipped: number;
}>;