mailpop 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +38 -12
- package/dist/logger.js +2 -1
- package/dist/utils/validators.js +43 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -9,6 +9,7 @@ import { readFileSync } from 'fs';
|
|
|
9
9
|
import path from 'path';
|
|
10
10
|
import { fileURLToPath } from 'url';
|
|
11
11
|
import { normalizeDomain, findWebsiteInRow } from './utils/normalize.js';
|
|
12
|
+
import { verifyEmailFallback } from './utils/validators.js';
|
|
12
13
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
14
|
const pkg = JSON.parse(readFileSync(path.resolve(__dirname, '../package.json'), 'utf-8'));
|
|
14
15
|
const version = pkg.version || 'unknown';
|
|
@@ -135,8 +136,7 @@ Options:
|
|
|
135
136
|
await Logger.error('app-initialize-fail', undefined, undefined, `Failed to read input CSV headers: ${errorMsg}`);
|
|
136
137
|
process.exit(1);
|
|
137
138
|
}
|
|
138
|
-
// Construct combined output headers,
|
|
139
|
-
const outputHeaders = [...inputHeaders];
|
|
139
|
+
// Construct combined output headers, placing new email columns next to the website column
|
|
140
140
|
const newColumns = [
|
|
141
141
|
'email',
|
|
142
142
|
'email_source',
|
|
@@ -144,11 +144,20 @@ Options:
|
|
|
144
144
|
'confidence_score',
|
|
145
145
|
'discovery_method',
|
|
146
146
|
];
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
147
|
+
// Filter out any existing occurrences of these columns to avoid duplicates
|
|
148
|
+
const cleanInputHeaders = inputHeaders.filter((h) => !newColumns.includes(h));
|
|
149
|
+
// Find standard website column names
|
|
150
|
+
const websiteKey = cleanInputHeaders.find((h) => ['website', 'websiteurl', 'website_url', 'url', 'site', 'web'].includes(h.toLowerCase().trim()));
|
|
151
|
+
const outputHeaders = [];
|
|
152
|
+
for (const h of cleanInputHeaders) {
|
|
153
|
+
outputHeaders.push(h);
|
|
154
|
+
if (h === websiteKey) {
|
|
155
|
+
outputHeaders.push(...newColumns);
|
|
150
156
|
}
|
|
151
157
|
}
|
|
158
|
+
if (!websiteKey) {
|
|
159
|
+
outputHeaders.push(...newColumns);
|
|
160
|
+
}
|
|
152
161
|
// 2. Initialize crawler and browser
|
|
153
162
|
crawlerInstance = new Crawler();
|
|
154
163
|
await crawlerInstance.initialize(config.headless);
|
|
@@ -216,15 +225,32 @@ Options:
|
|
|
216
225
|
return;
|
|
217
226
|
}
|
|
218
227
|
// Map crawling result, retaining all original row keys
|
|
228
|
+
let selectedEmail = result.selectedEmail ? result.selectedEmail.email : '';
|
|
229
|
+
let emailSource = result.selectedEmail ? result.selectedEmail.emailSource : '';
|
|
230
|
+
let emailType = result.selectedEmail ? result.selectedEmail.emailType : '';
|
|
231
|
+
let confidenceScore = result.selectedEmail
|
|
232
|
+
? String(result.selectedEmail.confidenceScore)
|
|
233
|
+
: '';
|
|
234
|
+
let discoveryMethod = result.selectedEmail ? result.selectedEmail.discoveryMethod : '';
|
|
235
|
+
// If no email detected, try to fall back to hello@domain
|
|
236
|
+
if (!selectedEmail) {
|
|
237
|
+
const fallbackEmail = `hello@${target.domain}`;
|
|
238
|
+
const isFallbackValid = await verifyEmailFallback(fallbackEmail);
|
|
239
|
+
if (isFallbackValid) {
|
|
240
|
+
selectedEmail = fallbackEmail;
|
|
241
|
+
emailSource = target.website;
|
|
242
|
+
emailType = 'role';
|
|
243
|
+
confidenceScore = '50';
|
|
244
|
+
discoveryMethod = 'fallback-hello';
|
|
245
|
+
}
|
|
246
|
+
}
|
|
219
247
|
const outputRow = {
|
|
220
248
|
...row,
|
|
221
|
-
email:
|
|
222
|
-
email_source:
|
|
223
|
-
email_type:
|
|
224
|
-
confidence_score:
|
|
225
|
-
|
|
226
|
-
: '',
|
|
227
|
-
discovery_method: result.selectedEmail ? result.selectedEmail.discoveryMethod : '',
|
|
249
|
+
email: selectedEmail,
|
|
250
|
+
email_source: emailSource,
|
|
251
|
+
email_type: emailType,
|
|
252
|
+
confidence_score: confidenceScore,
|
|
253
|
+
discovery_method: discoveryMethod,
|
|
228
254
|
};
|
|
229
255
|
// Append output row incrementally matching the dynamic headers list
|
|
230
256
|
await appendCsvRow(outputPath, outputRow, outputHeaders, false);
|
package/dist/logger.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import path from 'path';
|
|
3
|
-
|
|
3
|
+
import os from 'os';
|
|
4
|
+
const LOGS_DIR = path.join(os.tmpdir(), 'mailpop-logs');
|
|
4
5
|
// ANSI escape codes for styling
|
|
5
6
|
const RESET = '\x1b[0m';
|
|
6
7
|
const BOLD = '\x1b[1m';
|
package/dist/utils/validators.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { normalizeDomain } from './normalize.js';
|
|
2
2
|
import { config } from '../config.js';
|
|
3
|
+
import dns from 'dns/promises';
|
|
3
4
|
const EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
|
|
4
5
|
const REJECTED_PREFIXES = [
|
|
5
6
|
'noreply',
|
|
@@ -59,6 +60,10 @@ export function isValidEmail(email) {
|
|
|
59
60
|
if (REJECTED_DOMAINS.includes(domainPart)) {
|
|
60
61
|
return false;
|
|
61
62
|
}
|
|
63
|
+
// Reject Sentry ingest reporting domains
|
|
64
|
+
if (domainPart.includes('sentry.io')) {
|
|
65
|
+
return false;
|
|
66
|
+
}
|
|
62
67
|
// Simple heuristics for temporary or obviously fake emails
|
|
63
68
|
if (localPart.startsWith('noreply') ||
|
|
64
69
|
localPart.startsWith('no-reply') ||
|
|
@@ -93,3 +98,41 @@ export function isDomainMatch(email, targetDomainOrUrl) {
|
|
|
93
98
|
const targetDomain = normalizeDomain(targetDomainOrUrl);
|
|
94
99
|
return emailDomain === targetDomain || emailDomain.endsWith('.' + targetDomain);
|
|
95
100
|
}
|
|
101
|
+
/**
 * Verifies if a fallback email is valid using Disify API with local DNS MX lookup fallback.
 *
 * The address is first screened with `isValidEmail`. The Disify API is then queried under a
 * 3-second abort timeout; when it answers with an OK status and a readable JSON body, its
 * verdict is final. On any API failure (network error, timeout, non-OK status, unreadable
 * body) the MX records of the email's domain are resolved directly instead.
 *
 * @param {string} email - The fallback email to verify.
 * @returns {Promise<boolean>} `true` when the address passed verification, otherwise `false`.
 */
export async function verifyEmailFallback(email) {
    if (!isValidEmail(email)) {
        return false;
    }
    const domain = email.split('@')[1];
    if (!domain) {
        return false;
    }
    // 1. Try Disify email verification API
    let timeoutId;
    try {
        const controller = new AbortController();
        timeoutId = setTimeout(() => controller.abort(), 3000);
        const res = await fetch(`https://api.disify.com/v1/email/${email}`, {
            signal: controller.signal,
        });
        if (res.ok) {
            // The abort timer is still armed here, so a stalled body read is cut off as well.
            const data = await res.json();
            // Email is valid if format matches, not disposable, and DNS MX records exist.
            // Coerced so that a payload with missing fields yields `false`, not `undefined`.
            return Boolean(data.format && !data.disposable && data.dns);
        }
    }
    catch (_e) {
        // If API fails, fall back to direct DNS check
    }
    finally {
        // Always disarm the timer: a pending timeout would otherwise keep the event loop
        // alive for up to 3 seconds after a failed request.
        clearTimeout(timeoutId);
    }
    // 2. DNS MX records lookup fallback
    try {
        const mx = await dns.resolveMx(domain);
        return Array.isArray(mx) && mx.length > 0;
    }
    catch (_e) {
        return false;
    }
}
|