mailpop 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -9,6 +9,7 @@ import { readFileSync } from 'fs';
9
9
  import path from 'path';
10
10
  import { fileURLToPath } from 'url';
11
11
  import { normalizeDomain, findWebsiteInRow } from './utils/normalize.js';
12
+ import { verifyEmailFallback } from './utils/validators.js';
12
13
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
13
14
  const pkg = JSON.parse(readFileSync(path.resolve(__dirname, '../package.json'), 'utf-8'));
14
15
  const version = pkg.version || 'unknown';
@@ -135,8 +136,7 @@ Options:
135
136
  await Logger.error('app-initialize-fail', undefined, undefined, `Failed to read input CSV headers: ${errorMsg}`);
136
137
  process.exit(1);
137
138
  }
138
- // Construct combined output headers, preserving original columns and adding new ones
139
- const outputHeaders = [...inputHeaders];
139
+ // Construct combined output headers, placing new email columns next to the website column
140
140
  const newColumns = [
141
141
  'email',
142
142
  'email_source',
@@ -144,11 +144,20 @@ Options:
144
144
  'confidence_score',
145
145
  'discovery_method',
146
146
  ];
147
- for (const col of newColumns) {
148
- if (!outputHeaders.includes(col)) {
149
- outputHeaders.push(col);
147
+ // Filter out any existing occurrences of these columns to avoid duplicates
148
+ const cleanInputHeaders = inputHeaders.filter((h) => !newColumns.includes(h));
149
+ // Find standard website column names
150
+ const websiteKey = cleanInputHeaders.find((h) => ['website', 'websiteurl', 'website_url', 'url', 'site', 'web'].includes(h.toLowerCase().trim()));
151
+ const outputHeaders = [];
152
+ for (const h of cleanInputHeaders) {
153
+ outputHeaders.push(h);
154
+ if (h === websiteKey) {
155
+ outputHeaders.push(...newColumns);
150
156
  }
151
157
  }
158
+ if (!websiteKey) {
159
+ outputHeaders.push(...newColumns);
160
+ }
152
161
  // 2. Initialize crawler and browser
153
162
  crawlerInstance = new Crawler();
154
163
  await crawlerInstance.initialize(config.headless);
@@ -216,15 +225,33 @@ Options:
216
225
  return;
217
226
  }
218
227
  // Map crawling result, retaining all original row keys
228
+ let selectedEmail = result.selectedEmail ? result.selectedEmail.email : '';
229
+ let emailSource = result.selectedEmail ? result.selectedEmail.emailSource : '';
230
+ let emailType = result.selectedEmail ? result.selectedEmail.emailType : '';
231
+ let confidenceScore = result.selectedEmail
232
+ ? String(result.selectedEmail.confidenceScore)
233
+ : '';
234
+ let discoveryMethod = result.selectedEmail ? result.selectedEmail.discoveryMethod : '';
235
+ // If no email detected, try to fall back to hello@domain
236
+ if (!selectedEmail) {
237
+ const fallbackDomain = normalizeDomain(target.domain);
238
+ const fallbackEmail = `hello@${fallbackDomain}`;
239
+ const isFallbackValid = await verifyEmailFallback(fallbackEmail);
240
+ if (isFallbackValid) {
241
+ selectedEmail = fallbackEmail;
242
+ emailSource = target.website;
243
+ emailType = 'role';
244
+ confidenceScore = '50';
245
+ discoveryMethod = 'fallback-hello';
246
+ }
247
+ }
219
248
  const outputRow = {
220
249
  ...row,
221
- email: result.selectedEmail ? result.selectedEmail.email : '',
222
- email_source: result.selectedEmail ? result.selectedEmail.emailSource : '',
223
- email_type: result.selectedEmail ? result.selectedEmail.emailType : '',
224
- confidence_score: result.selectedEmail
225
- ? String(result.selectedEmail.confidenceScore)
226
- : '',
227
- discovery_method: result.selectedEmail ? result.selectedEmail.discoveryMethod : '',
250
+ email: selectedEmail,
251
+ email_source: emailSource,
252
+ email_type: emailType,
253
+ confidence_score: confidenceScore,
254
+ discovery_method: discoveryMethod,
228
255
  };
229
256
  // Append output row incrementally matching the dynamic headers list
230
257
  await appendCsvRow(outputPath, outputRow, outputHeaders, false);
@@ -1,6 +1,9 @@
1
1
  import { normalizeDomain } from './normalize.js';
2
2
  import { config } from '../config.js';
3
+ import dns from 'dns/promises';
3
4
  const EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
5
+ const MAX_EMAIL_LENGTH = 254;
6
+ const MAX_LOCAL_PART_LENGTH = 64;
4
7
  const REJECTED_PREFIXES = [
5
8
  'noreply',
6
9
  'no-reply',
@@ -33,6 +36,9 @@ const REJECTED_DOMAINS = [
33
36
  * @param email - The email to check.
34
37
  */
35
38
  export function isValidEmail(email) {
39
+ if (email.length > MAX_EMAIL_LENGTH) {
40
+ return false;
41
+ }
36
42
  if (!EMAIL_REGEX.test(email)) {
37
43
  return false;
38
44
  }
@@ -42,6 +48,9 @@ export function isValidEmail(email) {
42
48
  }
43
49
  const localPart = parts[0].toLowerCase().trim();
44
50
  const domainPart = parts[1].toLowerCase().trim();
51
+ if (localPart.length > MAX_LOCAL_PART_LENGTH) {
52
+ return false;
53
+ }
45
54
  // Reject blacklisted prefixes
46
55
  if (REJECTED_PREFIXES.includes(localPart)) {
47
56
  return false;
@@ -59,6 +68,10 @@ export function isValidEmail(email) {
59
68
  if (REJECTED_DOMAINS.includes(domainPart)) {
60
69
  return false;
61
70
  }
71
+ // Reject Sentry ingest reporting domains
72
+ if (domainPart.includes('sentry.io')) {
73
+ return false;
74
+ }
62
75
  // Simple heuristics for temporary or obviously fake emails
63
76
  if (localPart.startsWith('noreply') ||
64
77
  localPart.startsWith('no-reply') ||
@@ -93,3 +106,41 @@ export function isDomainMatch(email, targetDomainOrUrl) {
93
106
  const targetDomain = normalizeDomain(targetDomainOrUrl);
94
107
  return emailDomain === targetDomain || emailDomain.endsWith('.' + targetDomain);
95
108
  }
109
/**
 * Verifies whether a fallback email address should be accepted.
 *
 * The address must first pass `isValidEmail`. It is then checked against the
 * Disify HTTP API. If that request throws, is aborted by the 3-second timeout,
 * or answers with a non-OK status, the function falls back to a DNS MX lookup
 * on the email's domain.
 *
 * @param email - The fallback email to verify.
 * @returns A promise resolving to `true` when the address is accepted, otherwise `false`.
 */
export async function verifyEmailFallback(email) {
  if (!isValidEmail(email)) {
    return false;
  }
  const domain = email.split('@')[1];
  if (!domain) {
    return false;
  }

  // 1. Try Disify email verification API
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 3000);
  try {
    // Percent-encode the address: the local part may legally contain `%` or `+`,
    // which must not be interpreted as URL syntax inside the path segment.
    const res = await fetch(`https://api.disify.com/v1/email/${encodeURIComponent(email)}`, {
      signal: controller.signal,
    });
    if (res.ok) {
      const data = await res.json();
      // Accept only when the response reports a valid format, a non-disposable
      // address, and a positive `dns` flag. Coerce so callers always get a
      // strict boolean, even if a field is missing from the payload.
      return Boolean(data.format && !data.disposable && data.dns);
    }
  } catch (_e) {
    // Deliberate best-effort: any API failure (network error, timeout abort,
    // malformed JSON) falls through to the direct DNS check below.
  } finally {
    // Always release the timer. Clearing it only here also keeps the abort
    // timeout active while the response body is being read.
    clearTimeout(timeoutId);
  }

  // 2. DNS MX records lookup fallback
  try {
    const mx = await dns.resolveMx(domain);
    return Array.isArray(mx) && mx.length > 0;
  } catch (_e) {
    // Lookup errors (for example, no MX records or an unknown domain) mean the
    // fallback address is not accepted.
    return false;
  }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mailpop",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "description": "Production-ready public contact email discovery tool from company websites.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",