mailpop 1.0.2 → 1.0.4
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/config.js +4 -0
- package/dist/index.js +24 -1
- package/dist/logger.js +29 -3
- package/dist/utils/validators.js +5 -0
- package/package.json +1 -1
package/dist/config.js
CHANGED
|
@@ -20,6 +20,10 @@ export const config = {
|
|
|
20
20
|
outputCsv: path.resolve(process.env.OUTPUT_CSV || 'output/output.csv'),
|
|
21
21
|
checkpointFile: path.resolve(process.env.CHECKPOINT_FILE || 'output/checkpoint.json'),
|
|
22
22
|
cacheDir: path.resolve(process.env.CACHE_DIR || 'output/cache'),
|
|
23
|
+
excludePrefixes: (process.env.EXCLUDE_PREFIXES || '')
|
|
24
|
+
.split(',')
|
|
25
|
+
.map((s) => s.trim().toLowerCase())
|
|
26
|
+
.filter(Boolean),
|
|
23
27
|
concurrency: getEnvNumber('CONCURRENCY', 5),
|
|
24
28
|
maxDepth: getEnvNumber('MAX_DEPTH', 2),
|
|
25
29
|
maxPagesPerSite: getEnvNumber('MAX_PAGES_PER_SITE', 25),
|
package/dist/index.js
CHANGED
|
@@ -5,8 +5,13 @@ import { Crawler } from './crawler.js';
|
|
|
5
5
|
import { Logger } from './logger.js';
|
|
6
6
|
import pLimit from 'p-limit';
|
|
7
7
|
import fs from 'fs/promises';
|
|
8
|
+
import { readFileSync } from 'fs';
|
|
8
9
|
import path from 'path';
|
|
10
|
+
import { fileURLToPath } from 'url';
|
|
9
11
|
import { normalizeDomain, findWebsiteInRow } from './utils/normalize.js';
|
|
12
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const pkg = JSON.parse(readFileSync(path.resolve(__dirname, '../package.json'), 'utf-8'));
|
|
14
|
+
const version = pkg.version || 'unknown';
|
|
10
15
|
let highestContiguousIndex = -1;
|
|
11
16
|
const completedIndices = new Set();
|
|
12
17
|
let completedUrls = [];
|
|
@@ -71,6 +76,7 @@ async function main() {
|
|
|
71
76
|
const args = process.argv.slice(2);
|
|
72
77
|
let inputPath = config.inputCsv;
|
|
73
78
|
let outputPath = config.outputCsv;
|
|
79
|
+
const positionals = [];
|
|
74
80
|
for (let i = 0; i < args.length; i++) {
|
|
75
81
|
if (args[i] === '-i' || args[i] === '--input') {
|
|
76
82
|
inputPath = path.resolve(args[i + 1]);
|
|
@@ -80,6 +86,19 @@ async function main() {
|
|
|
80
86
|
outputPath = path.resolve(args[i + 1]);
|
|
81
87
|
i++;
|
|
82
88
|
}
|
|
89
|
+
else if (args[i] === '-e' || args[i] === '--exclude') {
|
|
90
|
+
const excludeStr = args[i + 1] || '';
|
|
91
|
+
const list = excludeStr
|
|
92
|
+
.split(',')
|
|
93
|
+
.map((s) => s.trim().toLowerCase())
|
|
94
|
+
.filter(Boolean);
|
|
95
|
+
config.excludePrefixes = Array.from(new Set([...config.excludePrefixes, ...list]));
|
|
96
|
+
i++;
|
|
97
|
+
}
|
|
98
|
+
else if (args[i] === '-v' || args[i] === '--version') {
|
|
99
|
+
process.stdout.write(`mailpop v${version}\n`);
|
|
100
|
+
process.exit(0);
|
|
101
|
+
}
|
|
83
102
|
else if (args[i] === '-h' || args[i] === '--help') {
|
|
84
103
|
process.stdout.write(`
|
|
85
104
|
mailpop - CLI Guide
|
|
@@ -88,13 +107,17 @@ Usage: npx mailpop [options] [input.csv] [output.csv]
|
|
|
88
107
|
Options:
|
|
89
108
|
-i, --input <path> Path to the input CSV file
|
|
90
109
|
-o, --output <path> Path to the output CSV file
|
|
110
|
+
-e, --exclude <list> Comma-separated list of email local-parts to exclude
|
|
111
|
+
-v, --version Display the version number
|
|
91
112
|
-h, --help Display this help message
|
|
92
113
|
\n`);
|
|
93
114
|
process.exit(0);
|
|
94
115
|
}
|
|
116
|
+
else if (!args[i].startsWith('-')) {
|
|
117
|
+
positionals.push(args[i]);
|
|
118
|
+
}
|
|
95
119
|
}
|
|
96
120
|
// Fallback to positional arguments
|
|
97
|
-
const positionals = args.filter((a) => !a.startsWith('-'));
|
|
98
121
|
if (positionals.length >= 1) {
|
|
99
122
|
inputPath = path.resolve(positionals[0]);
|
|
100
123
|
}
|
package/dist/logger.js
CHANGED
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
const LOGS_DIR = path.resolve('logs');
|
|
4
|
+
// ANSI escape codes for styling
|
|
5
|
+
const RESET = '\x1b[0m';
|
|
6
|
+
const BOLD = '\x1b[1m';
|
|
7
|
+
const FG_CYAN = '\x1b[36m';
|
|
8
|
+
const FG_GREEN = '\x1b[32m';
|
|
9
|
+
const FG_RED = '\x1b[31m';
|
|
10
|
+
const FG_YELLOW = '\x1b[33m';
|
|
11
|
+
const FG_GRAY = '\x1b[90m';
|
|
12
|
+
const FG_WHITE = '\x1b[37m';
|
|
13
|
+
const TEXT_BLACK = '\x1b[30m';
|
|
14
|
+
const BG_CYAN = '\x1b[46m';
|
|
15
|
+
const BG_GREEN = '\x1b[42m';
|
|
16
|
+
const BG_RED = '\x1b[41m';
|
|
4
17
|
/**
|
|
5
18
|
* Ensures that the logs directory exists on disk.
|
|
6
19
|
*/
|
|
@@ -41,7 +54,12 @@ export class Logger {
|
|
|
41
54
|
result,
|
|
42
55
|
message,
|
|
43
56
|
};
|
|
44
|
-
const
|
|
57
|
+
const levelTag = `${BOLD}${BG_CYAN}${TEXT_BLACK} INFO ${RESET}`;
|
|
58
|
+
const domainStr = domain ? ` ${FG_GRAY}[${FG_CYAN}${domain}${FG_GRAY}]${RESET}` : '';
|
|
59
|
+
const actionStr = ` ${BOLD}${action}${RESET}`;
|
|
60
|
+
const resultStr = result ? ` -> ${FG_GREEN}${result}${RESET}` : '';
|
|
61
|
+
const msgStr = message ? ` | ${FG_GRAY}${message}${RESET}` : '';
|
|
62
|
+
const consoleMsg = `${levelTag}${domainStr}${actionStr}${resultStr}${msgStr}`;
|
|
45
63
|
process.stdout.write(consoleMsg + '\n');
|
|
46
64
|
await writeLog('app.log', entry);
|
|
47
65
|
}
|
|
@@ -58,7 +76,11 @@ export class Logger {
|
|
|
58
76
|
error: errorMsg,
|
|
59
77
|
stack,
|
|
60
78
|
};
|
|
61
|
-
const
|
|
79
|
+
const levelTag = `${BOLD}${BG_RED}${FG_WHITE} ERROR ${RESET}`;
|
|
80
|
+
const domainStr = domain ? ` ${FG_GRAY}[${FG_RED}${domain}${FG_GRAY}]${RESET}` : '';
|
|
81
|
+
const actionStr = ` ${BOLD}${action}${RESET}`;
|
|
82
|
+
const errorStr = errorMsg ? `: ${FG_RED}${errorMsg}${RESET}` : '';
|
|
83
|
+
const consoleMsg = `${levelTag}${domainStr}${actionStr}${errorStr}`;
|
|
62
84
|
process.stderr.write(consoleMsg + '\n');
|
|
63
85
|
await writeLog('app.log', entry);
|
|
64
86
|
await writeLog('errors.log', entry);
|
|
@@ -75,7 +97,11 @@ export class Logger {
|
|
|
75
97
|
confidenceScore: confidence,
|
|
76
98
|
discoveryMethod: method,
|
|
77
99
|
};
|
|
78
|
-
const
|
|
100
|
+
const levelTag = `${BOLD}${BG_GREEN}${TEXT_BLACK} EMAIL ${RESET}`;
|
|
101
|
+
const domainStr = ` ${FG_GRAY}[${FG_CYAN}${domain}${FG_GRAY}]${RESET}`;
|
|
102
|
+
const emailStr = ` Found ${BOLD}${FG_GREEN}${email}${RESET}`;
|
|
103
|
+
const detailsStr = ` (${FG_YELLOW}${method}${RESET}, confidence: ${BOLD}${confidence}${RESET}) at ${FG_GRAY}${source}${RESET}`;
|
|
104
|
+
const consoleMsg = `${levelTag}${domainStr}${emailStr}${detailsStr}`;
|
|
79
105
|
process.stdout.write(consoleMsg + '\n');
|
|
80
106
|
await writeLog('discovered-emails.log', entry);
|
|
81
107
|
}
|
package/dist/utils/validators.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { normalizeDomain } from './normalize.js';
|
|
2
|
+
import { config } from '../config.js';
|
|
2
3
|
const EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
|
|
3
4
|
const REJECTED_PREFIXES = [
|
|
4
5
|
'noreply',
|
|
@@ -45,6 +46,10 @@ export function isValidEmail(email) {
|
|
|
45
46
|
if (REJECTED_PREFIXES.includes(localPart)) {
|
|
46
47
|
return false;
|
|
47
48
|
}
|
|
49
|
+
// Reject user-configured excluded prefixes
|
|
50
|
+
if (config.excludePrefixes.includes(localPart)) {
|
|
51
|
+
return false;
|
|
52
|
+
}
|
|
48
53
|
// Reject blacklisted domains
|
|
49
54
|
if (REJECTED_DOMAINS.includes(domainPart)) {
|
|
50
55
|
return false;
|