webpeel 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -5
- package/dist/cli.js +1299 -85
- package/dist/cli.js.map +1 -1
- package/dist/core/application-tracker.d.ts +85 -0
- package/dist/core/application-tracker.d.ts.map +1 -0
- package/dist/core/application-tracker.js +184 -0
- package/dist/core/application-tracker.js.map +1 -0
- package/dist/core/apply.d.ts +163 -0
- package/dist/core/apply.d.ts.map +1 -0
- package/dist/core/apply.js +817 -0
- package/dist/core/apply.js.map +1 -0
- package/dist/core/branding.d.ts +1 -1
- package/dist/core/branding.d.ts.map +1 -1
- package/dist/core/budget.d.ts +43 -0
- package/dist/core/budget.d.ts.map +1 -0
- package/dist/core/budget.js +325 -0
- package/dist/core/budget.js.map +1 -0
- package/dist/core/challenge-detection.d.ts +27 -0
- package/dist/core/challenge-detection.d.ts.map +1 -0
- package/dist/core/challenge-detection.js +436 -0
- package/dist/core/challenge-detection.js.map +1 -0
- package/dist/core/change-tracking.d.ts.map +1 -1
- package/dist/core/change-tracking.js +10 -1
- package/dist/core/change-tracking.js.map +1 -1
- package/dist/core/crawler.d.ts.map +1 -1
- package/dist/core/crawler.js +17 -4
- package/dist/core/crawler.js.map +1 -1
- package/dist/core/diff.d.ts +62 -0
- package/dist/core/diff.d.ts.map +1 -0
- package/dist/core/diff.js +289 -0
- package/dist/core/diff.js.map +1 -0
- package/dist/core/extract-listings.d.ts +39 -0
- package/dist/core/extract-listings.d.ts.map +1 -0
- package/dist/core/extract-listings.js +331 -0
- package/dist/core/extract-listings.js.map +1 -0
- package/dist/core/extract.d.ts.map +1 -1
- package/dist/core/extract.js +15 -2
- package/dist/core/extract.js.map +1 -1
- package/dist/core/fetcher.d.ts +29 -3
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +158 -20
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/human.d.ts +176 -0
- package/dist/core/human.d.ts.map +1 -0
- package/dist/core/human.js +681 -0
- package/dist/core/human.js.map +1 -0
- package/dist/core/jobs.d.ts +12 -2
- package/dist/core/jobs.d.ts.map +1 -1
- package/dist/core/jobs.js +124 -2
- package/dist/core/jobs.js.map +1 -1
- package/dist/core/map.d.ts.map +1 -1
- package/dist/core/map.js +14 -2
- package/dist/core/map.js.map +1 -1
- package/dist/core/paginate.d.ts +32 -0
- package/dist/core/paginate.d.ts.map +1 -0
- package/dist/core/paginate.js +107 -0
- package/dist/core/paginate.js.map +1 -0
- package/dist/core/rate-governor.d.ts +81 -0
- package/dist/core/rate-governor.d.ts.map +1 -0
- package/dist/core/rate-governor.js +238 -0
- package/dist/core/rate-governor.js.map +1 -0
- package/dist/core/search-provider.d.ts +5 -0
- package/dist/core/search-provider.d.ts.map +1 -1
- package/dist/core/search-provider.js +81 -2
- package/dist/core/search-provider.js.map +1 -1
- package/dist/core/site-search.d.ts +45 -0
- package/dist/core/site-search.d.ts.map +1 -0
- package/dist/core/site-search.js +253 -0
- package/dist/core/site-search.js.map +1 -0
- package/dist/core/strategies.d.ts +8 -0
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +185 -45
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/strategy-hooks.d.ts +6 -0
- package/dist/core/strategy-hooks.d.ts.map +1 -1
- package/dist/core/strategy-hooks.js.map +1 -1
- package/dist/core/table-format.d.ts +31 -0
- package/dist/core/table-format.d.ts.map +1 -0
- package/dist/core/table-format.js +147 -0
- package/dist/core/table-format.js.map +1 -0
- package/dist/core/user-agents.d.ts +58 -0
- package/dist/core/user-agents.d.ts.map +1 -0
- package/dist/core/user-agents.js +159 -0
- package/dist/core/user-agents.js.map +1 -0
- package/dist/core/watch.d.ts +100 -0
- package/dist/core/watch.d.ts.map +1 -0
- package/dist/core/watch.js +368 -0
- package/dist/core/watch.js.map +1 -0
- package/dist/index.d.ts +13 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +41 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +3 -0
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +73 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +1 -1
- package/package.json +3 -3
package/dist/cli.js
CHANGED
|
@@ -18,6 +18,8 @@ import { writeFileSync, readFileSync } from 'fs';
|
|
|
18
18
|
import { peel, peelBatch, cleanup } from './index.js';
|
|
19
19
|
import { checkUsage, showUsageFooter, handleLogin, handleLogout, handleUsage, loadConfig, saveConfig } from './cli-auth.js';
|
|
20
20
|
import { getCache, setCache, parseTTL, clearCache, cacheStats } from './cache.js';
|
|
21
|
+
import { estimateTokens } from './core/markdown.js';
|
|
22
|
+
import { distillToBudget, budgetListings } from './core/budget.js';
|
|
21
23
|
const program = new Command();
|
|
22
24
|
// Read version from package.json dynamically
|
|
23
25
|
import { fileURLToPath } from 'url';
|
|
@@ -115,6 +117,8 @@ function parseActions(actionStrings) {
|
|
|
115
117
|
return { type: 'hover', selector: value };
|
|
116
118
|
case 'waitFor':
|
|
117
119
|
return { type: 'waitForSelector', selector: value };
|
|
120
|
+
case 'wait-for':
|
|
121
|
+
return { type: 'waitForSelector', selector: value, timeout: 10000 };
|
|
118
122
|
case 'screenshot':
|
|
119
123
|
return { type: 'screenshot' };
|
|
120
124
|
default:
|
|
@@ -130,7 +134,7 @@ program
|
|
|
130
134
|
.option('--html', 'Output raw HTML instead of markdown')
|
|
131
135
|
.option('--text', 'Output plain text instead of markdown')
|
|
132
136
|
.option('--json', 'Output as JSON')
|
|
133
|
-
.option('-t, --timeout <ms>', 'Request timeout (ms)', parseInt, 30000)
|
|
137
|
+
.option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
|
|
134
138
|
.option('--ua <agent>', 'Custom user agent')
|
|
135
139
|
.option('-s, --silent', 'Silent mode (no spinner)')
|
|
136
140
|
.option('--screenshot [path]', 'Take a screenshot (optionally save to file path)')
|
|
@@ -142,7 +146,8 @@ program
|
|
|
142
146
|
.option('--only-main-content', 'Shortcut for --include-tags main,article')
|
|
143
147
|
.option('-H, --header <header...>', 'Custom headers (e.g., "Authorization: Bearer token")')
|
|
144
148
|
.option('--cookie <cookie...>', 'Cookies to set (e.g., "session=abc123")')
|
|
145
|
-
.option('--cache <ttl>', 'Cache results locally (e.g., "5m", "1h", "1d")')
|
|
149
|
+
.option('--cache <ttl>', 'Cache results locally (e.g., "5m", "1h", "1d") — default: 5m')
|
|
150
|
+
.option('--no-cache', 'Disable automatic caching for this request')
|
|
146
151
|
.option('--links', 'Output only the links found on the page')
|
|
147
152
|
.option('--images', 'Output image URLs from the page')
|
|
148
153
|
.option('--meta', 'Output only the page metadata (title, description, author, etc.)')
|
|
@@ -155,57 +160,112 @@ program
|
|
|
155
160
|
.option('--location <country>', 'ISO country code for geo-targeting (e.g., "US", "DE", "JP")')
|
|
156
161
|
.option('--language <lang>', 'Language preference (e.g., "en", "de", "ja")')
|
|
157
162
|
.option('--max-tokens <n>', 'Maximum token count for output (truncate if exceeded)', parseInt)
|
|
163
|
+
.option('--budget <n>', 'Smart token budget — distill content to fit within N tokens (heuristic, no LLM key needed)', parseInt)
|
|
164
|
+
.option('--extract-all', 'Auto-detect and extract repeated listing items (e.g., search results)')
|
|
165
|
+
.option('--scroll-extract [count]', 'Scroll page N times to load lazy content, then extract (implies --render)', (v) => parseInt(v, 10))
|
|
166
|
+
.option('--csv', 'Output extraction results as CSV')
|
|
167
|
+
.option('--table', 'Output extraction results as a formatted table')
|
|
168
|
+
.option('--pages <n>', 'Follow pagination "Next" links for N pages (max 10)', (v) => parseInt(v, 10))
|
|
169
|
+
.option('--profile <path>', 'Use a persistent browser profile directory (cookies/sessions survive between calls)')
|
|
170
|
+
.option('--headed', 'Run browser in headed (visible) mode — useful for profile setup and debugging')
|
|
171
|
+
.option('--agent', 'Agent mode: sets --json, --silent, --extract-all, and --budget 4000 (override with --budget N)')
|
|
158
172
|
.action(async (url, options) => {
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
173
|
+
// --agent sets sensible defaults for AI agents; explicit flags override
|
|
174
|
+
if (options.agent) {
|
|
175
|
+
if (!options.json)
|
|
176
|
+
options.json = true;
|
|
177
|
+
if (!options.silent)
|
|
178
|
+
options.silent = true;
|
|
179
|
+
if (!options.extractAll)
|
|
180
|
+
options.extractAll = true;
|
|
181
|
+
if (options.budget === undefined)
|
|
182
|
+
options.budget = 4000;
|
|
183
|
+
}
|
|
184
|
+
const isJson = options.json;
|
|
185
|
+
// --- #5: Concise error for missing URL (no help dump) ---
|
|
186
|
+
if (!url || url.trim() === '') {
|
|
187
|
+
if (isJson) {
|
|
188
|
+
await writeStdout(JSON.stringify({ error: 'URL is required', code: 'URL_REQUIRED' }) + '\n');
|
|
189
|
+
}
|
|
190
|
+
else {
|
|
191
|
+
console.error('Error: URL is required');
|
|
192
|
+
console.error('Usage: webpeel <url> [options]');
|
|
193
|
+
console.error('Run "webpeel --help" for full usage.');
|
|
194
|
+
}
|
|
195
|
+
process.exit(1);
|
|
196
|
+
}
|
|
197
|
+
// --- #6: Helper to output JSON errors and exit ---
|
|
198
|
+
function exitWithJsonError(message, code) {
|
|
199
|
+
if (isJson) {
|
|
200
|
+
process.stdout.write(JSON.stringify({ error: message, code }) + '\n');
|
|
201
|
+
}
|
|
202
|
+
else {
|
|
203
|
+
console.error(`Error: ${message}`);
|
|
204
|
+
}
|
|
162
205
|
process.exit(1);
|
|
163
206
|
}
|
|
164
207
|
// SECURITY: Enhanced URL validation
|
|
165
208
|
if (url.length > 2048) {
|
|
166
|
-
|
|
167
|
-
process.exit(1);
|
|
209
|
+
exitWithJsonError('URL too long (max 2048 characters)', 'INVALID_URL');
|
|
168
210
|
}
|
|
169
211
|
// Check for control characters
|
|
170
212
|
if (/[\x00-\x1F\x7F]/.test(url)) {
|
|
171
|
-
|
|
172
|
-
process.exit(1);
|
|
213
|
+
exitWithJsonError('URL contains invalid control characters', 'INVALID_URL');
|
|
173
214
|
}
|
|
174
215
|
// Validate URL format
|
|
175
216
|
try {
|
|
176
217
|
const parsed = new URL(url);
|
|
177
218
|
if (!['http:', 'https:'].includes(parsed.protocol)) {
|
|
178
|
-
|
|
179
|
-
process.exit(1);
|
|
219
|
+
exitWithJsonError('Only HTTP and HTTPS protocols are allowed', 'INVALID_URL');
|
|
180
220
|
}
|
|
181
221
|
}
|
|
182
222
|
catch {
|
|
183
|
-
|
|
184
|
-
process.exit(1);
|
|
223
|
+
exitWithJsonError(`Invalid URL format: ${url}`, 'INVALID_URL');
|
|
185
224
|
}
|
|
186
225
|
const useStealth = options.stealth || false;
|
|
187
226
|
// Check usage quota
|
|
188
227
|
const usageCheck = await checkUsage();
|
|
189
228
|
if (!usageCheck.allowed) {
|
|
229
|
+
if (isJson) {
|
|
230
|
+
await writeStdout(JSON.stringify({ error: usageCheck.message, code: 'BLOCKED' }) + '\n');
|
|
231
|
+
process.exit(1);
|
|
232
|
+
}
|
|
190
233
|
console.error(usageCheck.message);
|
|
191
234
|
process.exit(1);
|
|
192
235
|
}
|
|
193
236
|
// Check cache first (before spinner/network)
|
|
237
|
+
// Default: 5m TTL for all CLI fetches unless --no-cache is set
|
|
194
238
|
let cacheTtlMs;
|
|
195
|
-
|
|
239
|
+
const cacheDisabled = options.cache === false; // --no-cache sets options.cache to false
|
|
240
|
+
const explicitTtl = typeof options.cache === 'string' ? options.cache : undefined;
|
|
241
|
+
if (!cacheDisabled) {
|
|
242
|
+
const ttlStr = explicitTtl || '5m';
|
|
196
243
|
try {
|
|
197
|
-
cacheTtlMs = parseTTL(
|
|
244
|
+
cacheTtlMs = parseTTL(ttlStr);
|
|
198
245
|
}
|
|
199
246
|
catch (e) {
|
|
200
|
-
|
|
201
|
-
process.exit(1);
|
|
247
|
+
exitWithJsonError(e.message, 'FETCH_FAILED');
|
|
202
248
|
}
|
|
203
|
-
const
|
|
204
|
-
|
|
249
|
+
const cacheOptions = {
|
|
250
|
+
render: options.render,
|
|
251
|
+
stealth: options.stealth,
|
|
252
|
+
selector: options.selector,
|
|
253
|
+
format: options.html ? 'html' : options.text ? 'text' : 'markdown',
|
|
254
|
+
budget: null, // Budget excluded from cache key — cache stores full content
|
|
255
|
+
};
|
|
256
|
+
const cachedResult = getCache(url, cacheOptions);
|
|
257
|
+
if (cachedResult) {
|
|
205
258
|
if (!options.silent) {
|
|
206
|
-
console.error(`\x1b[36m⚡ Cache hit\x1b[0m (TTL: ${
|
|
259
|
+
console.error(`\x1b[36m⚡ Cache hit\x1b[0m (TTL: ${ttlStr})`);
|
|
260
|
+
}
|
|
261
|
+
// Apply budget to cached content (cache stores full, budget is post-process)
|
|
262
|
+
if (options.budget && options.budget > 0 && cachedResult.content) {
|
|
263
|
+
const { distillToBudget } = await import('./core/budget.js');
|
|
264
|
+
const fmt = options.text ? 'text' : 'markdown';
|
|
265
|
+
cachedResult.content = distillToBudget(cachedResult.content, options.budget, fmt);
|
|
266
|
+
cachedResult.tokens = Math.ceil(cachedResult.content.length / 4);
|
|
207
267
|
}
|
|
208
|
-
outputResult(
|
|
268
|
+
await outputResult(cachedResult, options, { cached: true });
|
|
209
269
|
process.exit(0);
|
|
210
270
|
}
|
|
211
271
|
}
|
|
@@ -213,8 +273,7 @@ program
|
|
|
213
273
|
try {
|
|
214
274
|
// Validate options
|
|
215
275
|
if (options.wait && (options.wait < 0 || options.wait > 60000)) {
|
|
216
|
-
|
|
217
|
-
process.exit(1);
|
|
276
|
+
throw Object.assign(new Error('Wait time must be between 0 and 60000ms'), { _code: 'FETCH_FAILED' });
|
|
218
277
|
}
|
|
219
278
|
// Parse custom headers
|
|
220
279
|
let headers;
|
|
@@ -223,9 +282,7 @@ program
|
|
|
223
282
|
for (const header of options.header) {
|
|
224
283
|
const colonIndex = header.indexOf(':');
|
|
225
284
|
if (colonIndex === -1) {
|
|
226
|
-
|
|
227
|
-
console.error('Expected format: "Key: Value"');
|
|
228
|
-
process.exit(1);
|
|
285
|
+
throw Object.assign(new Error(`Invalid header format: ${header}. Expected "Key: Value"`), { _code: 'FETCH_FAILED' });
|
|
229
286
|
}
|
|
230
287
|
const key = header.slice(0, colonIndex).trim();
|
|
231
288
|
const value = header.slice(colonIndex + 1).trim();
|
|
@@ -239,8 +296,7 @@ program
|
|
|
239
296
|
actions = parseActions(options.action);
|
|
240
297
|
}
|
|
241
298
|
catch (e) {
|
|
242
|
-
|
|
243
|
-
process.exit(1);
|
|
299
|
+
throw Object.assign(new Error(e.message), { _code: 'FETCH_FAILED' });
|
|
244
300
|
}
|
|
245
301
|
}
|
|
246
302
|
// Parse extract
|
|
@@ -254,8 +310,7 @@ program
|
|
|
254
310
|
llmBaseUrl: process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1',
|
|
255
311
|
};
|
|
256
312
|
if (!extract.llmApiKey) {
|
|
257
|
-
|
|
258
|
-
process.exit(1);
|
|
313
|
+
throw Object.assign(new Error('--llm-extract requires OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
|
|
259
314
|
}
|
|
260
315
|
}
|
|
261
316
|
else if (options.extract) {
|
|
@@ -264,15 +319,13 @@ program
|
|
|
264
319
|
extract = { selectors: JSON.parse(options.extract) };
|
|
265
320
|
}
|
|
266
321
|
catch {
|
|
267
|
-
|
|
268
|
-
process.exit(1);
|
|
322
|
+
throw Object.assign(new Error('--extract must be valid JSON (e.g., \'{"title": "h1", "price": ".price"}\')'), { _code: 'FETCH_FAILED' });
|
|
269
323
|
}
|
|
270
324
|
}
|
|
271
325
|
// Validate maxTokens
|
|
272
326
|
if (options.maxTokens !== undefined) {
|
|
273
327
|
if (isNaN(options.maxTokens) || options.maxTokens < 100) {
|
|
274
|
-
|
|
275
|
-
process.exit(1);
|
|
328
|
+
throw Object.assign(new Error('--max-tokens must be at least 100'), { _code: 'FETCH_FAILED' });
|
|
276
329
|
}
|
|
277
330
|
}
|
|
278
331
|
// Parse include-tags and exclude-tags
|
|
@@ -301,7 +354,20 @@ program
|
|
|
301
354
|
// Build peel options
|
|
302
355
|
// --stealth auto-enables --render (stealth requires browser)
|
|
303
356
|
// --action auto-enables --render (actions require browser)
|
|
304
|
-
|
|
357
|
+
// --scroll-extract implies --render (needs browser)
|
|
358
|
+
const scrollExtractCount = options.scrollExtract !== undefined
|
|
359
|
+
? (typeof options.scrollExtract === 'number' ? options.scrollExtract : 3)
|
|
360
|
+
: 0;
|
|
361
|
+
const useRender = options.render || options.stealth || (actions && actions.length > 0) || scrollExtractCount > 0 || false;
|
|
362
|
+
// Inject scroll actions when --scroll-extract is used
|
|
363
|
+
if (scrollExtractCount > 0) {
|
|
364
|
+
const scrollActions = [];
|
|
365
|
+
for (let i = 0; i < scrollExtractCount; i++) {
|
|
366
|
+
scrollActions.push({ type: 'scroll', to: 'bottom' });
|
|
367
|
+
scrollActions.push({ type: 'wait', ms: 1500 });
|
|
368
|
+
}
|
|
369
|
+
actions = actions ? [...actions, ...scrollActions] : scrollActions;
|
|
370
|
+
}
|
|
305
371
|
const peelOptions = {
|
|
306
372
|
render: useRender,
|
|
307
373
|
stealth: options.stealth || false,
|
|
@@ -319,16 +385,20 @@ program
|
|
|
319
385
|
raw: options.raw || false,
|
|
320
386
|
actions,
|
|
321
387
|
maxTokens: options.maxTokens,
|
|
388
|
+
// Note: budget is applied AFTER caching (so cache stores full content)
|
|
389
|
+
// We pass it to peel() for programmatic API compatibility, but the CLI
|
|
390
|
+
// also applies it post-fetch (see below) to ensure cache stores full result.
|
|
322
391
|
extract,
|
|
323
392
|
images: options.images || false,
|
|
324
393
|
location: locationOptions,
|
|
394
|
+
profileDir: options.profile || undefined,
|
|
395
|
+
headed: options.headed || false,
|
|
325
396
|
};
|
|
326
397
|
// Add summary option if requested
|
|
327
398
|
if (options.summary) {
|
|
328
399
|
const llmApiKey = options.llmKey || process.env.OPENAI_API_KEY;
|
|
329
400
|
if (!llmApiKey) {
|
|
330
|
-
|
|
331
|
-
process.exit(1);
|
|
401
|
+
throw Object.assign(new Error('--summary requires --llm-key or OPENAI_API_KEY environment variable'), { _code: 'FETCH_FAILED' });
|
|
332
402
|
}
|
|
333
403
|
peelOptions.summary = true;
|
|
334
404
|
peelOptions.llm = {
|
|
@@ -371,12 +441,162 @@ program
|
|
|
371
441
|
delete result.screenshot;
|
|
372
442
|
}
|
|
373
443
|
}
|
|
374
|
-
// Store in cache
|
|
375
|
-
if (cacheTtlMs) {
|
|
376
|
-
setCache(url, result, cacheTtlMs, {
|
|
444
|
+
// Store full result in cache (before budget distillation so cache is reusable)
|
|
445
|
+
if (cacheTtlMs && !cacheDisabled) {
|
|
446
|
+
setCache(url, result, cacheTtlMs, {
|
|
447
|
+
render: options.render,
|
|
448
|
+
stealth: useStealth,
|
|
449
|
+
selector: options.selector,
|
|
450
|
+
format: peelOptions.format,
|
|
451
|
+
budget: null, // Budget excluded — cache stores full content, budget applied post-cache
|
|
452
|
+
});
|
|
453
|
+
}
|
|
454
|
+
// Apply smart budget distillation AFTER caching (cache always stores full content)
|
|
455
|
+
// When --agent is set, always apply budget even with --extract-all (listings will be budgeted
|
|
456
|
+
// separately, but if no listings are found the content itself still needs trimming).
|
|
457
|
+
const skipBudgetForExtract = (options.extractAll || options.scrollExtract !== undefined) && !options.agent;
|
|
458
|
+
let contentTruncated = false;
|
|
459
|
+
if (options.budget && options.budget > 0 && !skipBudgetForExtract) {
|
|
460
|
+
const budgetFormat = peelOptions.format === 'text' ? 'text' : 'markdown';
|
|
461
|
+
const distilled = distillToBudget(result.content, options.budget, budgetFormat);
|
|
462
|
+
if (distilled !== result.content) {
|
|
463
|
+
contentTruncated = true;
|
|
464
|
+
result.content = distilled;
|
|
465
|
+
result.tokens = estimateTokens(distilled);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
// --- #4: Content quality warning ---
|
|
469
|
+
const isHtmlContent = result.contentType ? result.contentType.toLowerCase().includes('html') : true;
|
|
470
|
+
const isRedirect = false; // peel() follows redirects — final result is always 200
|
|
471
|
+
if (result.tokens < 20 && !useRender && isHtmlContent && !isRedirect) {
|
|
472
|
+
const warningMsg = `Low content detected (${result.tokens} tokens). Try: webpeel ${url} --render`;
|
|
473
|
+
if (isJson) {
|
|
474
|
+
result.warning = warningMsg;
|
|
475
|
+
}
|
|
476
|
+
else {
|
|
477
|
+
console.error(`⚠ ${warningMsg}`);
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
// --- Extract-all / pagination / output formatting ---
|
|
481
|
+
const wantsExtractAll = options.extractAll || options.scrollExtract !== undefined;
|
|
482
|
+
const pagesCount = Math.min(Math.max(options.pages || 1, 1), 10);
|
|
483
|
+
if (wantsExtractAll) {
|
|
484
|
+
const { extractListings } = await import('./core/extract-listings.js');
|
|
485
|
+
const { findNextPageUrl } = await import('./core/paginate.js');
|
|
486
|
+
// We need the raw HTML for extraction. Re-fetch with format=html if needed.
|
|
487
|
+
let allListings = [];
|
|
488
|
+
// Fetch HTML for extraction
|
|
489
|
+
const htmlResult = peelOptions.format === 'html'
|
|
490
|
+
? result
|
|
491
|
+
: await peel(url, { ...peelOptions, format: 'html', maxTokens: undefined });
|
|
492
|
+
allListings.push(...extractListings(htmlResult.content, result.url));
|
|
493
|
+
// Pagination: follow "Next" links
|
|
494
|
+
if (pagesCount > 1) {
|
|
495
|
+
let currentHtml = htmlResult.content;
|
|
496
|
+
let currentUrl = result.url;
|
|
497
|
+
for (let page = 1; page < pagesCount; page++) {
|
|
498
|
+
const nextUrl = findNextPageUrl(currentHtml, currentUrl);
|
|
499
|
+
if (!nextUrl)
|
|
500
|
+
break;
|
|
501
|
+
try {
|
|
502
|
+
const nextResult = await peel(nextUrl, { ...peelOptions, format: 'html', maxTokens: undefined });
|
|
503
|
+
const pageListings = extractListings(nextResult.content, nextResult.url);
|
|
504
|
+
allListings.push(...pageListings);
|
|
505
|
+
currentHtml = nextResult.content;
|
|
506
|
+
currentUrl = nextResult.url;
|
|
507
|
+
}
|
|
508
|
+
catch {
|
|
509
|
+
break; // Stop paginating on error
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
// Apply budget to listings if requested
|
|
514
|
+
let listingsTruncated = false;
|
|
515
|
+
let totalAvailableListings;
|
|
516
|
+
if (options.budget && options.budget > 0 && allListings.length > 0) {
|
|
517
|
+
const { maxItems, truncated, totalAvailable } = budgetListings(allListings.length, options.budget);
|
|
518
|
+
if (truncated) {
|
|
519
|
+
listingsTruncated = true;
|
|
520
|
+
totalAvailableListings = totalAvailable;
|
|
521
|
+
allListings = allListings.slice(0, maxItems);
|
|
522
|
+
}
|
|
523
|
+
}
|
|
524
|
+
// Output based on format flags
|
|
525
|
+
if (options.csv) {
|
|
526
|
+
const csvOutput = formatListingsCsv(allListings);
|
|
527
|
+
await writeStdout(csvOutput);
|
|
528
|
+
}
|
|
529
|
+
else if (options.table) {
|
|
530
|
+
const { formatTable } = await import('./core/table-format.js');
|
|
531
|
+
const tableRows = allListings.map(item => {
|
|
532
|
+
const row = {};
|
|
533
|
+
for (const [k, v] of Object.entries(item)) {
|
|
534
|
+
if (v !== undefined)
|
|
535
|
+
row[k] = v;
|
|
536
|
+
}
|
|
537
|
+
return row;
|
|
538
|
+
});
|
|
539
|
+
await writeStdout(formatTable(tableRows) + '\n');
|
|
540
|
+
}
|
|
541
|
+
else if (isJson) {
|
|
542
|
+
// Use unified envelope for JSON output
|
|
543
|
+
const structured = allListings;
|
|
544
|
+
const envelope = buildEnvelope(result, {
|
|
545
|
+
cached: false,
|
|
546
|
+
structured,
|
|
547
|
+
truncated: listingsTruncated || undefined,
|
|
548
|
+
totalAvailable: totalAvailableListings,
|
|
549
|
+
});
|
|
550
|
+
// Also include legacy fields for backward compat
|
|
551
|
+
envelope.listings = allListings;
|
|
552
|
+
envelope.count = allListings.length;
|
|
553
|
+
await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
|
|
554
|
+
}
|
|
555
|
+
else {
|
|
556
|
+
// Formatted text output
|
|
557
|
+
if (allListings.length === 0) {
|
|
558
|
+
await writeStdout('No listings found.\n');
|
|
559
|
+
}
|
|
560
|
+
else {
|
|
561
|
+
const truncNote = listingsTruncated && totalAvailableListings
|
|
562
|
+
? ` (${totalAvailableListings} total — budget limited to ${allListings.length})`
|
|
563
|
+
: '';
|
|
564
|
+
await writeStdout(`Found ${allListings.length} listings${truncNote}:\n\n`);
|
|
565
|
+
allListings.forEach((item, i) => {
|
|
566
|
+
const pricePart = item.price ? ` — ${item.price}` : '';
|
|
567
|
+
const line = `${i + 1}. ${item.title}${pricePart}\n`;
|
|
568
|
+
process.stdout.write(line);
|
|
569
|
+
if (item.link) {
|
|
570
|
+
process.stdout.write(` ${item.link}\n`);
|
|
571
|
+
}
|
|
572
|
+
process.stdout.write('\n');
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
else if (options.csv || options.table) {
|
|
578
|
+
// CSV / table output for --extract (CSS selector extraction)
|
|
579
|
+
if (result.extracted) {
|
|
580
|
+
const rows = normaliseExtractedToRows(result.extracted);
|
|
581
|
+
if (options.csv) {
|
|
582
|
+
await writeStdout(formatListingsCsv(rows));
|
|
583
|
+
}
|
|
584
|
+
else {
|
|
585
|
+
const { formatTable } = await import('./core/table-format.js');
|
|
586
|
+
await writeStdout(formatTable(rows) + '\n');
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
else {
|
|
590
|
+
console.error('--csv / --table require --extract-all or --extract to produce structured data.');
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
else {
|
|
594
|
+
// Output results (default path)
|
|
595
|
+
await outputResult(result, options, {
|
|
596
|
+
cached: false,
|
|
597
|
+
truncated: contentTruncated || undefined,
|
|
598
|
+
});
|
|
377
599
|
}
|
|
378
|
-
// Output results
|
|
379
|
-
await outputResult(result, options);
|
|
380
600
|
// Clean up and exit
|
|
381
601
|
await cleanup();
|
|
382
602
|
process.exit(0);
|
|
@@ -385,6 +605,14 @@ program
|
|
|
385
605
|
if (spinner) {
|
|
386
606
|
spinner.fail('Failed to fetch');
|
|
387
607
|
}
|
|
608
|
+
// --- #6: Consistent JSON error output ---
|
|
609
|
+
if (isJson) {
|
|
610
|
+
const errMsg = error instanceof Error ? error.message : 'Unknown error';
|
|
611
|
+
const errCode = classifyErrorCode(error);
|
|
612
|
+
await writeStdout(JSON.stringify({ error: errMsg, code: errCode }) + '\n');
|
|
613
|
+
await cleanup();
|
|
614
|
+
process.exit(1);
|
|
615
|
+
}
|
|
388
616
|
if (error instanceof Error) {
|
|
389
617
|
console.error(`\nError: ${error.message}`);
|
|
390
618
|
// Provide actionable hints based on error type
|
|
@@ -418,22 +646,122 @@ program
|
|
|
418
646
|
// Search command
|
|
419
647
|
program
|
|
420
648
|
.command('search <query>')
|
|
421
|
-
.description('Search the web (DuckDuckGo by default, or
|
|
649
|
+
.description('Search the web (DuckDuckGo by default, or use --site for site-specific search)')
|
|
422
650
|
.option('-n, --count <n>', 'Number of results (1-10)', '5')
|
|
651
|
+
.option('--top <n>', 'Limit results (alias for --count)')
|
|
423
652
|
.option('--provider <provider>', 'Search provider: duckduckgo (default) or brave')
|
|
424
653
|
.option('--search-api-key <key>', 'API key for the search provider (or env WEBPEEL_BRAVE_API_KEY)')
|
|
654
|
+
.option('--site <site>', 'Search a specific site (e.g. ebay, amazon, github). Run "webpeel sites" for full list.')
|
|
425
655
|
.option('--json', 'Output as JSON')
|
|
656
|
+
.option('--urls-only', 'Output only URLs, one per line (pipe-friendly)')
|
|
657
|
+
.option('--table', 'Output site-search results as a formatted table (requires --site)')
|
|
658
|
+
.option('--csv', 'Output site-search results as CSV (requires --site)')
|
|
659
|
+
.option('--budget <n>', 'Token budget for site-search result content', parseInt)
|
|
426
660
|
.option('-s, --silent', 'Silent mode')
|
|
427
661
|
.action(async (query, options) => {
|
|
428
662
|
const isJson = options.json;
|
|
429
663
|
const isSilent = options.silent;
|
|
430
|
-
|
|
664
|
+
// --top overrides --count when both are provided
|
|
665
|
+
const count = parseInt(options.top ?? options.count) || 5;
|
|
431
666
|
// Check usage quota
|
|
432
667
|
const usageCheck = await checkUsage();
|
|
433
668
|
if (!usageCheck.allowed) {
|
|
434
669
|
console.error(usageCheck.message);
|
|
435
670
|
process.exit(1);
|
|
436
671
|
}
|
|
672
|
+
// ── --site: site-specific structured search ───────────────────────────
|
|
673
|
+
if (options.site) {
|
|
674
|
+
const spinner = isSilent ? null : ora(`Searching ${options.site}...`).start();
|
|
675
|
+
try {
|
|
676
|
+
const { buildSiteSearchUrl } = await import('./core/site-search.js');
|
|
677
|
+
const siteResult = buildSiteSearchUrl(options.site, query);
|
|
678
|
+
// Fetch the raw HTML (needed for listing extraction)
|
|
679
|
+
const htmlResult = await peel(siteResult.url, {
|
|
680
|
+
format: 'html',
|
|
681
|
+
timeout: 30000,
|
|
682
|
+
});
|
|
683
|
+
if (spinner) {
|
|
684
|
+
spinner.succeed(`Fetched ${siteResult.site} in ${htmlResult.elapsed}ms`);
|
|
685
|
+
}
|
|
686
|
+
// Extract listings from the HTML
|
|
687
|
+
const { extractListings } = await import('./core/extract-listings.js');
|
|
688
|
+
let listings = extractListings(htmlResult.content, siteResult.url);
|
|
689
|
+
// Apply budget if requested
|
|
690
|
+
if (options.budget && options.budget > 0 && listings.length > 0) {
|
|
691
|
+
const { budgetListings } = await import('./core/budget.js');
|
|
692
|
+
const { maxItems } = budgetListings(listings.length, options.budget);
|
|
693
|
+
listings = listings.slice(0, maxItems);
|
|
694
|
+
}
|
|
695
|
+
// Show usage footer
|
|
696
|
+
if (usageCheck.usageInfo && !isSilent) {
|
|
697
|
+
showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
|
|
698
|
+
}
|
|
699
|
+
// Output
|
|
700
|
+
if (options.csv) {
|
|
701
|
+
const rows = listings.map(item => {
|
|
702
|
+
const row = {};
|
|
703
|
+
for (const [k, v] of Object.entries(item)) {
|
|
704
|
+
if (v !== undefined)
|
|
705
|
+
row[k] = v;
|
|
706
|
+
}
|
|
707
|
+
return row;
|
|
708
|
+
});
|
|
709
|
+
await writeStdout(formatListingsCsv(rows));
|
|
710
|
+
}
|
|
711
|
+
else if (options.table) {
|
|
712
|
+
const { formatTable } = await import('./core/table-format.js');
|
|
713
|
+
const rows = listings.map(item => {
|
|
714
|
+
const row = {};
|
|
715
|
+
for (const [k, v] of Object.entries(item)) {
|
|
716
|
+
if (v !== undefined)
|
|
717
|
+
row[k] = v;
|
|
718
|
+
}
|
|
719
|
+
return row;
|
|
720
|
+
});
|
|
721
|
+
await writeStdout(formatTable(rows) + '\n');
|
|
722
|
+
}
|
|
723
|
+
else if (isJson) {
|
|
724
|
+
const envelope = {
|
|
725
|
+
site: siteResult.site,
|
|
726
|
+
query: siteResult.query,
|
|
727
|
+
url: siteResult.url,
|
|
728
|
+
count: listings.length,
|
|
729
|
+
items: listings,
|
|
730
|
+
elapsed: htmlResult.elapsed,
|
|
731
|
+
};
|
|
732
|
+
await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
|
|
733
|
+
}
|
|
734
|
+
else {
|
|
735
|
+
if (listings.length === 0) {
|
|
736
|
+
await writeStdout('No listings found.\n');
|
|
737
|
+
}
|
|
738
|
+
else {
|
|
739
|
+
await writeStdout(`Found ${listings.length} listings on ${siteResult.site}:\n\n`);
|
|
740
|
+
for (const [i, item] of listings.entries()) {
|
|
741
|
+
const pricePart = item.price ? ` — ${item.price}` : '';
|
|
742
|
+
process.stdout.write(`${i + 1}. ${item.title}${pricePart}\n`);
|
|
743
|
+
if (item.link)
|
|
744
|
+
process.stdout.write(` ${item.link}\n`);
|
|
745
|
+
process.stdout.write('\n');
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
await cleanup();
|
|
750
|
+
process.exit(0);
|
|
751
|
+
}
|
|
752
|
+
catch (error) {
|
|
753
|
+
if (spinner)
|
|
754
|
+
spinner.fail('Site search failed');
|
|
755
|
+
if (error instanceof Error) {
|
|
756
|
+
console.error(`\nError: ${error.message}`);
|
|
757
|
+
}
|
|
758
|
+
else {
|
|
759
|
+
console.error('\nError: Unknown error occurred');
|
|
760
|
+
}
|
|
761
|
+
await cleanup();
|
|
762
|
+
process.exit(1);
|
|
763
|
+
}
|
|
764
|
+
}
|
|
437
765
|
const spinner = isSilent ? null : ora('Searching...').start();
|
|
438
766
|
try {
|
|
439
767
|
const { getSearchProvider } = await import('./core/search-provider.js');
|
|
@@ -456,16 +784,15 @@ program
|
|
|
456
784
|
if (usageCheck.usageInfo && !isSilent) {
|
|
457
785
|
showUsageFooter(usageCheck.usageInfo, usageCheck.isAnonymous || false, false);
|
|
458
786
|
}
|
|
459
|
-
if (
|
|
787
|
+
if (options.urlsOnly) {
|
|
788
|
+
// Pipe-friendly: one URL per line
|
|
789
|
+
for (const result of results) {
|
|
790
|
+
await writeStdout(result.url + '\n');
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
else if (isJson) {
|
|
460
794
|
const jsonStr = JSON.stringify(results, null, 2);
|
|
461
|
-
await
|
|
462
|
-
process.stdout.write(jsonStr + '\n', (err) => {
|
|
463
|
-
if (err)
|
|
464
|
-
reject(err);
|
|
465
|
-
else
|
|
466
|
-
resolve();
|
|
467
|
-
});
|
|
468
|
-
});
|
|
795
|
+
await writeStdout(jsonStr + '\n');
|
|
469
796
|
}
|
|
470
797
|
else {
|
|
471
798
|
for (const result of results) {
|
|
@@ -497,6 +824,44 @@ program
|
|
|
497
824
|
process.exit(1);
|
|
498
825
|
}
|
|
499
826
|
});
|
|
827
|
+
// Sites command — list all supported site templates.
//
// With --json the (optionally category-filtered) site list is written to
// stdout as JSON. Otherwise the sites are printed grouped by category.
// Known categories appear in a fixed, preferred order; any category that is
// not in that list is appended afterwards (alphabetically) so that newly
// added templates never silently disappear from the listing.
program
    .command('sites')
    .description('List all sites supported by "webpeel search --site <site>"')
    .option('--json', 'Output as JSON')
    .option('--category <cat>', 'Filter by category (shopping, social, tech, jobs, general, real-estate, food)')
    .action(async (options) => {
        const { listSites } = await import('./core/site-search.js');
        const allSites = listSites();
        const sites = options.category
            ? allSites.filter((site) => site.category === options.category)
            : allSites;

        if (options.json) {
            await writeStdout(JSON.stringify(sites, null, 2) + '\n');
            process.exit(0);
        }

        // Group by category for pretty output. Sites without a category are
        // collected under "other" instead of crashing on a missing label.
        const byCategory = new Map();
        for (const site of sites) {
            const key = site.category ?? 'other';
            if (!byCategory.has(key)) {
                byCategory.set(key, []);
            }
            byCategory.get(key).push(site);
        }

        const preferredOrder = ['shopping', 'general', 'social', 'tech', 'jobs', 'real-estate', 'food'];
        const extraCategories = [...byCategory.keys()]
            .filter((cat) => !preferredOrder.includes(cat))
            .sort();
        const sortedCategories = [
            ...preferredOrder.filter((cat) => byCategory.has(cat)),
            ...extraCategories,
        ];

        console.log('\nWebPeel Site-Aware Search — supported sites\n');
        console.log('Usage: webpeel search --site <id> "<query>"\n');

        if (sortedCategories.length === 0) {
            // Tell the user why nothing is listed (typically a mistyped --category).
            const suffix = options.category ? ` for category "${options.category}"` : '';
            console.log(`  No sites found${suffix}.\n`);
        }

        for (const cat of sortedCategories) {
            const label = String(cat).charAt(0).toUpperCase() + String(cat).slice(1);
            console.log(`  ${label}:`);
            for (const site of byCategory.get(cat)) {
                console.log(`    ${site.id.padEnd(16)} ${site.name}`);
            }
            console.log('');
        }
        process.exit(0);
    });
|
|
500
865
|
// Batch command
|
|
501
866
|
program
|
|
502
867
|
.command('batch [file]')
|
|
@@ -632,12 +997,12 @@ program
|
|
|
632
997
|
program
|
|
633
998
|
.command('crawl <url>')
|
|
634
999
|
.description('Crawl a website starting from a URL')
|
|
635
|
-
.option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', parseInt, 10)
|
|
636
|
-
.option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', parseInt, 2)
|
|
1000
|
+
.option('--max-pages <number>', 'Maximum number of pages to crawl (default: 10, max: 100)', (v) => parseInt(v, 10), 10)
|
|
1001
|
+
.option('--max-depth <number>', 'Maximum depth to crawl (default: 2, max: 5)', (v) => parseInt(v, 10), 2)
|
|
637
1002
|
.option('--allowed-domains <domains...>', 'Only crawl these domains (default: same as starting URL)')
|
|
638
1003
|
.option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
|
|
639
1004
|
.option('--ignore-robots', 'Ignore robots.txt (default: respect robots.txt)')
|
|
640
|
-
.option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 1000)', parseInt, 1000)
|
|
1005
|
+
.option('--rate-limit <ms>', 'Rate limit between requests in ms (default: 1000)', (v) => parseInt(v, 10), 1000)
|
|
641
1006
|
.option('-r, --render', 'Use headless browser for all pages')
|
|
642
1007
|
.option('--stealth', 'Use stealth mode for all pages')
|
|
643
1008
|
.option('-s, --silent', 'Silent mode (no spinner)')
|
|
@@ -710,7 +1075,7 @@ program
|
|
|
710
1075
|
.description('Discover all URLs on a domain (sitemap + crawl)')
|
|
711
1076
|
.option('--no-sitemap', 'Skip sitemap.xml discovery')
|
|
712
1077
|
.option('--no-crawl', 'Skip homepage crawl')
|
|
713
|
-
.option('--max <n>', 'Maximum URLs to discover (default: 5000)', parseInt, 5000)
|
|
1078
|
+
.option('--max <n>', 'Maximum URLs to discover (default: 5000)', (v) => parseInt(v, 10), 5000)
|
|
714
1079
|
.option('--include <patterns...>', 'Include only URLs matching these regex patterns')
|
|
715
1080
|
.option('--exclude <patterns...>', 'Exclude URLs matching these regex patterns')
|
|
716
1081
|
.option('--json', 'Output as JSON')
|
|
@@ -751,6 +1116,177 @@ program
|
|
|
751
1116
|
process.exit(1);
|
|
752
1117
|
}
|
|
753
1118
|
});
|
|
1119
|
+
// Watch command - monitor a URL for changes / assertion failures.
//
// All user input (URL, interval, assertions, numeric options) is validated
// before the watch loop starts so that bad input fails fast with a clear
// message. The loop itself lives in ./core/watch.js.
program
    .command('watch <url>')
    .description('Monitor a URL for changes and assertion failures')
    .option('--interval <duration>', 'Check interval (e.g. 30s, 5m, 1h)', '5m')
    .option('--assert <condition...>', 'Assertion(s) to check (e.g. "status=200" "body.health=ok")')
    .option('--webhook <url>', 'POST this URL on assertion failure or content change')
    .option('-t, --timeout <ms>', 'Per-request timeout in ms', (v) => parseInt(v, 10), 10000)
    .option('--max-checks <n>', 'Stop after N checks (default: unlimited)', (v) => parseInt(v, 10))
    .option('--json', 'Output each check as NDJSON to stdout')
    .option('-s, --silent', 'Only output on failures/changes')
    .option('-r, --render', 'Use browser rendering for checks')
    .action(async (url, options) => {
        const { watch: runWatch, parseDuration, parseAssertion } = await import('./core/watch.js');

        // Print an error to stderr and terminate with a failing exit code.
        const fail = (message) => {
            console.error(`Error: ${message}`);
            process.exit(1);
        };
        // Thrown values are not guaranteed to be Error instances.
        const messageOf = (err) => (err instanceof Error ? err.message : String(err));

        // Validate URL
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                fail('Only HTTP and HTTPS protocols are allowed');
            }
        }
        catch {
            fail(`Invalid URL format: ${url}`);
        }

        // Validate numeric options: parseInt yields NaN for non-numeric input,
        // which would otherwise be handed to the watch loop unchecked.
        if (Number.isNaN(options.timeout)) {
            fail('--timeout must be a number of milliseconds');
        }
        if (options.maxChecks !== undefined && Number.isNaN(options.maxChecks)) {
            fail('--max-checks must be a number');
        }

        // Parse interval
        let intervalMs;
        try {
            intervalMs = parseDuration(options.interval);
        }
        catch (e) {
            fail(messageOf(e));
        }

        // Parse assertions
        const assertions = [];
        if (Array.isArray(options.assert)) {
            for (const expr of options.assert) {
                try {
                    assertions.push(parseAssertion(expr));
                }
                catch (e) {
                    fail(messageOf(e));
                }
            }
        }

        // The banner goes to stderr so stdout stays clean for piped output.
        if (!options.json && !options.silent) {
            const assertLabel = assertions.length > 0
                ? ` with ${assertions.length} assertion(s)`
                : '';
            process.stderr.write(`Watching ${url} every ${options.interval}${assertLabel}. Press Ctrl+C to stop.\n`);
        }

        const watchOptions = {
            url,
            intervalMs,
            assertions,
            webhookUrl: options.webhook,
            timeout: options.timeout,
            maxChecks: options.maxChecks,
            render: options.render || false,
            json: options.json || false,
            silent: options.silent || false,
        };
        try {
            await runWatch(watchOptions);
        }
        catch (error) {
            fail(error instanceof Error ? error.message : 'Unknown error');
        }
        process.exit(0);
    });
|
|
1194
|
+
// Diff command - semantic diff against last snapshot.
//
// Fetches the URL, compares it with the stored snapshot via ./core/diff.js and
// prints the result either as JSON (--json) or as a human-readable change list.
// In JSON mode errors are also emitted on stdout as { error, code } objects.
//
// NOTE(review): --against is declared below but its value is never forwarded
// to diffUrl, and --last has no effect beyond documenting the default. Confirm
// what diffUrl supports before wiring --against through.
program
    .command('diff <url>')
    .description('Show semantic diff between current content and the last tracked snapshot')
    .option('--last', 'Compare against last tracked snapshot (default)')
    .option('--against <snapshot-url>', 'Compare against the snapshot stored for a different URL')
    .option('--fields <fields>', 'For JSON responses: only diff these fields (comma-separated dot-notation)')
    .option('--json', 'Output diff as JSON')
    .option('-r, --render', 'Use browser rendering')
    .option('-t, --timeout <ms>', 'Request timeout in ms', (v) => parseInt(v, 10), 30000)
    .option('-s, --silent', 'Silent mode (no spinner)')
    .action(async (url, options) => {
        const isJson = options.json;

        // Report an error in the active output format (JSON on stdout, text on stderr).
        const reportError = async (message, code) => {
            if (isJson) {
                await writeStdout(JSON.stringify({ error: message, code }) + '\n');
            }
            else {
                console.error(`Error: ${message}`);
            }
        };
        // Render a changed value; objects/arrays would otherwise print as "[object Object]".
        const formatValue = (value) => (value !== null && typeof value === 'object'
            ? JSON.stringify(value)
            : String(value));

        // Validate URL
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                await reportError('Only HTTP and HTTPS protocols are allowed', 'INVALID_URL');
                process.exit(1);
            }
        }
        catch {
            await reportError(`Invalid URL format: ${url}`, 'INVALID_URL');
            process.exit(1);
        }

        const spinner = options.silent ? null : ora('Fetching and diffing...').start();
        try {
            const { diffUrl } = await import('./core/diff.js');
            const fields = options.fields
                ? options.fields.split(',').map((f) => f.trim()).filter(Boolean)
                : undefined;
            const result = await diffUrl(url, {
                render: options.render || false,
                timeout: options.timeout,
                fields,
            });
            if (spinner) {
                spinner.succeed(`Diff completed: ${result.changed ? 'CHANGED' : 'no change'}`);
            }
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
            }
            else {
                // Human-readable output
                const ago = result.previousTimestamp
                    ? formatRelativeTime(new Date(result.previousTimestamp))
                    : 'unknown';
                console.log(`\nComparing ${result.url} (now vs ${ago})\n`);
                if (!result.changed) {
                    console.log('  No changes detected.');
                }
                else {
                    for (const change of result.changes ?? []) {
                        const label = change.field ?? change.path ?? '(unknown)';
                        if (change.type === 'modified') {
                            console.log(`  Modified: ${label}  ${formatValue(change.before)} → ${formatValue(change.after)}`);
                        }
                        else if (change.type === 'added') {
                            console.log(`  Added:    ${label}  ${formatValue(change.after)}`);
                        }
                        else if (change.type === 'removed') {
                            console.log(`  Removed:  ${label}  ${formatValue(change.before)}`);
                        }
                    }
                }
                console.log(`\nSummary: ${result.summary}`);
            }
            await cleanup();
            process.exit(0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Diff failed');
            }
            await reportError(error instanceof Error ? error.message : 'Unknown error', 'FETCH_FAILED');
            await cleanup();
            process.exit(1);
        }
    });
|
|
754
1290
|
program
|
|
755
1291
|
.command('login')
|
|
756
1292
|
.description('Authenticate the CLI with your API key')
|
|
@@ -987,33 +1523,46 @@ program
|
|
|
987
1523
|
// Track command - track changes on a URL
|
|
988
1524
|
program
|
|
989
1525
|
.command('track <url>')
|
|
990
|
-
.description('Track changes on a URL (
|
|
1526
|
+
.description('Track changes on a URL (saves snapshot for use with `webpeel diff`)')
|
|
991
1527
|
.option('-s, --silent', 'Silent mode (no spinner)')
|
|
992
1528
|
.option('--json', 'Output as JSON')
|
|
1529
|
+
.option('-r, --render', 'Use browser rendering')
|
|
993
1530
|
.action(async (url, options) => {
|
|
994
1531
|
const spinner = options.silent ? null : ora('Fetching and tracking...').start();
|
|
995
1532
|
try {
|
|
996
|
-
|
|
1533
|
+
// changeTracking: true saves the snapshot to ~/.webpeel/snapshots/ so that
|
|
1534
|
+
// `webpeel diff` can compare against it later.
|
|
1535
|
+
const result = await peel(url, {
|
|
1536
|
+
render: options.render || false,
|
|
1537
|
+
changeTracking: true,
|
|
1538
|
+
});
|
|
997
1539
|
if (spinner) {
|
|
998
1540
|
spinner.succeed(`Tracked in ${result.elapsed}ms`);
|
|
999
1541
|
}
|
|
1542
|
+
const changeStatus = result.changeTracking?.changeStatus ?? 'new';
|
|
1543
|
+
const previousScrapeAt = result.changeTracking?.previousScrapeAt ?? null;
|
|
1000
1544
|
if (options.json) {
|
|
1001
|
-
|
|
1545
|
+
await writeStdout(JSON.stringify({
|
|
1002
1546
|
url: result.url,
|
|
1003
1547
|
title: result.title,
|
|
1004
1548
|
fingerprint: result.fingerprint,
|
|
1005
1549
|
tokens: result.tokens,
|
|
1006
1550
|
contentType: result.contentType,
|
|
1551
|
+
changeStatus,
|
|
1552
|
+
previousScrapeAt,
|
|
1007
1553
|
lastChecked: new Date().toISOString(),
|
|
1008
|
-
}, null, 2));
|
|
1554
|
+
}, null, 2) + '\n');
|
|
1009
1555
|
}
|
|
1010
1556
|
else {
|
|
1011
1557
|
console.log(`URL: ${result.url}`);
|
|
1012
1558
|
console.log(`Title: ${result.title}`);
|
|
1013
1559
|
console.log(`Fingerprint: ${result.fingerprint}`);
|
|
1014
1560
|
console.log(`Tokens: ${result.tokens}`);
|
|
1561
|
+
console.log(`Status: ${changeStatus}`);
|
|
1562
|
+
if (previousScrapeAt)
|
|
1563
|
+
console.log(`Previous check: ${previousScrapeAt}`);
|
|
1015
1564
|
console.log(`Last checked: ${new Date().toISOString()}`);
|
|
1016
|
-
console.log('\
|
|
1565
|
+
console.log('\nSnapshot saved. Run `webpeel diff <url> --last` to compare future changes.');
|
|
1017
1566
|
}
|
|
1018
1567
|
await cleanup();
|
|
1019
1568
|
process.exit(0);
|
|
@@ -1145,25 +1694,39 @@ program
|
|
|
1145
1694
|
process.exit(1);
|
|
1146
1695
|
}
|
|
1147
1696
|
});
|
|
1148
|
-
// Jobs command
|
|
1149
|
-
program
|
|
1150
|
-
.command('jobs
|
|
1151
|
-
.description('
|
|
1697
|
+
// ── Jobs command group ─────────────────────────────────────────────────────
// Parent command for all job-board features. Invoked with bare keywords it
// acts as a shorthand for `jobs search <keywords>`; invoked with no keywords
// it prints the group's help text and exits successfully.
const jobsCmd = program
    .command('jobs')
    .description('Job board operations: search listings and auto-apply (LinkedIn, Indeed, Glassdoor, Upwork)')
    .argument('[keywords]', 'Search keywords — shorthand for "jobs search <keywords>"')
    .option('-l, --location <location>', 'Location filter')
    .option('-s, --source <source>', 'Job board: glassdoor, indeed, linkedin, or upwork (default: linkedin)', 'linkedin')
    .option('-n, --limit <number>', 'Max results (default: 25)', '25')
    .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
    .option('--json', 'Output raw JSON')
    .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
    .option('--silent', 'Silent mode (no spinner)')
    .action(async (keywords, options) => {
        if (keywords) {
            // Shorthand form: hand off to the shared search routine.
            await runJobSearch(keywords, options);
            return;
        }
        // Nothing to search for: show usage for the whole command group.
        jobsCmd.help();
        process.exit(0);
    });
|
|
1718
|
+
// ── Shared job-search logic (used by both `jobs` default and `jobs search`) ───
|
|
1719
|
+
async function runJobSearch(keywords, options) {
|
|
1160
1720
|
const spinner = options.silent ? null : ora('Searching jobs...').start();
|
|
1161
1721
|
try {
|
|
1162
1722
|
const { searchJobs } = await import('./core/jobs.js');
|
|
1163
|
-
const
|
|
1164
|
-
const
|
|
1165
|
-
|
|
1166
|
-
|
|
1723
|
+
const VALID_SOURCES = ['glassdoor', 'indeed', 'linkedin', 'upwork'];
|
|
1724
|
+
const source = (VALID_SOURCES.includes((options.source ?? 'linkedin'))
|
|
1725
|
+
? options.source
|
|
1726
|
+
: 'linkedin');
|
|
1727
|
+
const limit = Math.min(Math.max(parseInt(options.limit ?? '25', 10) || 25, 1), 100);
|
|
1728
|
+
const fetchDetails = Math.min(Math.max(parseInt(options.details ?? '0', 10) || 0, 0), limit);
|
|
1729
|
+
const timeout = parseInt(options.timeout ?? '30000', 10) || 30000;
|
|
1167
1730
|
const result = await searchJobs({
|
|
1168
1731
|
keywords,
|
|
1169
1732
|
location: options.location,
|
|
@@ -1174,12 +1737,10 @@ program
|
|
|
1174
1737
|
});
|
|
1175
1738
|
if (spinner)
|
|
1176
1739
|
spinner.stop();
|
|
1177
|
-
// --json: raw output
|
|
1178
1740
|
if (options.json) {
|
|
1179
1741
|
await writeStdout(JSON.stringify(result, null, 2) + '\n');
|
|
1180
1742
|
process.exit(0);
|
|
1181
1743
|
}
|
|
1182
|
-
// Formatted table output
|
|
1183
1744
|
const totalLabel = result.totalFound >= 1000
|
|
1184
1745
|
? `${(result.totalFound / 1000).toFixed(0).replace(/\.0$/, '')}k+`
|
|
1185
1746
|
: String(result.totalFound);
|
|
@@ -1189,7 +1750,6 @@ program
|
|
|
1189
1750
|
console.log(' No jobs found.\n');
|
|
1190
1751
|
process.exit(0);
|
|
1191
1752
|
}
|
|
1192
|
-
// Column widths
|
|
1193
1753
|
const colNum = 3;
|
|
1194
1754
|
const colTitle = 40;
|
|
1195
1755
|
const colCompany = 18;
|
|
@@ -1198,18 +1758,15 @@ program
|
|
|
1198
1758
|
const colPosted = 10;
|
|
1199
1759
|
const pad = (s, w) => s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w);
|
|
1200
1760
|
const rpad = (s, w) => s.padStart(w);
|
|
1201
|
-
|
|
1202
|
-
console.log(` ${rpad('#', colNum)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Location', colLocation)} ${pad('Salary', colSalary)} ${pad('Posted', colPosted)}`);
|
|
1203
|
-
// Rows
|
|
1761
|
+
console.log(` ${rpad('#', colNum)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Location', colLocation)} ${pad('Salary/Budget', colSalary)} ${pad('Posted', colPosted)}`);
|
|
1204
1762
|
result.jobs.forEach((job, i) => {
|
|
1205
|
-
const
|
|
1206
|
-
|
|
1763
|
+
const titleStr = job.title + (job.remote ? ' 🏠' : '');
|
|
1764
|
+
const salaryStr = job.salary ?? ('budget' in job ? job.budget : '') ?? '';
|
|
1765
|
+
console.log(` ${rpad(String(i + 1), colNum)} ${pad(titleStr, colTitle)} ${pad(job.company, colCompany)} ${pad(job.location, colLocation)} ${pad(salaryStr, colSalary)} ${pad(job.postedAt ?? '', colPosted)}`);
|
|
1207
1766
|
});
|
|
1208
|
-
// Footer
|
|
1209
1767
|
const timeSec = (result.timeTakenMs / 1000).toFixed(1);
|
|
1210
1768
|
const detailsNote = fetchDetails > 0 ? ` | Details: ${result.detailsFetched} fetched` : '';
|
|
1211
1769
|
console.log(`\nFetched ${result.jobs.length} jobs in ${timeSec}s${detailsNote}\n`);
|
|
1212
|
-
// Detailed job cards (when --details > 0)
|
|
1213
1770
|
const detailedJobs = result.jobs.filter((j) => 'description' in j);
|
|
1214
1771
|
for (let i = 0; i < detailedJobs.length; i++) {
|
|
1215
1772
|
const job = detailedJobs[i];
|
|
@@ -1251,7 +1808,238 @@ program
|
|
|
1251
1808
|
}
|
|
1252
1809
|
catch (error) {
|
|
1253
1810
|
if (spinner)
|
|
1254
|
-
spinner.fail('Job search failed');
|
|
1811
|
+
spinner.fail?.('Job search failed');
|
|
1812
|
+
console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1813
|
+
process.exit(1);
|
|
1814
|
+
}
|
|
1815
|
+
}
|
|
1816
|
+
// jobs search <keywords> — the explicit spelling of the search feature.
// It accepts the same flags as the bare `jobs <keywords>` shorthand and runs
// the same shared routine.
jobsCmd
    .command('search <keywords>')
    .description('Search job boards for listings (LinkedIn, Indeed, Glassdoor, Upwork)')
    .alias('s')
    .option('-l, --location <location>', 'Location filter')
    .option('-s, --source <source>', 'Job board: glassdoor, indeed, linkedin, or upwork (default: linkedin)', 'linkedin')
    .option('-n, --limit <number>', 'Max results (default: 25)', '25')
    .option('-d, --details <number>', 'Fetch full details for top N results (default: 0)', '0')
    .option('--json', 'Output raw JSON')
    .option('--timeout <ms>', 'Request timeout in ms (default: 30000)', '30000')
    .option('--silent', 'Silent mode (no spinner)')
    .action((keywords, options) => runJobSearch(keywords, options));
|
|
1831
|
+
// ── jobs apply <url> ─────────────────────────────────────────────────────────
// Stealth automated job application using human behavior simulation.
//
// Flow: validate the URL, load the applicant profile from disk, run
// applyToJob from ./core/apply.js, then print either the raw JSON result
// (--json) or a short human-readable summary. The process exits with code 1
// when the result carries an error or when anything throws.
jobsCmd
    .command('apply <url>')
    .description('Stealth automated job application using human behavior simulation')
    .option('--profile <path>', 'Path to profile JSON file', `${process.env.HOME ?? '~'}/.webpeel/profile.json`)
    .option('--resume <path>', 'Path to resume PDF (overrides profile.resumePath)')
    .option('--mode <mode>', 'Submission mode: auto | review | dry-run (default: review)', 'review')
    .option('--session-dir <path>', 'Browser session directory (preserves login cookies)')
    .option('--llm-key <key>', 'LLM API key for custom question answers')
    .option('--llm-provider <name>', 'LLM provider: openai | anthropic (default: openai)', 'openai')
    .option('--daily-limit <n>', 'Max applications per day (default: 8)', '8')
    .option('--no-warmup', 'Skip browsing warmup phase')
    .option('--json', 'Output result as JSON')
    .option('--silent', 'Minimal output')
    .action(async (url, options) => {
        const isSilent = options.silent;
        const isJson = options.json;
        // Unknown modes fall back to the safest interactive mode.
        const mode = ['auto', 'review', 'dry-run'].includes(options.mode)
            ? options.mode
            : 'review';

        // Validate URL the same way the watch/diff commands do.
        try {
            const parsed = new URL(url);
            if (!['http:', 'https:'].includes(parsed.protocol)) {
                console.error('Error: Only HTTP and HTTPS protocols are allowed');
                process.exit(1);
            }
        }
        catch {
            console.error(`Error: Invalid URL format: ${url}`);
            process.exit(1);
        }

        if (!isSilent) {
            console.log(`\n🤖 WebPeel Auto-Apply — mode: ${mode}`);
            console.log(`   URL: ${url}\n`);
        }

        // Load profile. A leading "~" is not expanded by the fs module (and the
        // default path contains one when HOME is unset), so expand it here.
        const { homedir } = await import('os');
        const profilePath = String(options.profile).replace(/^~(?=$|[\\/])/, () => homedir());
        let profile;
        try {
            const raw = readFileSync(profilePath, 'utf-8');
            profile = JSON.parse(raw);
            if (profile === null || typeof profile !== 'object' || Array.isArray(profile)) {
                throw new Error('profile file must contain a JSON object');
            }
        }
        catch (error) {
            // Surface the reason so a missing file and malformed JSON are distinguishable.
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: Could not load profile from ${profilePath} (${reason})`);
            console.error(`Run "webpeel jobs apply-setup" to create a profile.`);
            process.exit(1);
        }
        if (options.resume) {
            profile.resumePath = options.resume;
        }

        const spinner = isSilent ? null : ora('Applying...').start();
        try {
            const { applyToJob } = await import('./core/apply.js');
            const result = await applyToJob({
                url,
                profile,
                mode,
                sessionDir: options.sessionDir,
                llmKey: options.llmKey,
                llmProvider: options.llmProvider,
                dailyLimit: parseInt(options.dailyLimit, 10) || 8,
                // Commander sets `warmup` to false only when --no-warmup is passed.
                warmup: options.warmup !== false,
                onProgress: isSilent
                    ? undefined
                    : (event) => {
                        if (spinner) {
                            spinner.text = `[${event.stage}] ${event.message}`;
                        }
                        else {
                            console.log(`  [${event.stage}] ${event.message}`);
                        }
                    },
            });
            if (spinner) {
                spinner.stop();
            }
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
                process.exit(result.error ? 1 : 0);
            }

            let statusIcon = '📋';
            let statusText = 'Application completed (not submitted)';
            if (result.submitted) {
                statusIcon = '✅';
                statusText = 'Application submitted!';
            }
            else if (result.error) {
                statusIcon = '❌';
                statusText = `Error: ${result.error}`;
            }
            console.log(`\n${statusIcon} ${statusText}`);

            // Tolerate partially populated results so that a summary-printing
            // problem is not misreported as a failed application.
            const jobTitle = result.job?.title ?? '';
            const jobCompany = result.job?.company ?? '';
            if (jobTitle || jobCompany) {
                console.log(`   ${jobTitle}${jobCompany ? ` @ ${jobCompany}` : ''}`);
            }
            console.log(`\n   Fields filled: ${result.fieldsFilled}`);
            if (result.llmAnswers > 0) {
                console.log(`   LLM answers: ${result.llmAnswers}`);
            }
            const skipped = result.fieldsSkipped ?? [];
            if (skipped.length > 0) {
                console.log(`   Skipped: ${skipped.join(', ')}`);
            }
            const warnings = result.warnings ?? [];
            if (warnings.length > 0 && !isSilent) {
                console.log(`\n   Warnings:`);
                for (const warning of warnings) {
                    console.log(`   ⚠️  ${warning}`);
                }
            }
            console.log(`   Time: ${(result.elapsed / 1000).toFixed(1)}s\n`);
            process.exit(result.error ? 1 : 0);
        }
        catch (error) {
            if (spinner) {
                spinner.fail('Application failed');
            }
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
1926
|
+
// ── jobs apply-setup ─────────────────────────────────────────────────────────
// Interactive wizard to create ~/.webpeel/profile.json.
//
// Asks a fixed series of questions on stdin, builds the profile object and
// writes it as pretty-printed JSON. Optional answers (LinkedIn, website,
// salary range) are omitted from the file when left blank or unusable.
jobsCmd
    .command('apply-setup')
    .description('Interactive setup wizard — creates ~/.webpeel/profile.json')
    .action(async () => {
        const { createInterface } = await import('readline');
        const rl = createInterface({ input: process.stdin, output: process.stdout });
        // Prompt once and resolve with the trimmed answer.
        const ask = (q) => new Promise((resolve) => rl.question(q, (ans) => resolve(ans.trim())));
        // Parse a salary figure, tolerating "$", thousands separators and spaces
        // ("$120,000" → 120000). Returns undefined when no number can be read, so
        // NaN never reaches the profile (JSON.stringify would write it as null).
        const parseSalary = (text) => {
            const value = Number.parseInt(text.replace(/[$,_\s]/g, ''), 10);
            return Number.isFinite(value) ? value : undefined;
        };

        console.log('\n🤖 WebPeel Apply Setup — Create your applicant profile\n');
        console.log('This creates ~/.webpeel/profile.json used by "webpeel jobs apply".\n');
        try {
            const name = await ask('Full name: ');
            const email = await ask('Email address: ');
            const phone = await ask('Phone number: ');
            const linkedin = await ask('LinkedIn URL (optional, press Enter to skip): ');
            const website = await ask('Portfolio/website URL (optional): ');
            const location = await ask('City, State (e.g. San Francisco, CA): ');
            const workAuth = await ask('Work authorization (e.g. US Citizen, Permanent Resident, H-1B, Need Sponsorship): ');
            const yearsExp = await ask('Years of experience: ');
            const currentTitle = await ask('Current/most recent job title: ');
            const skills = await ask('Skills (comma-separated, e.g. TypeScript, React, Node.js): ');
            const education = await ask('Education (e.g. B.S. Computer Science, MIT): ');
            const resumePath = await ask('Path to resume PDF (e.g. /Users/you/resume.pdf): ');
            const summary = await ask('Professional summary (1-3 sentences): ');
            const salaryMin = await ask('Minimum desired salary (optional, e.g. 120000): ');
            const salaryMax = await ask('Maximum desired salary (optional, e.g. 180000): ');
            const relocate = await ask('Willing to relocate? (y/n): ');
            const sponsorship = await ask('Need visa sponsorship? (y/n): ');
            rl.close();

            const minSalary = parseSalary(salaryMin);
            const maxSalary = parseSalary(salaryMax);
            const profileData = {
                name,
                email,
                phone,
                ...(linkedin ? { linkedin } : {}),
                ...(website ? { website } : {}),
                location,
                workAuthorization: workAuth,
                yearsExperience: parseInt(yearsExp, 10) || 0,
                currentTitle,
                skills: skills.split(',').map((s) => s.trim()).filter(Boolean),
                education,
                resumePath,
                summary,
                // The range is only stored when both bounds were given and are numeric.
                ...(minSalary !== undefined && maxSalary !== undefined
                    ? { salaryRange: { min: minSalary, max: maxSalary } }
                    : {}),
                willingToRelocate: relocate.toLowerCase().startsWith('y'),
                needsSponsorship: sponsorship.toLowerCase().startsWith('y'),
            };

            const { mkdirSync, writeFileSync } = await import('fs');
            const { join } = await import('path');
            const { homedir } = await import('os');
            const webpeelDir = join(homedir(), '.webpeel');
            // `recursive: true` makes this a no-op when the directory already exists.
            mkdirSync(webpeelDir, { recursive: true });
            const profilePath = join(webpeelDir, 'profile.json');
            writeFileSync(profilePath, JSON.stringify(profileData, null, 2), 'utf-8');

            console.log(`\n✅ Profile saved to: ${profilePath}`);
            console.log('\nNext steps:');
            console.log('  1. Apply to a job: webpeel jobs apply https://linkedin.com/jobs/view/...');
            console.log('     (First run opens a browser — log in to LinkedIn, then the session is saved)\n');
        }
        catch (error) {
            rl.close();
            console.error(`\nError: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
1995
|
+
// ── jobs apply-history ───────────────────────────────────────────────────────
|
|
1996
|
+
// View application history from ~/.webpeel/applications.json
|
|
1997
|
+
jobsCmd
|
|
1998
|
+
.command('apply-history')
|
|
1999
|
+
.description('View application history from ~/.webpeel/applications.json')
|
|
2000
|
+
.option('--json', 'Output as JSON')
|
|
2001
|
+
.option('--limit <n>', 'Number of recent applications to show (default: 20)', '20')
|
|
2002
|
+
.action(async (options) => {
|
|
2003
|
+
const isJson = options.json;
|
|
2004
|
+
const limit = parseInt(options.limit, 10) || 20;
|
|
2005
|
+
try {
|
|
2006
|
+
const { loadApplications } = await import('./core/apply.js');
|
|
2007
|
+
const allApps = loadApplications();
|
|
2008
|
+
const apps = allApps.slice().reverse().slice(0, limit);
|
|
2009
|
+
if (isJson) {
|
|
2010
|
+
await writeStdout(JSON.stringify(apps, null, 2) + '\n');
|
|
2011
|
+
process.exit(0);
|
|
2012
|
+
}
|
|
2013
|
+
if (apps.length === 0) {
|
|
2014
|
+
console.log('\nNo applications yet. Use "webpeel jobs apply <url>" to start.\n');
|
|
2015
|
+
process.exit(0);
|
|
2016
|
+
}
|
|
2017
|
+
console.log(`\n📋 Application History (${apps.length} of ${allApps.length} total)\n`);
|
|
2018
|
+
const colDate = 22;
|
|
2019
|
+
const colStatus = 10;
|
|
2020
|
+
const colTitle = 35;
|
|
2021
|
+
const colCompany = 20;
|
|
2022
|
+
const colMode = 8;
|
|
2023
|
+
const pad = (s, w) => (s.length > w ? s.slice(0, w - 1) + '…' : s.padEnd(w));
|
|
2024
|
+
console.log(` ${pad('Applied', colDate)} ${pad('Status', colStatus)} ${pad('Title', colTitle)} ${pad('Company', colCompany)} ${pad('Mode', colMode)}`);
|
|
2025
|
+
console.log(` ${'-'.repeat(colDate)} ${'-'.repeat(colStatus)} ${'-'.repeat(colTitle)} ${'-'.repeat(colCompany)} ${'-'.repeat(colMode)}`);
|
|
2026
|
+
for (const app of apps) {
|
|
2027
|
+
const date = new Date(app.appliedAt).toLocaleString('en-US', {
|
|
2028
|
+
month: 'short',
|
|
2029
|
+
day: 'numeric',
|
|
2030
|
+
year: 'numeric',
|
|
2031
|
+
hour: '2-digit',
|
|
2032
|
+
minute: '2-digit',
|
|
2033
|
+
});
|
|
2034
|
+
const statusEmoji = { applied: '📤', interview: '🎯', offer: '🎉', rejected: '❌', withdrawn: '🚫' }[app.status] ?? '';
|
|
2035
|
+
console.log(` ${pad(date, colDate)} ${pad(`${statusEmoji} ${app.status}`, colStatus)} ${pad(app.title, colTitle)} ${pad(app.company, colCompany)} ${pad(app.mode, colMode)}`);
|
|
2036
|
+
}
|
|
2037
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
2038
|
+
const todayCount = allApps.filter(a => a.appliedAt.startsWith(today)).length;
|
|
2039
|
+
console.log(`\n Today: ${todayCount} application(s)\n`);
|
|
2040
|
+
process.exit(0);
|
|
2041
|
+
}
|
|
2042
|
+
catch (error) {
|
|
1255
2043
|
console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1256
2044
|
process.exit(1);
|
|
1257
2045
|
}
|
|
@@ -1451,7 +2239,7 @@ program
|
|
|
1451
2239
|
.option('--format <fmt>', 'Image format: png (default) or jpeg', 'png')
|
|
1452
2240
|
.option('--quality <n>', 'JPEG quality 1-100 (ignored for PNG)', parseInt)
|
|
1453
2241
|
.option('-w, --wait <ms>', 'Wait time after page load (ms)', parseInt)
|
|
1454
|
-
.option('-t, --timeout <ms>', 'Request timeout (ms)', parseInt, 30000)
|
|
2242
|
+
.option('-t, --timeout <ms>', 'Request timeout (ms)', (v) => parseInt(v, 10), 30000)
|
|
1455
2243
|
.option('--stealth', 'Use stealth mode to bypass bot detection')
|
|
1456
2244
|
.option('--action <actions...>', 'Page actions before screenshot (e.g., "click:.btn" "wait:2000")')
|
|
1457
2245
|
.option('-o, --output <path>', 'Output file path (default: screenshot.png)')
|
|
@@ -1558,11 +2346,376 @@ program
|
|
|
1558
2346
|
process.exit(1);
|
|
1559
2347
|
}
|
|
1560
2348
|
});
|
|
2349
|
+
// ── Top-level Apply command group ──────────────────────────────────────────
|
|
2350
|
+
//
|
|
2351
|
+
// webpeel apply submit <url> — submit a job application (alias: `apply s <url>`)
|
|
2352
|
+
// webpeel apply init — interactive profile setup wizard
|
|
2353
|
+
// webpeel apply status — show application stats
|
|
2354
|
+
// webpeel apply list — list tracked applications (with filters)
|
|
2355
|
+
// webpeel apply rate — show rate-governor status
|
|
2356
|
+
// Parent command for the `webpeel apply …` subcommands; the subcommands
// registered further down attach themselves to this object.
const applyCmd = program.command('apply');
applyCmd.description('Auto-apply pipeline: submit applications, track history, manage rate limits');
|
|
2359
|
+
// apply submit <url> — auto-apply to a job posting
|
|
2360
|
+
/**
 * `webpeel apply submit <url>` (alias `s`) — run the auto-apply flow for one job posting.
 *
 * Loads the applicant profile JSON from `--profile-path`, calls `applyToJob`
 * from ./core/apply.js, prints either the raw result (`--json`) or a short
 * human-readable summary, and exits with 0 on a successful submission, 1 otherwise.
 *
 * Only `url`, `profile`, `sessionDir`, `mode` and `timeout` are forwarded to
 * `applyToJob`; `--headed`, `--headless` and `--generate-cover` are parsed but
 * not used by this handler.
 */
applyCmd
    .command('submit <url>')
    .description('Auto-apply to a job posting')
    .alias('s')
    .option('--profile-path <path>', 'Path to apply profile JSON', `${process.env.HOME ?? '~'}/.webpeel/profile.json`)
    .option('--browser-profile <path>', 'Path to persistent browser data dir', `${process.env.HOME ?? '~'}/.webpeel/browser-profile`)
    .option('--headed', 'Run browser visibly (default for apply)')
    .option('--headless', 'Run browser invisibly')
    .option('--confirm', 'Pause for confirmation before submit (default: true)')
    .option('--no-confirm', 'Skip confirmation, auto-submit')
    .option('--dry-run', 'Go through flow but do not submit')
    .option('--generate-cover', 'Generate tailored cover letter (needs OPENAI_API_KEY)')
    .option('--timeout <ms>', 'Timeout in ms (default: 300000)', '300000')
    .option('--json', 'Output result as JSON')
    .option('--silent', 'Silent mode')
    .action(async (url, options) => {
        const isSilent = options.silent;
        const isJson = options.json;

        // Emit an error in the format the caller asked for (JSON on stdout, text on stderr).
        const reportError = async (msg) => {
            if (isJson) {
                await writeStdout(JSON.stringify({ error: msg }) + '\n');
            }
            else {
                console.error(`Error: ${msg}`);
            }
        };

        // Load profile
        const profilePath = options.profilePath;
        let profile;
        try {
            const raw = readFileSync(profilePath, 'utf-8');
            profile = JSON.parse(raw);
        }
        catch {
            // Covers both a missing/unreadable file and malformed JSON.
            await reportError(`Could not load profile from ${profilePath}. Run "webpeel apply init" to create one.`);
            process.exit(1);
        }

        // Commander stores a negated flag under the positive key: `--no-confirm`
        // yields `options.confirm === false`; there is never an `options.noConfirm`.
        const skipConfirmation = options.confirm === false;
        let mode = 'review';
        if (options.dryRun) {
            mode = 'dry-run';
        }
        else if (skipConfirmation) {
            mode = 'auto';
        }

        const spinner = isSilent ? null : ora('Applying...').start();
        try {
            const { applyToJob } = await import('./core/apply.js');
            const result = await applyToJob({
                url,
                profile,
                // Use sessionDir for persistent session storage (renamed from browserProfile)
                sessionDir: options.browserProfile,
                mode,
                // A non-numeric or zero --timeout falls back to five minutes.
                timeout: parseInt(options.timeout, 10) || 300_000,
            });
            if (spinner)
                spinner.stop();
            // Normalize result to a consistent output shape
            const success = result.submitted && !result.error;
            const jobTitle = result.job?.title ?? '';
            const jobCompany = result.job?.company ?? '';
            if (isJson) {
                await writeStdout(JSON.stringify(result, null, 2) + '\n');
                process.exit(success ? 0 : 1);
            }
            const icon = success ? '✅' : '❌';
            console.log(`\n${icon} ${success ? 'Application submitted!' : `Failed: ${result.error ?? 'Unknown error'}`}`);
            if (jobTitle)
                console.log(` ${jobTitle}${jobCompany ? ` @ ${jobCompany}` : ''}`);
            if (options.dryRun)
                console.log(' (Dry run — not submitted)');
            console.log(` Time: ${(result.elapsed / 1000).toFixed(1)}s\n`);
            process.exit(success ? 0 : 1);
        }
        catch (error) {
            if (spinner)
                spinner.fail('Application failed');
            await reportError(error instanceof Error ? error.message : 'Unknown error');
            process.exit(1);
        }
    });
|
|
2439
|
+
// apply init — interactive profile setup
|
|
2440
|
+
/**
 * `webpeel apply init` — interactive wizard that asks for applicant details on
 * stdin and writes them to `<home>/.webpeel/profile.json`.
 *
 * Optional answers (phone, LinkedIn, website, desired salary) are omitted from
 * the file when left blank. Exits with code 1 if anything throws.
 */
applyCmd
    .command('init')
    .description('Interactive profile setup — creates ~/.webpeel/profile.json')
    .action(async () => {
        const { createInterface } = await import('readline');
        const rl = createInterface({ input: process.stdin, output: process.stdout });
        // Promise wrapper around rl.question that resolves with the trimmed answer.
        const ask = (q) => new Promise((resolve) => rl.question(q, (ans) => resolve(ans.trim())));
        console.log('\n🤖 WebPeel Apply Setup — Create your applicant profile\n');
        console.log('This creates ~/.webpeel/profile.json used by "webpeel apply submit".\n');
        try {
            const name = await ask('Full name: ');
            const email = await ask('Email address: ');
            const phone = await ask('Phone number (optional): ');
            const resumePath = await ask('Path to resume PDF (e.g. /Users/you/resume.pdf): ');
            const currentTitle = await ask('Current/most recent job title: ');
            const yearsExp = await ask('Years of experience: ');
            const skills = await ask('Skills (comma-separated, e.g. TypeScript, React, Node.js): ');
            const education = await ask('Education (e.g. B.S. Computer Science, MIT): ');
            const location = await ask('City, State (e.g. San Francisco, CA): ');
            const workAuth = await ask('Work authorization (e.g. US Citizen, Permanent Resident, H-1B, Need Sponsorship): ');
            const linkedinUrl = await ask('LinkedIn URL (optional): ');
            const websiteUrl = await ask('Portfolio/website URL (optional): ');
            const desiredSalary = await ask('Desired salary (optional, e.g. $150,000): ');
            rl.close();
            const { mkdirSync: mk, writeFileSync: wf } = await import('fs');
            const { join: j } = await import('path');
            const { homedir: hd } = await import('os');
            const webpeelDir = j(hd(), '.webpeel');
            mk(webpeelDir, { recursive: true });
            const profile = {
                name,
                email,
                ...(phone ? { phone } : {}),
                resumePath,
                currentTitle,
                // Non-numeric input is stored as 0 rather than NaN.
                yearsExperience: parseInt(yearsExp, 10) || 0,
                skills: skills.split(',').map((s) => s.trim()).filter(Boolean),
                education,
                location,
                workAuthorization: workAuth,
                ...(linkedinUrl ? { linkedinUrl } : {}),
                ...(websiteUrl ? { websiteUrl } : {}),
                ...(desiredSalary ? { desiredSalary } : {}),
            };
            const profilePath = j(webpeelDir, 'profile.json');
            // The profile holds personal data, so create it readable by the owner only.
            // `mode` applies when the file is created; an existing file keeps its permissions.
            wf(profilePath, JSON.stringify(profile, null, 2), { encoding: 'utf-8', mode: 0o600 });
            console.log(`\n✅ Profile saved to: ${profilePath}`);
            console.log('\nNext steps:');
            console.log(' • Apply to a job: webpeel apply submit <url>');
            console.log(' • Dry run first: webpeel apply submit <url> --dry-run');
            console.log(' • View stats: webpeel apply status\n');
        }
        catch (error) {
            // Release stdin before exiting; closing an already-closed interface is harmless.
            rl.close();
            console.error(`\nError: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
2498
|
+
// apply status — application stats summary
|
|
2499
|
+
/**
 * `webpeel apply status` — print the totals returned by
 * `ApplicationTracker#stats()`, either as JSON (`--json`) or as a text summary
 * with optional per-platform and per-status breakdowns.
 */
applyCmd
    .command('status')
    .description('Show application stats')
    .option('--json', 'Output as JSON')
    .action(async (options) => {
        // Prints a heading followed by one "label count" line per entry;
        // prints nothing at all when the map has no entries.
        const printBreakdown = (heading, counts) => {
            const entries = Object.entries(counts);
            if (entries.length === 0) {
                return;
            }
            console.log(heading);
            entries.forEach(([label, count]) => {
                console.log(` ${label.padEnd(12)} ${count}`);
            });
        };
        try {
            const trackerModule = await import('./core/application-tracker.js');
            const stats = new trackerModule.ApplicationTracker().stats();
            if (options.json) {
                const payload = JSON.stringify(stats, null, 2);
                await writeStdout(payload + '\n');
                process.exit(0);
            }
            [
                '\n📊 Application Stats\n',
                ` Total: ${stats.total}`,
                ` Today: ${stats.today}`,
                ` This week: ${stats.thisWeek}`,
            ].forEach((line) => console.log(line));
            printBreakdown('\n By Platform:', stats.byPlatform);
            printBreakdown('\n By Status:', stats.byStatus);
            console.log('');
            process.exit(0);
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
2536
|
+
// apply list — list applications with optional filters
|
|
2537
|
+
/**
 * `webpeel apply list` — list tracked applications, optionally filtered by
 * platform, status and start date, capped at `--limit` records.
 *
 * Filters are passed straight to `ApplicationTracker#list()`. Output is JSON
 * with `--json`, otherwise a fixed-width table (date, status, title, company).
 */
applyCmd
    .command('list')
    .description('List tracked applications')
    .option('--platform <platform>', 'Filter by platform (e.g. linkedin, upwork)')
    .option('--status <status>', 'Filter by status (applied, interview, rejected, offer, ...)')
    .option('--since <date>', 'Filter to applications on or after this date (YYYY-MM-DD)')
    .option('--json', 'Output as JSON')
    .option('--limit <n>', 'Max records to show (default: 50)', '50')
    .action(async (options) => {
        try {
            const { ApplicationTracker } = await import('./core/application-tracker.js');
            const tracker = new ApplicationTracker();
            // A non-numeric or zero --limit falls back to 50.
            const limit = parseInt(options.limit, 10) || 50;
            const records = tracker.list({
                platform: options.platform,
                status: options.status,
                since: options.since,
            }).slice(0, limit);
            if (options.json) {
                await writeStdout(JSON.stringify(records, null, 2) + '\n');
                process.exit(0);
            }
            if (records.length === 0) {
                console.log('\nNo applications found.\n');
                process.exit(0);
            }
            console.log(`\n📋 Applications (${records.length})\n`);
            const colDate = 12;
            const colStatus = 10;
            const colTitle = 35;
            const colCompany = 20;
            // Fit a cell to exactly `w` characters: truncate with an ellipsis or pad with spaces.
            // Missing fields render as blanks instead of throwing on `undefined.length`.
            const pad = (s, w) => {
                const text = String(s ?? '');
                return text.length > w ? text.slice(0, w - 1) + '…' : text.padEnd(w);
            };
            console.log(` ${pad('Date', colDate)} ${pad('Status', colStatus)} ${pad('Title', colTitle)} ${pad('Company', colCompany)}`);
            console.log(` ${'-'.repeat(colDate)} ${'-'.repeat(colStatus)} ${'-'.repeat(colTitle)} ${'-'.repeat(colCompany)}`);
            for (const r of records) {
                // Keep only the first ten characters of the timestamp (the date part of an ISO string).
                const dateStr = String(r.appliedAt ?? '').slice(0, 10);
                console.log(` ${pad(dateStr, colDate)} ${pad(r.status, colStatus)} ${pad(r.title, colTitle)} ${pad(r.company, colCompany)}`);
            }
            console.log('');
            process.exit(0);
        }
        catch (error) {
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
2583
|
+
// apply rate — rate governor status
|
|
2584
|
+
/**
 * `webpeel apply rate` — show the rate governor's state and configuration, or
 * clear an active cooldown with `--reset-cooldown`.
 *
 * With `--json` every path writes JSON to stdout, including the reset
 * confirmation. The cooldown line is printed only while time actually remains.
 */
applyCmd
    .command('rate')
    .description('Show rate governor status (daily limits, cooldown, next allowed time)')
    .option('--json', 'Output as JSON')
    .option('--reset-cooldown', 'Clear any active cooldown (manual override)')
    .action(async (options) => {
        try {
            const { RateGovernor, formatDuration } = await import('./core/rate-governor.js');
            const governor = new RateGovernor();
            if (options.resetCooldown) {
                governor.resetCooldown();
                if (options.json) {
                    // Keep stdout machine-readable when the caller asked for JSON.
                    await writeStdout(JSON.stringify({ cooldownReset: true }, null, 2) + '\n');
                }
                else {
                    console.log('✅ Cooldown cleared.');
                }
                process.exit(0);
            }
            const state = governor.getState();
            const config = governor.getConfig();
            const check = governor.canApply();
            if (options.json) {
                await writeStdout(JSON.stringify({
                    state,
                    config,
                    canApply: check.allowed,
                    reason: check.reason,
                    waitMs: check.waitMs,
                    nextDelayMs: governor.getNextDelay(),
                }, null, 2) + '\n');
                process.exit(0);
            }
            console.log('\n⏱ Rate Governor Status\n');
            console.log(` Today's applications: ${state.todayCount} / ${config.maxPerDay}`);
            console.log(` Total applications: ${state.totalApplications}`);
            console.log(` Can apply now: ${check.allowed ? '✅ Yes' : '❌ No'}`);
            if (!check.allowed && check.reason) {
                console.log(` Reason: ${check.reason}`);
            }
            if (!check.allowed && check.waitMs) {
                console.log(` Wait time: ${formatDuration(check.waitMs)}`);
            }
            // `cooldownUntil` is compared against Date.now(), so a past value means the
            // cooldown has already expired and must not be reported as active.
            const cooldownRemaining = state.cooldownUntil > 0 ? state.cooldownUntil - Date.now() : 0;
            if (cooldownRemaining > 0) {
                console.log(` Cooldown: Active (${formatDuration(cooldownRemaining)} remaining)`);
            }
            console.log(` Min delay: ${formatDuration(config.minDelayMs)}`);
            console.log(` Max delay: ${formatDuration(config.maxDelayMs)}`);
            console.log(` Active hours: ${config.activeHours[0]}:00 – ${config.activeHours[1]}:00`);
            console.log(` Weekdays only: ${config.weekdaysOnly ? 'Yes' : 'No'}`);
            console.log('');
            process.exit(0);
        }
        catch (error) {
            console.error(`Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
            process.exit(1);
        }
    });
|
|
1561
2638
|
program.parse();
|
|
1562
2639
|
// ============================================================
|
|
2640
|
+
// Time formatting helper
|
|
2641
|
+
// ============================================================
|
|
2642
|
+
/**
 * Format a past Date relative to now (e.g. "2h ago", "5m ago").
 *
 * Each unit is rounded to the nearest whole number before moving to the next
 * larger one (seconds → minutes → hours → days).
 *
 * @param {Date} past - The moment to describe.
 * @returns {string} "<n>s ago", "<n>m ago", "<n>h ago" or "<n>d ago";
 *   "0s ago" for a moment in the future (e.g. clock skew), and "unknown" for
 *   an invalid Date.
 */
function formatRelativeTime(past) {
    const elapsedMs = Date.now() - past.getTime();
    // An invalid Date yields NaN, which would otherwise print as "NaNd ago".
    if (Number.isNaN(elapsedMs))
        return 'unknown';
    // Clamp so a slightly-future timestamp never prints a negative age.
    const diffSec = Math.round(Math.max(0, elapsedMs) / 1000);
    if (diffSec < 60)
        return `${diffSec}s ago`;
    const diffMin = Math.round(diffSec / 60);
    if (diffMin < 60)
        return `${diffMin}m ago`;
    const diffHr = Math.round(diffMin / 60);
    if (diffHr < 24)
        return `${diffHr}h ago`;
    const diffDay = Math.round(diffHr / 24);
    return `${diffDay}d ago`;
}
|
|
2659
|
+
// ============================================================
|
|
2660
|
+
// Error classification for JSON error output (#6)
|
|
2661
|
+
// ============================================================
|
|
2662
|
+
/**
 * Map a thrown value to one of the CLI's error codes for JSON error output.
 *
 * Precedence: a truthy custom `_code` on the error wins; otherwise the error's
 * `name`, Node-style `code` and lower-cased `message` are checked in the order
 * TIMEOUT → BLOCKED → DNS_FAILED → INVALID_URL.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} The error's `_code`, or one of 'TIMEOUT', 'BLOCKED',
 *   'DNS_FAILED', 'INVALID_URL', 'FETCH_FAILED' (the fallback, also used for
 *   non-Error values).
 */
function classifyErrorCode(error) {
    if (!(error instanceof Error))
        return 'FETCH_FAILED';
    // Check for our custom _code first (set in pre-fetch validation)
    if (error._code)
        return error._code;
    const msg = String(error.message ?? '').toLowerCase();
    const name = error.name || '';
    // Node system/URL errors carry a machine-readable `code` (e.g. 'ETIMEDOUT').
    const sysCode = typeof error.code === 'string' ? error.code.toUpperCase() : '';
    // "etimedout" contains neither "timeout" nor "timed out", so socket timeouts
    // need their own check (by code, or by message when the code was lost in wrapping).
    if (name === 'TimeoutError' ||
        sysCode === 'ETIMEDOUT' ||
        sysCode === 'ESOCKETTIMEDOUT' ||
        msg.includes('timeout') ||
        msg.includes('timed out') ||
        msg.includes('etimedout')) {
        return 'TIMEOUT';
    }
    if (name === 'BlockedError' || msg.includes('blocked') || msg.includes('403') || msg.includes('cloudflare')) {
        return 'BLOCKED';
    }
    // NOTE(review): the bare 'not found' match also catches non-DNS messages
    // (e.g. "404 Not Found"); kept as-is because callers may rely on it.
    if (sysCode === 'ENOTFOUND' ||
        sysCode === 'EAI_AGAIN' ||
        msg.includes('enotfound') ||
        msg.includes('getaddrinfo') ||
        msg.includes('dns resolution failed') ||
        msg.includes('not found')) {
        return 'DNS_FAILED';
    }
    if (sysCode === 'ERR_INVALID_URL' ||
        msg.includes('invalid url') ||
        msg.includes('invalid hostname') ||
        msg.includes('only http')) {
        return 'INVALID_URL';
    }
    return 'FETCH_FAILED';
}
|
|
2684
|
+
/**
 * Build a unified PeelEnvelope from a PeelResult.
 *
 * All existing PeelResult fields are spread first (backward compatibility),
 * then canonical envelope fields override/extend them.
 *
 * @param {object} result - The peel result; `url`, `content`, `title`,
 *   `metadata`, `tokens` and `elapsed` are read from it.
 * @param {object} [extra={}] - Optional envelope extras: `cached`,
 *   `structured`, `truncated`, `totalAvailable`.
 * @returns {object} The envelope. `status` is always 200 and `cached`
 *   defaults to false; optional fields appear only when provided.
 */
function buildEnvelope(result, extra = {}) {
    const envelope = {
        // Spread all PeelResult fields for backward compatibility
        ...result,
        // Required envelope fields (override PeelResult where they overlap)
        url: result.url,
        status: 200,
        content: result.content,
        metadata: {
            // Keys in result.metadata (including its own `title`) take precedence.
            title: result.title,
            ...result.metadata,
        },
        tokens: result.tokens,
        cached: extra.cached ?? false,
        elapsed: result.elapsed,
    };
    // Optional envelope fields — only include when meaningful
    if (extra.structured !== undefined)
        envelope.structured = extra.structured;
    if (extra.truncated)
        envelope.truncated = true;
    if (extra.totalAvailable !== undefined)
        envelope.totalAvailable = extra.totalAvailable;
    return envelope;
}
|
|
2715
|
+
// ============================================================
|
|
1563
2716
|
// Shared output helper
|
|
1564
2717
|
// ============================================================
|
|
1565
|
-
async function outputResult(result, options) {
|
|
2718
|
+
async function outputResult(result, options, extra = {}) {
|
|
1566
2719
|
// --links: output only links
|
|
1567
2720
|
if (options.links) {
|
|
1568
2721
|
if (options.json) {
|
|
@@ -1603,6 +2756,7 @@ async function outputResult(result, options) {
|
|
|
1603
2756
|
method: result.method,
|
|
1604
2757
|
elapsed: result.elapsed,
|
|
1605
2758
|
tokens: result.tokens,
|
|
2759
|
+
cached: extra.cached ?? false,
|
|
1606
2760
|
...result.metadata,
|
|
1607
2761
|
};
|
|
1608
2762
|
if (options.json) {
|
|
@@ -1624,12 +2778,14 @@ async function outputResult(result, options) {
|
|
|
1624
2778
|
console.log(`Method: ${meta.method}`);
|
|
1625
2779
|
console.log(`Elapsed: ${meta.elapsed}ms`);
|
|
1626
2780
|
console.log(`Tokens: ${meta.tokens}`);
|
|
2781
|
+
console.log(`Cached: ${meta.cached}`);
|
|
1627
2782
|
}
|
|
1628
2783
|
return;
|
|
1629
2784
|
}
|
|
1630
2785
|
// Default: full output
|
|
1631
2786
|
if (options.json) {
|
|
1632
|
-
|
|
2787
|
+
const envelope = buildEnvelope(result, extra);
|
|
2788
|
+
await writeStdout(JSON.stringify(envelope, null, 2) + '\n');
|
|
1633
2789
|
}
|
|
1634
2790
|
else {
|
|
1635
2791
|
await writeStdout(result.content + '\n');
|
|
@@ -1645,6 +2801,64 @@ function writeStdout(data) {
|
|
|
1645
2801
|
});
|
|
1646
2802
|
});
|
|
1647
2803
|
}
|
|
2804
|
+
/**
 * Convert an array of listing items to CSV.
 *
 * Columns are the union of all keys that hold a defined value in at least one
 * item, in first-seen order. Every data cell is double-quoted (embedded quotes
 * doubled); null/undefined cells become `""`. Header names are written bare
 * unless they contain a quote, comma or line break, in which case they are
 * quoted too.
 *
 * @param {Array<object>} items - Rows to serialise.
 * @returns {string} CSV text with "\n" line endings and a trailing newline, or
 *   '' when `items` is empty.
 */
function formatListingsCsv(items) {
    if (items.length === 0)
        return '';
    // Collect all keys
    const keySet = new Set();
    for (const item of items) {
        for (const key of Object.keys(item)) {
            if (item[key] !== undefined)
                keySet.add(key);
        }
    }
    const keys = Array.from(keySet);
    // Always-quoted cell; doubling quotes is a no-op when the value has none.
    const escapeCsv = (s) => {
        if (s === undefined || s === null)
            return '""';
        return '"' + String(s).replace(/"/g, '""') + '"';
    };
    // Plain header names stay unquoted; only names that would break the row are quoted.
    const escapeHeader = (key) => (/[",\r\n]/.test(key) ? escapeCsv(key) : key);
    const lines = [keys.map(escapeHeader).join(',')];
    for (const item of items) {
        lines.push(keys.map((k) => escapeCsv(item[k])).join(','));
    }
    return lines.join('\n') + '\n';
}
|
|
2834
|
+
/**
 * Normalise the result of --extract (which may be a flat object or contain
 * arrays) into an array of row objects suitable for CSV / table rendering.
 *
 * - When every value is an array, the arrays are zipped into rows. The row
 *   count is the longest array's length; shorter columns are filled with
 *   `undefined` so no extracted value is dropped.
 * - Otherwise the whole object becomes a single row.
 *
 * Cell values are converted with `String()`; null/undefined become `undefined`.
 *
 * @param {object|null|undefined} extracted - Field name → value (or array of values).
 * @returns {Array<object>} Row objects; `[]` when `extracted` is null/undefined.
 */
function normaliseExtractedToRows(extracted) {
    if (extracted == null)
        return [];
    const toCell = (val) => (val != null ? String(val) : undefined);
    const fieldNames = Object.keys(extracted);
    const values = Object.values(extracted);
    const allArrays = values.length > 0 && values.every((v) => Array.isArray(v));
    if (allArrays) {
        // Use the longest column so unequal lengths never truncate data.
        const length = Math.max(...values.map((v) => v.length));
        const rows = [];
        for (let i = 0; i < length; i++) {
            const row = {};
            for (const key of fieldNames) {
                row[key] = toCell(extracted[key][i]);
            }
            rows.push(row);
        }
        return rows;
    }
    // Otherwise treat as a single row
    const row = {};
    for (const [k, v] of Object.entries(extracted)) {
        row[k] = toCell(v);
    }
    return [row];
}
|
|
1648
2862
|
// Helper function to extract colors from content
|
|
1649
2863
|
function extractColors(content) {
|
|
1650
2864
|
const colors = [];
|