webpeel 0.20.2 → 0.20.3
This diff shows the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +384 -0
- package/dist/server/auth-store.d.ts +27 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/email-service.d.ts +21 -0
- package/dist/server/email-service.js +79 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/auth.d.ts +28 -0
- package/dist/server/middleware/auth.js +221 -0
- package/dist/server/middleware/rate-limit.d.ts +24 -0
- package/dist/server/middleware/rate-limit.js +167 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +186 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +132 -0
- package/dist/server/pg-auth-store.js +472 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/premium/domain-intel.d.ts +16 -0
- package/dist/server/premium/domain-intel.js +133 -0
- package/dist/server/premium/index.d.ts +17 -0
- package/dist/server/premium/index.js +35 -0
- package/dist/server/premium/swr-cache.d.ts +14 -0
- package/dist/server/premium/swr-cache.js +34 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +74 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +229 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +8 -0
- package/dist/server/routes/extract.js +235 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +999 -0
- package/dist/server/routes/health.d.ts +7 -0
- package/dist/server/routes/health.js +19 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +573 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +141 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +816 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +303 -0
- package/dist/server/routes/session.d.ts +15 -0
- package/dist/server/routes/session.js +397 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +294 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1671 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +13 -0
- package/dist/server/sentry.js +38 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/package.json +2 -1

package/dist/server/routes/health.d.ts
@@ -0,0 +1,7 @@
+/**
+ * Health check endpoint
+ * NOTE: This route is mounted BEFORE auth/rate-limit middleware in app.ts
+ * so it's never blocked by rate limiting (Render hits it every ~30s).
+ */
+import { Router } from 'express';
+export declare function createHealthRouter(): Router;

package/dist/server/routes/health.js
@@ -0,0 +1,19 @@
+/**
+ * Health check endpoint
+ * NOTE: This route is mounted BEFORE auth/rate-limit middleware in app.ts
+ * so it's never blocked by rate limiting (Render hits it every ~30s).
+ */
+import { Router } from 'express';
+const startTime = Date.now();
+export function createHealthRouter() {
+    const router = Router();
+    router.get('/health', (_req, res) => {
+        const uptime = Math.floor((Date.now() - startTime) / 1000);
+        res.json({
+            status: 'healthy',
+            uptime,
+            timestamp: new Date().toISOString(),
+        });
+    });
+    return router;
+}
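
The NOTE in the header is operational, not cosmetic: if `/health` were mounted after the rate limiter, the platform's ~30s probes would eventually be throttled and the instance would look unhealthy. A minimal sketch of the mount order this implies; only `createHealthRouter` comes from this diff, and the commented middleware names are illustrative stand-ins for the real modules in dist/server/middleware/:

```js
// Sketch of the implied app.ts mount order (assumed, not from this diff).
import express from 'express';
import { createHealthRouter } from './routes/health.js';

const app = express();

// Health first: probes (e.g. Render's ~30s checks) bypass auth and rate limits.
app.use(createHealthRouter());

// Everything registered after this point would be rate-limited and authenticated.
// app.use(rateLimitMiddleware);
// app.use(authMiddleware);

app.listen(3000);
```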

package/dist/server/routes/jobs.d.ts
@@ -0,0 +1,7 @@
+/**
+ * Async jobs API - crawl endpoints with SSE support
+ */
+import { Router } from 'express';
+import type { AuthStore } from '../auth-store.js';
+import type { IJobQueue } from '../job-queue.js';
+export declare function createJobsRouter(jobQueue: IJobQueue, authStore: AuthStore): Router;
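
Per the declaration, the jobs router takes its two collaborators by injection rather than importing them itself. A hedged wiring sketch; only `createJobsRouter`'s signature comes from the .d.ts, and the builder function around it is an assumption:

```js
// Wiring sketch based only on the declared signature. How the queue and
// store are actually constructed in app.ts is not shown in this diff.
import express from 'express';
import { createJobsRouter } from './routes/jobs.js';

export function buildApp(jobQueue /* IJobQueue */, authStore /* AuthStore */) {
    const app = express();
    app.use(express.json());
    // The routes use absolute paths (/v1/crawl, /v1/jobs),
    // so the router mounts at the app root.
    app.use(createJobsRouter(jobQueue, authStore));
    return app;
}
```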

package/dist/server/routes/jobs.js
@@ -0,0 +1,573 @@
+/**
+ * Async jobs API - crawl endpoints with SSE support
+ */
+import { Router } from 'express';
+import crypto from 'crypto';
+import { crawl } from '../../index.js';
+import { searchJobs } from '../../core/jobs.js';
+import { sendWebhook, normalizeWebhook } from './webhooks.js';
+import { initSSE, sendSSE, endSSE, wantsSSE } from '../utils/sse.js';
+export function createJobsRouter(jobQueue, authStore) {
+    const router = Router();
+    /**
+     * POST /v1/crawl - Start async crawl job (or stream via SSE)
+     */
+    router.post('/v1/crawl', async (req, res) => {
+        try {
+            const { url, limit, maxDepth, scrapeOptions, webhook, location, languages } = req.body;
+            // Validate required parameters
+            if (!url || typeof url !== 'string') {
+                res.status(400).json({
+                    success: false,
+                    error: {
+                        type: 'invalid_request',
+                        message: 'Missing or invalid "url" parameter',
+                        hint: 'Pass a valid URL in the request body: { "url": "https://example.com" }',
+                        docs: 'https://webpeel.dev/docs/errors#invalid-request',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            // Validate URL
+            try {
+                new URL(url);
+            }
+            catch {
+                res.status(400).json({
+                    success: false,
+                    error: {
+                        type: 'invalid_url',
+                        message: 'Invalid URL format',
+                        hint: 'Ensure the URL includes a protocol: https://example.com',
+                        docs: 'https://webpeel.dev/docs/errors#invalid-url',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            const ownerId = req.auth?.keyInfo?.accountId;
+            // Normalize webhook (accept both string URL and WebhookConfig object)
+            const normalizedWebhook = webhook ? normalizeWebhook(webhook) : undefined;
+            // ── SSE streaming path ────────────────────────────────────────────────
+            if (wantsSSE(req)) {
+                const job = await jobQueue.createJob('crawl', normalizedWebhook, ownerId);
+                // Set SSE headers (X-Request-Id is already set by global middleware)
+                initSSE(res);
+                // Send started event
+                sendSSE(res, 'started', {
+                    jobId: job.id,
+                    url,
+                    depth: maxDepth || 3,
+                });
+                // Heartbeat every 15 seconds to keep connection alive
+                let closed = false;
+                const heartbeat = setInterval(() => {
+                    if (!closed) {
+                        res.write('event: ping\ndata: {}\n\n');
+                    }
+                }, 15_000);
+                req.on('close', () => {
+                    closed = true;
+                    clearInterval(heartbeat);
+                });
+                let completedCount = 0;
+                let failedCount = 0;
+                const startTime = Date.now();
+                try {
+                    jobQueue.updateJob(job.id, { status: 'processing' });
+                    const resolvedLocation = location || languages ? {
+                        country: location,
+                        languages: Array.isArray(languages) ? languages : (languages ? [languages] : undefined),
+                    } : undefined;
+                    const crawlOptions = {
+                        maxPages: limit || 100,
+                        maxDepth: maxDepth || 3,
+                        onProgress: (progress) => {
+                            const total = progress.crawled + progress.queued;
+                            jobQueue.updateJob(job.id, {
+                                total,
+                                completed: progress.crawled,
+                                creditsUsed: progress.crawled,
+                            });
+                        },
+                        onPage: (pageResult) => {
+                            if (closed)
+                                return;
+                            const total = completedCount + failedCount + 1;
+                            if (pageResult.error) {
+                                failedCount++;
+                                sendSSE(res, 'error', {
+                                    url: pageResult.url,
+                                    error: 'FETCH_ERROR',
+                                    message: pageResult.error,
+                                });
+                            }
+                            else {
+                                completedCount++;
+                                sendSSE(res, 'page', {
+                                    url: pageResult.url,
+                                    content: pageResult.markdown,
+                                    metadata: {
+                                        title: pageResult.title,
+                                        depth: pageResult.depth,
+                                        parent: pageResult.parent,
+                                        elapsed: pageResult.elapsed,
+                                    },
+                                    progress: {
+                                        completed: completedCount,
+                                        total,
+                                    },
+                                });
+                            }
+                        },
+                        ...scrapeOptions,
+                        location: resolvedLocation,
+                    };
+                    const results = await crawl(url, crawlOptions);
+                    jobQueue.updateJob(job.id, {
+                        status: 'completed',
+                        data: results,
+                        total: results.length,
+                        completed: results.length,
+                        creditsUsed: results.length,
+                    });
+                    if (!closed) {
+                        sendSSE(res, 'done', {
+                            jobId: job.id,
+                            completed: completedCount,
+                            failed: failedCount,
+                            duration: Date.now() - startTime,
+                        });
+                    }
+                }
+                catch (error) {
+                    jobQueue.updateJob(job.id, {
+                        status: 'failed',
+                        error: error.message || 'Unknown error',
+                    });
+                    if (!closed) {
+                        sendSSE(res, 'error', {
+                            error: 'CRAWL_FAILED',
+                            message: error.message || 'Unknown error',
+                        });
+                    }
+                }
+                finally {
+                    clearInterval(heartbeat);
+                    if (!closed) {
+                        endSSE(res);
+                    }
+                }
+                return;
+            }
+            // ── Regular async job path (backward compat) ─────────────────────────
+            const job = await jobQueue.createJob('crawl', normalizedWebhook, ownerId);
+            // Start crawl in background
+            setImmediate(async () => {
+                try {
+                    // Update job to processing
+                    jobQueue.updateJob(job.id, { status: 'processing' });
+                    // Send started webhook
+                    if (normalizedWebhook) {
+                        await sendWebhook(normalizedWebhook, 'started', {
+                            jobId: job.id,
+                            url,
+                        });
+                    }
+                    // Build crawl options
+                    const crawlOptions = {
+                        maxPages: limit || 100,
+                        maxDepth: maxDepth || 3,
+                        onProgress: (progress) => {
+                            // Update job progress
+                            const total = progress.crawled + progress.queued;
+                            jobQueue.updateJob(job.id, {
+                                total,
+                                completed: progress.crawled,
+                                creditsUsed: progress.crawled,
+                            });
+                            // Send page webhook
+                            if (normalizedWebhook && progress.currentUrl) {
+                                sendWebhook(normalizedWebhook, 'page', {
+                                    jobId: job.id,
+                                    url: progress.currentUrl,
+                                    completed: progress.crawled,
+                                    total,
+                                }).catch(() => { }); // Fire and forget
+                            }
+                        },
+                        // Spread existing scrapeOptions
+                        ...scrapeOptions,
+                        // Add location support if provided (CrawlOptions extends PeelOptions)
+                        location: location || languages ? {
+                            country: location,
+                            languages: Array.isArray(languages) ? languages : (languages ? [languages] : undefined),
+                        } : undefined,
+                    };
+                    // Run crawl
+                    const results = await crawl(url, crawlOptions);
+                    // Update job with results
+                    jobQueue.updateJob(job.id, {
+                        status: 'completed',
+                        data: results,
+                        total: results.length,
+                        completed: results.length,
+                        creditsUsed: results.length,
+                    });
+                    // Send completed webhook and store delivery result
+                    if (normalizedWebhook) {
+                        const delivery = await sendWebhook(normalizedWebhook, 'completed', {
+                            jobId: job.id,
+                            total: results.length,
+                        });
+                        if (delivery) {
+                            jobQueue.updateJob(job.id, { webhookDelivery: delivery });
+                        }
+                    }
+                }
+                catch (error) {
+                    // Update job with error
+                    jobQueue.updateJob(job.id, {
+                        status: 'failed',
+                        error: error.message || 'Unknown error',
+                    });
+                    // Send failed webhook
+                    if (normalizedWebhook) {
+                        await sendWebhook(normalizedWebhook, 'failed', {
+                            jobId: job.id,
+                            error: error.message || 'Unknown error',
+                        });
+                    }
+                }
+            });
+            // Return job ID immediately
+            res.status(202).json({
+                success: true,
+                id: job.id,
+                url: `/v1/crawl/${job.id}`,
+            });
+        }
+        catch (error) {
+            console.error('Crawl job creation error:', error);
+            res.status(500).json({
+                success: false,
+                error: {
+                    type: 'internal_error',
+                    message: 'Failed to create crawl job',
+                    docs: 'https://webpeel.dev/docs/errors#internal-error',
+                },
+                requestId: req.requestId || crypto.randomUUID(),
+            });
+        }
+    });
+    /**
+     * GET /v1/crawl/:id - Get crawl job status + results (with SSE support)
+     */
+    router.get('/v1/crawl/:id', async (req, res) => {
+        try {
+            const id = req.params.id;
+            const job = await jobQueue.getJob(id);
+            if (!job) {
+                res.status(404).json({
+                    success: false,
+                    error: {
+                        type: 'not_found',
+                        message: 'Job not found',
+                        hint: 'Check the job ID and ensure it has not expired.',
+                        docs: 'https://webpeel.dev/docs/errors#not-found',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            // SECURITY: Verify the requester owns this job
+            const requestOwnerId = req.auth?.keyInfo?.accountId;
+            if (job.ownerId && requestOwnerId && job.ownerId !== requestOwnerId) {
+                res.status(404).json({
+                    success: false,
+                    error: {
+                        type: 'not_found',
+                        message: 'Job not found',
+                        hint: 'Check the job ID and ensure it has not expired.',
+                        docs: 'https://webpeel.dev/docs/errors#not-found',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            // Check for SSE request
+            const acceptHeader = req.get('Accept');
+            const isSSE = acceptHeader?.includes('text/event-stream');
+            if (isSSE) {
+                // Set SSE headers
+                res.setHeader('Content-Type', 'text/event-stream');
+                res.setHeader('Cache-Control', 'no-cache');
+                res.setHeader('Connection', 'keep-alive');
+                // Send initial event
+                const sendEvent = (data) => {
+                    res.write(`data: ${JSON.stringify(data)}\n\n`);
+                };
+                sendEvent({
+                    event: 'status',
+                    ...job,
+                });
+                // Poll for updates every second
+                const interval = setInterval(async () => {
+                    const updatedJob = await jobQueue.getJob(id);
+                    if (!updatedJob) {
+                        clearInterval(interval);
+                        res.end();
+                        return;
+                    }
+                    sendEvent({
+                        event: 'status',
+                        ...updatedJob,
+                    });
+                    // End stream if job is complete
+                    if (updatedJob.status === 'completed' || updatedJob.status === 'failed' || updatedJob.status === 'cancelled') {
+                        clearInterval(interval);
+                        res.end();
+                    }
+                }, 1000);
+                // Clean up on client disconnect
+                req.on('close', () => {
+                    clearInterval(interval);
+                });
+            }
+            else {
+                // Return JSON response
+                res.json({
+                    success: true,
+                    status: job.status,
+                    progress: job.progress,
+                    total: job.total,
+                    completed: job.completed,
+                    creditsUsed: job.creditsUsed,
+                    data: job.data,
+                    error: job.error,
+                    expiresAt: job.expiresAt,
+                    ...(job.webhookDelivery ? { webhook: job.webhookDelivery } : {}),
+                });
+            }
+        }
+        catch (error) {
+            console.error('Get crawl job error:', error);
+            res.status(500).json({
+                success: false,
+                error: {
+                    type: 'internal_error',
+                    message: 'Failed to retrieve job',
+                    docs: 'https://webpeel.dev/docs/errors#internal-error',
+                },
+                requestId: req.requestId || crypto.randomUUID(),
+            });
+        }
+    });
+    /**
+     * DELETE /v1/crawl/:id - Cancel crawl job
+     */
+    router.delete('/v1/crawl/:id', async (req, res) => {
+        try {
+            const id = req.params.id;
+            // SECURITY: Verify the requester owns this job before cancelling
+            const job = await jobQueue.getJob(id);
+            const requestOwnerId = req.auth?.keyInfo?.accountId;
+            if (job?.ownerId && requestOwnerId && job.ownerId !== requestOwnerId) {
+                res.status(404).json({
+                    success: false,
+                    error: {
+                        type: 'not_found',
+                        message: 'Job not found or cannot be cancelled',
+                        hint: 'Check the job ID and ensure you own this job.',
+                        docs: 'https://webpeel.dev/docs/errors#not-found',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            const cancelled = await jobQueue.cancelJob(id);
+            if (!cancelled) {
+                res.status(404).json({
+                    success: false,
+                    error: {
+                        type: 'not_found',
+                        message: 'Job not found or cannot be cancelled',
+                        hint: 'The job may have already completed or expired.',
+                        docs: 'https://webpeel.dev/docs/errors#not-found',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            res.json({
+                success: true,
+                message: 'Job cancelled',
+            });
+        }
+        catch (error) {
+            console.error('Cancel crawl job error:', error);
+            res.status(500).json({
+                success: false,
+                error: {
+                    type: 'internal_error',
+                    message: 'Failed to cancel job',
+                    docs: 'https://webpeel.dev/docs/errors#internal-error',
+                },
+                requestId: req.requestId || crypto.randomUUID(),
+            });
+        }
+    });
+    /**
+     * GET /v1/jobs - List all jobs
+     */
+    router.get('/v1/jobs', async (req, res) => {
+        try {
+            const { type, status, limit } = req.query;
+            // SECURITY: Filter jobs by the authenticated user's ownership
+            const ownerId = req.auth?.keyInfo?.accountId;
+            const jobs = await jobQueue.listJobs({
+                type: type,
+                status: status,
+                limit: limit ? parseInt(limit, 10) : 50,
+                ownerId,
+            });
+            res.json({
+                success: true,
+                count: jobs.length,
+                jobs,
+            });
+        }
+        catch (error) {
+            console.error('List jobs error:', error);
+            res.status(500).json({
+                success: false,
+                error: {
+                    type: 'internal_error',
+                    message: 'Failed to list jobs',
+                    docs: 'https://webpeel.dev/docs/errors#internal-error',
+                },
+                requestId: req.requestId || crypto.randomUUID(),
+            });
+        }
+    });
+    /**
+     * POST /v1/jobs — Search job boards (LinkedIn, Indeed, Glassdoor)
+     *
+     * Credits: 1 for the search + 1 per detail page fetched.
+     */
+    router.post('/v1/jobs', async (req, res) => {
+        try {
+            const { url, keywords, location, source, limit, fetchDetails, timeout, webhook: jobWebhook, } = req.body;
+            // Must provide either url or keywords
+            if (!url && !keywords) {
+                res.status(400).json({
+                    success: false,
+                    error: {
+                        type: 'invalid_request',
+                        message: 'Provide either "url" or "keywords" in the request body.',
+                        hint: 'Example: { "keywords": "software engineer", "location": "New York" }',
+                        docs: 'https://webpeel.dev/docs/errors#invalid-request',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            // Validate source
+            const validSources = ['glassdoor', 'indeed', 'linkedin'];
+            if (source && !validSources.includes(source)) {
+                res.status(400).json({
+                    success: false,
+                    error: {
+                        type: 'invalid_request',
+                        message: `Invalid "source": must be one of ${validSources.join(', ')}`,
+                        hint: `Use one of: ${validSources.join(', ')}`,
+                        docs: 'https://webpeel.dev/docs/errors#invalid-request',
+                    },
+                    requestId: req.requestId || crypto.randomUUID(),
+                });
+                return;
+            }
+            // Validate numeric params
+            const resolvedLimit = typeof limit === 'number' ? Math.min(Math.max(limit, 1), 100) : 25;
+            const resolvedDetails = typeof fetchDetails === 'number' ? Math.min(Math.max(fetchDetails, 0), resolvedLimit) : 0;
+            const resolvedTimeout = typeof timeout === 'number' ? Math.min(Math.max(timeout, 5000), 120000) : 30000;
+            const searchOpts = {
+                url: url || undefined,
+                keywords: keywords || undefined,
+                location: location || undefined,
+                source: source || undefined,
+                limit: resolvedLimit,
+                fetchDetails: resolvedDetails,
+                timeout: resolvedTimeout,
+            };
+            const startTime = Date.now();
+            const result = await searchJobs(searchOpts);
+            const elapsed = Date.now() - startTime;
+            // Credits: 1 for the search + 1 per detail page fetched
+            const creditsUsed = 1 + result.detailsFetched;
+            // Track usage
+            const isSoftLimited = req.auth?.softLimited === true;
+            const hasExtraUsage = req.auth?.extraUsageAvailable === true;
+            const pgStore = authStore;
+            if (req.auth?.keyInfo?.accountId && typeof pgStore.pool !== 'undefined') {
+                pgStore.pool.query(`INSERT INTO usage_logs
+                    (user_id, endpoint, url, method, processing_time_ms, status_code, ip_address, user_agent)
+                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`, [
+                    req.auth.keyInfo.accountId,
+                    'jobs',
+                    result.searchUrl || keywords || url || '',
+                    'basic',
+                    elapsed,
+                    200,
+                    req.ip || req.socket.remoteAddress,
+                    req.get('user-agent'),
+                ]).catch((err) => {
+                    console.error('Failed to log jobs request to usage_logs:', err);
+                });
+            }
+            if (req.auth?.keyInfo?.key && typeof pgStore.trackBurstUsage === 'function') {
+                await pgStore.trackBurstUsage(req.auth.keyInfo.key);
+                if (isSoftLimited && hasExtraUsage) {
+                    const extraResult = await pgStore.trackExtraUsage(req.auth.keyInfo.key, 'search', result.searchUrl || keywords || url || '', elapsed, 200);
+                    if (extraResult.success) {
+                        res.setHeader('X-Extra-Usage-Charged', `$${extraResult.cost.toFixed(4)}`);
+                        res.setHeader('X-Extra-Usage-New-Balance', extraResult.newBalance.toFixed(2));
+                    }
+                }
+                else if (!isSoftLimited) {
+                    await pgStore.trackUsage(req.auth.keyInfo.key, 'search');
+                }
+            }
+            res.setHeader('X-Credits-Used', creditsUsed.toString());
+            res.setHeader('X-Processing-Time', elapsed.toString());
+            res.json({
+                success: true,
+                data: result,
+                creditsUsed,
+            });
+            // Fire webhook after response (non-blocking, fire-and-forget)
+            if (jobWebhook) {
+                const webhookConfig = normalizeWebhook(jobWebhook, ['completed']);
+                sendWebhook(webhookConfig, 'completed', {
+                    keywords: keywords || url,
+                    total: Array.isArray(result.jobs) ? result.jobs.length : 0,
+                    data: result,
+                }).catch(() => { });
+            }
+        }
+        catch (error) {
+            console.error('POST /v1/jobs error:', error);
+            res.status(500).json({
+                success: false,
+                error: {
+                    type: 'internal_error',
+                    message: 'Job search failed. Please try again.',
+                    docs: 'https://webpeel.dev/docs/errors#internal-error',
+                },
+                requestId: req.requestId || crypto.randomUUID(),
+            });
+        }
+    });
+    return router;
+}
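
Two client modes fall out of the POST /v1/crawl handler above. With an SSE-style request (judging by the Accept-header check in the GET handler, `wantsSSE` presumably keys off `Accept: text/event-stream`), pages stream back as `started`/`page`/`error`/`done` events with a `ping` heartbeat every 15 seconds. A sketch of a streaming consumer for Node 18+; the base URL, env names, and Bearer auth scheme are placeholder assumptions, while the event names match the handler:

```js
// Streaming consumer sketch for POST /v1/crawl (Node 18+, ESM).
// BASE_URL / API_KEY and the Bearer scheme are assumptions, not from this diff.
const BASE_URL = process.env.WEBPEEL_URL ?? 'http://localhost:3000';
const API_KEY = process.env.WEBPEEL_KEY;

const res = await fetch(`${BASE_URL}/v1/crawl`, {
    method: 'POST',
    headers: {
        'Content-Type': 'application/json',
        'Accept': 'text/event-stream', // opt in to the SSE path
        'Authorization': `Bearer ${API_KEY}`,
    },
    body: JSON.stringify({ url: 'https://example.com', limit: 10 }),
});

// Minimal SSE parsing: frames are separated by a blank line and carry
// "event:" / "data:" fields (emitted by utils/sse.js in the file list above).
const decoder = new TextDecoder();
let buffer = '';
for await (const chunk of res.body) {
    buffer += decoder.decode(chunk, { stream: true });
    let idx;
    while ((idx = buffer.indexOf('\n\n')) !== -1) {
        const frame = buffer.slice(0, idx);
        buffer = buffer.slice(idx + 2);
        const event = frame.match(/^event: (.+)$/m)?.[1] ?? 'message';
        const data = frame.match(/^data: (.+)$/m)?.[1];
        if (event === 'ping') continue; // 15s heartbeat, empty payload
        console.log(event, data ? JSON.parse(data) : undefined);
    }
}
```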
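Without that Accept header, the same POST returns 202 immediately and runs the crawl in the background via `setImmediate`; the client then polls `GET /v1/crawl/:id` until a terminal status (`completed`, `failed`, or `cancelled`). A polling sketch under the same placeholder assumptions:

```js
// Polling consumer sketch for the 202 path. BASE_URL / API_KEY and the
// Bearer scheme are placeholder assumptions, as in the streaming example.
const BASE_URL = process.env.WEBPEEL_URL ?? 'http://localhost:3000';
const API_KEY = process.env.WEBPEEL_KEY;
const headers = {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${API_KEY}`,
};

const created = await fetch(`${BASE_URL}/v1/crawl`, {
    method: 'POST',
    headers,
    body: JSON.stringify({ url: 'https://example.com', limit: 10 }),
}).then((r) => r.json());
// 202 body: { success: true, id: '...', url: '/v1/crawl/<id>' }

let job;
do {
    await new Promise((resolve) => setTimeout(resolve, 1000));
    job = await fetch(`${BASE_URL}${created.url}`, { headers }).then((r) => r.json());
} while (!['completed', 'failed', 'cancelled'].includes(job.status));

console.log(job.status, { creditsUsed: job.creditsUsed, pages: job.data?.length });
```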

package/dist/server/routes/mcp.d.ts
@@ -0,0 +1,14 @@
+/**
+ * Hosted MCP endpoint — POST /mcp, POST /v2/mcp, POST /:apiKey/v2/mcp
+ *
+ * Thin HTTP/SSE transport wrapper. All tool logic lives in the shared handler
+ * registry at src/mcp/handlers/. This file handles:
+ * - Express routing and auth
+ * - MCP Streamable HTTP transport setup
+ * - Passing McpContext (accountId, pool) to handlers
+ */
+import { Router } from 'express';
+import type { AuthStore } from '../auth-store.js';
+import '../types.js';
+import type { Pool } from 'pg';
+export declare function createMcpRouter(_authStore?: AuthStore, pool?: Pool | null): Router;
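
Both parameters are optional in the declaration, so the MCP router can evidently be mounted without an auth store or a Postgres pool. A wiring sketch; apart from `createMcpRouter`'s signature, the details here (the `DATABASE_URL` env name, mounting at the app root) are assumptions:

```js
// Illustrative wiring only; everything except createMcpRouter's signature is assumed.
import express from 'express';
import { Pool } from 'pg';
import { createMcpRouter } from './routes/mcp.js';

const app = express();
const pool = new Pool({ connectionString: process.env.DATABASE_URL }); // assumed env name
// Both arguments are optional per the .d.ts; createMcpRouter() also type-checks.
app.use(createMcpRouter(undefined, pool));
```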