webpeel 0.20.2 → 0.20.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/app.d.ts +14 -0
- package/dist/server/app.js +384 -0
- package/dist/server/auth-store.d.ts +27 -0
- package/dist/server/auth-store.js +88 -0
- package/dist/server/email-service.d.ts +21 -0
- package/dist/server/email-service.js +79 -0
- package/dist/server/job-queue.d.ts +100 -0
- package/dist/server/job-queue.js +145 -0
- package/dist/server/logger.d.ts +10 -0
- package/dist/server/logger.js +37 -0
- package/dist/server/middleware/auth.d.ts +28 -0
- package/dist/server/middleware/auth.js +221 -0
- package/dist/server/middleware/rate-limit.d.ts +24 -0
- package/dist/server/middleware/rate-limit.js +167 -0
- package/dist/server/middleware/url-validator.d.ts +15 -0
- package/dist/server/middleware/url-validator.js +186 -0
- package/dist/server/openapi.yaml +6418 -0
- package/dist/server/pg-auth-store.d.ts +132 -0
- package/dist/server/pg-auth-store.js +472 -0
- package/dist/server/pg-job-queue.d.ts +59 -0
- package/dist/server/pg-job-queue.js +375 -0
- package/dist/server/premium/domain-intel.d.ts +16 -0
- package/dist/server/premium/domain-intel.js +133 -0
- package/dist/server/premium/index.d.ts +17 -0
- package/dist/server/premium/index.js +35 -0
- package/dist/server/premium/swr-cache.d.ts +14 -0
- package/dist/server/premium/swr-cache.js +34 -0
- package/dist/server/routes/activity.d.ts +6 -0
- package/dist/server/routes/activity.js +74 -0
- package/dist/server/routes/answer.d.ts +5 -0
- package/dist/server/routes/answer.js +125 -0
- package/dist/server/routes/ask.d.ts +28 -0
- package/dist/server/routes/ask.js +229 -0
- package/dist/server/routes/batch.d.ts +6 -0
- package/dist/server/routes/batch.js +493 -0
- package/dist/server/routes/cli-usage.d.ts +6 -0
- package/dist/server/routes/cli-usage.js +127 -0
- package/dist/server/routes/compat.d.ts +23 -0
- package/dist/server/routes/compat.js +652 -0
- package/dist/server/routes/deep-fetch.d.ts +8 -0
- package/dist/server/routes/deep-fetch.js +57 -0
- package/dist/server/routes/demo.d.ts +24 -0
- package/dist/server/routes/demo.js +517 -0
- package/dist/server/routes/do.d.ts +8 -0
- package/dist/server/routes/do.js +72 -0
- package/dist/server/routes/extract.d.ts +8 -0
- package/dist/server/routes/extract.js +235 -0
- package/dist/server/routes/fetch.d.ts +7 -0
- package/dist/server/routes/fetch.js +999 -0
- package/dist/server/routes/health.d.ts +7 -0
- package/dist/server/routes/health.js +19 -0
- package/dist/server/routes/jobs.d.ts +7 -0
- package/dist/server/routes/jobs.js +573 -0
- package/dist/server/routes/mcp.d.ts +14 -0
- package/dist/server/routes/mcp.js +141 -0
- package/dist/server/routes/oauth.d.ts +9 -0
- package/dist/server/routes/oauth.js +396 -0
- package/dist/server/routes/playground.d.ts +17 -0
- package/dist/server/routes/playground.js +283 -0
- package/dist/server/routes/screenshot.d.ts +22 -0
- package/dist/server/routes/screenshot.js +816 -0
- package/dist/server/routes/search.d.ts +6 -0
- package/dist/server/routes/search.js +303 -0
- package/dist/server/routes/session.d.ts +15 -0
- package/dist/server/routes/session.js +397 -0
- package/dist/server/routes/stats.d.ts +6 -0
- package/dist/server/routes/stats.js +71 -0
- package/dist/server/routes/stripe.d.ts +15 -0
- package/dist/server/routes/stripe.js +294 -0
- package/dist/server/routes/users.d.ts +8 -0
- package/dist/server/routes/users.js +1671 -0
- package/dist/server/routes/watch.d.ts +15 -0
- package/dist/server/routes/watch.js +309 -0
- package/dist/server/routes/webhooks.d.ts +26 -0
- package/dist/server/routes/webhooks.js +170 -0
- package/dist/server/routes/youtube.d.ts +6 -0
- package/dist/server/routes/youtube.js +130 -0
- package/dist/server/sentry.d.ts +13 -0
- package/dist/server/sentry.js +38 -0
- package/dist/server/types.d.ts +15 -0
- package/dist/server/types.js +7 -0
- package/dist/server/utils/response.d.ts +44 -0
- package/dist/server/utils/response.js +69 -0
- package/dist/server/utils/sse.d.ts +22 -0
- package/dist/server/utils/sse.js +38 -0
- package/package.json +2 -1
|
/**
 * Batch scrape API - process multiple URLs concurrently.
 *
 * Endpoints:
 *   POST   /v1/batch/scrape      - submit a batch (SSE streaming or async job)
 *   GET    /v1/batch/scrape/:id  - poll batch status + results
 *   DELETE /v1/batch/scrape/:id  - cancel a running batch
 */
import { Router } from 'express';
import crypto from 'crypto';
import { peel } from '../../index.js';
import { sendWebhook, normalizeWebhook } from './webhooks.js';
import { initSSE, sendSSE, endSSE, wantsSSE } from '../utils/sse.js';

// Hard limits for a single batch request.
const MAX_BATCH_SIZE = 100;
const MAX_CONCURRENCY = 10;
const DEFAULT_CONCURRENCY = 5;

/**
 * Write a standard-format JSON error response.
 *
 * @param res Express response
 * @param status HTTP status code
 * @param type machine-readable error type (also used as the docs anchor)
 * @param message human-readable message
 * @param [hint] optional remediation hint
 */
function sendErrorResponse(res, status, type, message, hint) {
    res.status(status).json({
        success: false,
        error: {
            type,
            message,
            ...(hint !== undefined ? { hint } : {}),
            docs: `https://webpeel.dev/docs/errors#${type}`,
        },
        requestId: crypto.randomUUID(),
    });
}

/**
 * Clamp caller-supplied concurrency to [1, MAX_CONCURRENCY], defaulting to
 * DEFAULT_CONCURRENCY when absent or non-numeric.
 */
function resolveConcurrency(concurrency) {
    // Explicit radix 10: the value arrives from untyped JSON / query input.
    const parsed = Number.parseInt(concurrency, 10);
    return Math.min(Math.max(parsed || DEFAULT_CONCURRENCY, 1), MAX_CONCURRENCY);
}

/**
 * Process every URL with a bounded worker pool (replaces the previous
 * busy-wait setTimeout polling semaphore).
 *
 * Each worker repeatedly claims the next unprocessed index and awaits
 * `processOne(url, index)`. Before claiming, `shouldStop()` is consulted so a
 * cancelled job stops dispatching new URLs (in-flight URLs still finish),
 * matching the original per-URL cancellation check.
 *
 * @returns {Promise<boolean>} true when the whole batch ran; false when it
 *   stopped early because `shouldStop()` reported cancellation.
 */
async function runWithConcurrency(urls, maxConcurrent, shouldStop, processOne) {
    let nextIndex = 0;
    let stopped = false;
    const worker = async () => {
        while (!stopped && nextIndex < urls.length) {
            if (await shouldStop()) {
                stopped = true;
                return;
            }
            // Claim an index; re-check the bound since other workers may have
            // advanced nextIndex while we awaited shouldStop().
            const index = nextIndex;
            nextIndex += 1;
            if (index >= urls.length)
                return;
            await processOne(urls[index], index);
        }
    };
    const workerCount = Math.max(1, Math.min(maxConcurrent, urls.length));
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return !stopped;
}

/**
 * Look up a batch job and enforce the shared not-found / ownership / type
 * checks, writing the error response itself when a check fails.
 *
 * Ownership failures deliberately return 404 (not 403) so job IDs cannot be
 * probed by other accounts.
 *
 * @returns {Promise<object|null>} the job, or null when a response was sent
 */
async function loadOwnedBatchJob(jobQueue, req, res) {
    const job = await jobQueue.getJob(req.params.id);
    if (!job) {
        sendErrorResponse(res, 404, 'not_found', 'Job not found');
        return null;
    }
    // SECURITY: Verify the requester owns this job.
    const requestOwnerId = req.auth?.keyInfo?.accountId;
    if (job.ownerId && requestOwnerId && job.ownerId !== requestOwnerId) {
        sendErrorResponse(res, 404, 'not_found', 'Job not found');
        return null;
    }
    if (job.type !== 'batch') {
        sendErrorResponse(res, 400, 'invalid_request', 'Job is not a batch scrape job');
        return null;
    }
    return job;
}

export function createBatchRouter(jobQueue) {
    const router = Router();
    /**
     * POST /v1/batch/scrape - Submit batch of URLs.
     * Streams per-URL results over SSE when the client asks for it; otherwise
     * creates an async job and returns 202 with a polling URL.
     */
    router.post('/v1/batch/scrape', async (req, res) => {
        try {
            const { urls, formats, extract, maxTokens, webhook, concurrency } = req.body;
            // Validate required parameters.
            if (!urls || !Array.isArray(urls) || urls.length === 0) {
                sendErrorResponse(res, 400, 'invalid_request', 'Missing or invalid "urls" parameter (must be non-empty array)', 'Pass a non-empty array of URL strings in the "urls" field.');
                return;
            }
            // Limit batch size.
            if (urls.length > MAX_BATCH_SIZE) {
                sendErrorResponse(res, 400, 'invalid_request', 'Batch size too large (max 100 URLs)', 'Split your request into batches of 100 URLs or fewer.');
                return;
            }
            // Validate every URL up front so we fail fast before creating a job.
            for (const url of urls) {
                if (typeof url !== 'string') {
                    sendErrorResponse(res, 400, 'invalid_request', 'All URLs must be strings', 'Each element in the "urls" array must be a string.');
                    return;
                }
                try {
                    new URL(url);
                }
                catch {
                    sendErrorResponse(res, 400, 'invalid_url', `Invalid URL format: ${url}`, 'Ensure each URL includes a scheme (https://) and a valid hostname.');
                    return;
                }
            }
            const ownerId = req.auth?.keyInfo?.accountId;
            const normalizedWebhook = webhook ? normalizeWebhook(webhook) : undefined;
            const maxConcurrent = resolveConcurrency(concurrency);
            const peelOptions = {
                format: formats?.[0] || 'markdown',
                extract,
                maxTokens,
            };
            // Shared by both paths: one job per request, pre-sized with the total.
            const job = await jobQueue.createJob('batch', normalizedWebhook, ownerId);
            await jobQueue.updateJob(job.id, { total: urls.length });
            // Cancellation probe consulted once per URL dispatch.
            const shouldStop = async () => {
                const current = await jobQueue.getJob(job.id);
                return current?.status === 'cancelled';
            };
            // ── SSE streaming path ────────────────────────────────────────────────
            if (wantsSSE(req)) {
                // Set SSE headers (X-Request-Id already set by global middleware).
                initSSE(res);
                sendSSE(res, 'started', {
                    batchId: job.id,
                    totalUrls: urls.length,
                });
                let closed = false;
                const heartbeat = setInterval(() => {
                    if (!closed)
                        res.write('event: ping\ndata: {}\n\n');
                }, 15_000);
                req.on('close', () => {
                    closed = true;
                    clearInterval(heartbeat);
                });
                const startTime = Date.now();
                let completedCount = 0;
                let failedCount = 0;
                try {
                    jobQueue.updateJob(job.id, { status: 'processing' });
                    const results = new Array(urls.length);
                    const recordProgress = () => {
                        jobQueue.updateJob(job.id, {
                            completed: completedCount + failedCount,
                            creditsUsed: completedCount + failedCount,
                        });
                    };
                    const ranToCompletion = await runWithConcurrency(urls, maxConcurrent, shouldStop, async (url, index) => {
                        try {
                            const result = await peel(url, peelOptions);
                            results[index] = result;
                            completedCount++;
                            recordProgress();
                            if (!closed) {
                                sendSSE(res, 'result', {
                                    url,
                                    content: result.content,
                                    metadata: result.metadata,
                                    index,
                                });
                            }
                        }
                        catch (err) {
                            failedCount++;
                            results[index] = { url, error: err.message || 'Unknown error' };
                            recordProgress();
                            if (!closed) {
                                sendSSE(res, 'error', {
                                    url,
                                    error: 'FETCH_ERROR',
                                    message: err.message || 'Unknown error',
                                    index,
                                });
                            }
                        }
                    });
                    // BUGFIX: only mark the job completed when it was NOT cancelled;
                    // the previous code overwrote a 'cancelled' status with 'completed'.
                    if (ranToCompletion) {
                        jobQueue.updateJob(job.id, {
                            status: 'completed',
                            data: results,
                        });
                        if (!closed) {
                            sendSSE(res, 'done', {
                                batchId: job.id,
                                completed: completedCount,
                                failed: failedCount,
                                duration: Date.now() - startTime,
                            });
                        }
                    }
                }
                catch (error) {
                    jobQueue.updateJob(job.id, {
                        status: 'failed',
                        error: error.message || 'Unknown error',
                    });
                    if (!closed) {
                        sendSSE(res, 'error', {
                            error: 'BATCH_FAILED',
                            message: error.message || 'Unknown error',
                        });
                    }
                }
                finally {
                    clearInterval(heartbeat);
                    if (!closed)
                        endSSE(res);
                }
                return;
            }
            // ── Regular async job path (backward compat) ─────────────────────────
            // Start batch processing in the background; respond immediately below.
            setImmediate(async () => {
                try {
                    jobQueue.updateJob(job.id, { status: 'processing' });
                    // Send started webhook.
                    if (normalizedWebhook) {
                        await sendWebhook(normalizedWebhook, 'started', {
                            jobId: job.id,
                            total: urls.length,
                        });
                    }
                    const results = [];
                    const ranToCompletion = await runWithConcurrency(urls, maxConcurrent, shouldStop, async (url, index) => {
                        try {
                            const result = await peel(url, peelOptions);
                            results[index] = result;
                            // Update progress (errors also count toward progress below).
                            const completed = results.filter(r => r !== undefined).length;
                            jobQueue.updateJob(job.id, { completed, creditsUsed: completed });
                            // Per-page webhook: fire and forget.
                            if (normalizedWebhook) {
                                sendWebhook(normalizedWebhook, 'page', {
                                    jobId: job.id,
                                    url,
                                    completed,
                                    total: urls.length,
                                }).catch(() => { });
                            }
                        }
                        catch (error) {
                            // Store the error as this URL's result.
                            results[index] = {
                                url,
                                error: error.message || 'Unknown error',
                            };
                            const completed = results.filter(r => r !== undefined).length;
                            jobQueue.updateJob(job.id, { completed, creditsUsed: completed });
                        }
                    });
                    // BUGFIX: a cancelled job keeps its 'cancelled' status instead of
                    // being overwritten with 'completed' and firing the webhook.
                    if (!ranToCompletion)
                        return;
                    jobQueue.updateJob(job.id, {
                        status: 'completed',
                        data: results,
                    });
                    // Send completed webhook and store the delivery result.
                    if (normalizedWebhook) {
                        const delivery = await sendWebhook(normalizedWebhook, 'completed', {
                            jobId: job.id,
                            total: urls.length,
                            completed: results.length,
                        });
                        if (delivery) {
                            jobQueue.updateJob(job.id, { webhookDelivery: delivery });
                        }
                    }
                }
                catch (error) {
                    jobQueue.updateJob(job.id, {
                        status: 'failed',
                        error: error.message || 'Unknown error',
                    });
                    if (normalizedWebhook) {
                        await sendWebhook(normalizedWebhook, 'failed', {
                            jobId: job.id,
                            error: error.message || 'Unknown error',
                        });
                    }
                }
            });
            // Return the job ID immediately; client polls GET /v1/batch/scrape/:id.
            res.status(202).json({
                success: true,
                id: job.id,
                url: `/v1/batch/scrape/${job.id}`,
            });
        }
        catch (error) {
            console.error('Batch scrape creation error:', error);
            sendErrorResponse(res, 500, 'internal_error', 'Failed to create batch scrape job');
        }
    });
    /**
     * GET /v1/batch/scrape/:id - Get batch scrape status + results.
     */
    router.get('/v1/batch/scrape/:id', async (req, res) => {
        try {
            const job = await loadOwnedBatchJob(jobQueue, req, res);
            if (!job)
                return;
            res.json({
                success: true,
                status: job.status,
                progress: job.progress,
                total: job.total,
                completed: job.completed,
                creditsUsed: job.creditsUsed,
                data: job.data,
                error: job.error,
                expiresAt: job.expiresAt,
                ...(job.webhookDelivery ? { webhook: job.webhookDelivery } : {}),
            });
        }
        catch (error) {
            console.error('Get batch scrape error:', error);
            sendErrorResponse(res, 500, 'internal_error', 'Failed to retrieve job');
        }
    });
    /**
     * DELETE /v1/batch/scrape/:id - Cancel batch scrape job.
     */
    router.delete('/v1/batch/scrape/:id', async (req, res) => {
        try {
            const job = await loadOwnedBatchJob(jobQueue, req, res);
            if (!job)
                return;
            const cancelled = await jobQueue.cancelJob(job.id);
            if (!cancelled) {
                sendErrorResponse(res, 400, 'invalid_request', 'Job cannot be cancelled (already completed or failed)', 'Only pending or processing jobs can be cancelled.');
                return;
            }
            res.json({
                success: true,
                message: 'Job cancelled',
            });
        }
        catch (error) {
            console.error('Cancel batch scrape error:', error);
            sendErrorResponse(res, 500, 'internal_error', 'Failed to cancel job');
        }
    });
    return router;
}
|
/**
 * CLI Usage endpoint — works with API key auth (not JWT)
 * Used by the `webpeel usage` command and pre-fetch usage checks
 */
import { Router } from 'express';
import pg from 'pg';
const { Pool } = pg;

// Fallback limits applied when the user's plan row has no explicit values.
const DEFAULT_WEEKLY_LIMIT = 125;
const DEFAULT_BURST_LIMIT = 25;

/**
 * Build the week label (e.g. "2026-W07") used as the weekly_usage.week key.
 *
 * NOTE(review): this approximates the ISO-8601 week number and can drift near
 * the year boundary; it presumably mirrors whatever writes weekly_usage.week —
 * confirm both sides use the same formula before changing it.
 */
function currentWeekLabel(now) {
    const year = now.getUTCFullYear();
    const jan4 = new Date(Date.UTC(year, 0, 4));
    const weekNum = Math.ceil(((now.getTime() - jan4.getTime()) / 86400000 + jan4.getUTCDay() + 1) / 7);
    return `${year}-W${String(weekNum).padStart(2, '0')}`;
}

export function createCLIUsageRouter() {
    const router = Router();
    const dbUrl = process.env.DATABASE_URL;
    if (!dbUrl) {
        // If no DB, return a stub router that reports the feature as unavailable.
        router.get('/v1/cli/usage', (req, res) => {
            res.status(501).json({
                success: false,
                error: {
                    type: 'not_configured',
                    message: 'Usage tracking requires PostgreSQL backend',
                    docs: 'https://webpeel.dev/docs/errors#not_configured',
                },
                requestId: req.requestId,
            });
        });
        return router;
    }
    const pool = new Pool({
        connectionString: dbUrl,
        // TLS: enabled when DATABASE_URL contains sslmode=require.
        // Secure by default (rejectUnauthorized: true); set PG_REJECT_UNAUTHORIZED=false
        // only for managed DBs (Render/Neon/Supabase) that use self-signed certs.
        ssl: dbUrl.includes('sslmode=require')
            ? { rejectUnauthorized: process.env.PG_REJECT_UNAUTHORIZED !== 'false' }
            : undefined,
    });
    /**
     * GET /v1/cli/usage
     * Returns usage info for the authenticated API key's owner
     * Auth: API key via Authorization: Bearer <key> or X-API-Key header
     */
    router.get('/v1/cli/usage', async (req, res) => {
        try {
            // Require API key auth (set by global auth middleware).
            if (!req.auth?.keyInfo?.accountId) {
                res.status(401).json({ success: false, error: { type: 'unauthorized', message: 'Valid API key required. Run `webpeel login` to authenticate.', docs: 'https://webpeel.dev/docs/authentication' }, requestId: req.requestId });
                return;
            }
            const userId = req.auth.keyInfo.accountId;
            // Get user plan info.
            const planResult = await pool.query('SELECT tier, weekly_limit, burst_limit FROM users WHERE id = $1', [userId]);
            if (planResult.rows.length === 0) {
                res.status(404).json({ success: false, error: { type: 'user_not_found', message: 'User not found', docs: 'https://webpeel.dev/docs/errors#user_not_found' }, requestId: req.requestId });
                return;
            }
            const plan = planResult.rows[0];
            const now = new Date();
            const currentWeek = currentWeekLabel(now);
            // Burst usage is bucketed by UTC hour ("YYYY-MM-DDTHH").
            const currentHour = now.toISOString().substring(0, 13);
            // Weekly usage summed across all of the user's active keys.
            const weeklyResult = await pool.query(`SELECT
        COALESCE(SUM(wu.total_count), 0) as total_used,
        COALESCE(SUM(wu.basic_count), 0) as basic_used,
        COALESCE(SUM(wu.stealth_count), 0) as stealth_used,
        COALESCE(SUM(wu.search_count), 0) as search_used
       FROM api_keys ak
       LEFT JOIN weekly_usage wu ON wu.api_key_id = ak.id AND wu.week = $2
       WHERE ak.user_id = $1 AND ak.is_active = true`, [userId, currentWeek]);
            const weekly = weeklyResult.rows[0];
            // SUM() comes back as a string from pg; parse with explicit radix.
            const totalUsed = Number.parseInt(weekly.total_used, 10) || 0;
            const weeklyLimit = plan.weekly_limit || DEFAULT_WEEKLY_LIMIT;
            const remaining = Math.max(0, weeklyLimit - totalUsed);
            // Burst usage for the current hour bucket.
            const burstResult = await pool.query(`SELECT COALESCE(SUM(bu.count), 0) as burst_used
       FROM api_keys ak
       LEFT JOIN burst_usage bu ON bu.api_key_id = ak.id AND bu.hour_bucket = $2
       WHERE ak.user_id = $1 AND ak.is_active = true`, [userId, currentHour]);
            const burstUsed = Number.parseInt(burstResult.rows[0]?.burst_used, 10) || 0;
            const burstLimit = plan.burst_limit || DEFAULT_BURST_LIMIT;
            const minutesRemaining = 59 - now.getUTCMinutes();
            // Weekly quota resets at 00:00 UTC next Monday.
            const dayOfWeek = now.getUTCDay();
            const daysUntilMonday = dayOfWeek === 0 ? 1 : 8 - dayOfWeek;
            const nextMonday = new Date(now);
            nextMonday.setUTCDate(now.getUTCDate() + daysUntilMonday);
            nextMonday.setUTCHours(0, 0, 0, 0);
            res.json({
                plan: {
                    tier: plan.tier,
                    weeklyLimit,
                    burstLimit,
                },
                weekly: {
                    used: totalUsed,
                    limit: weeklyLimit,
                    remaining,
                    resetsAt: nextMonday.toISOString(),
                    percentUsed: Math.round((totalUsed / weeklyLimit) * 100),
                },
                burst: {
                    used: burstUsed,
                    limit: burstLimit,
                    resetsIn: minutesRemaining <= 0 ? '< 1 min' : `${minutesRemaining}m`,
                },
                // Simple boolean flags for CLI to check quickly
                canFetch: remaining > 0 && burstUsed < burstLimit,
                upgradeUrl: 'https://webpeel.dev/pricing',
            });
        }
        catch (error) {
            console.error('CLI usage error:', error);
            res.status(500).json({
                success: false,
                error: {
                    type: 'internal_error',
                    message: 'Failed to retrieve usage',
                    docs: 'https://webpeel.dev/docs/errors#internal_error',
                },
                requestId: req.requestId,
            });
        }
    });
    return router;
}
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Firecrawl API Compatibility Layer
|
|
3
|
+
*
|
|
4
|
+
* Drop-in replacement for Firecrawl's API - users can switch by ONLY changing the base URL.
|
|
5
|
+
* This is our killer acquisition feature.
|
|
6
|
+
*
|
|
7
|
+
* NOTE: Error responses in this file intentionally use Firecrawl's format:
|
|
8
|
+
* { success: false, error: "Human-readable message" }
|
|
9
|
+
* This is required for Firecrawl drop-in compatibility and differs from the
|
|
10
|
+
* standard WebPeel API error format: { error: "error_code", message: "description" }.
|
|
11
|
+
* Do NOT change this format — it would break Firecrawl-compatible integrations.
|
|
12
|
+
*
|
|
13
|
+
* Implements Firecrawl endpoints:
|
|
14
|
+
* - POST /v1/scrape
|
|
15
|
+
* - POST /v2/scrape (v2 with formats: ["screenshot"] support)
|
|
16
|
+
* - POST /v1/crawl
|
|
17
|
+
* - GET /v1/crawl/:id
|
|
18
|
+
* - POST /v1/search
|
|
19
|
+
* - POST /v1/map
|
|
20
|
+
*/
|
|
21
|
+
import { Router } from 'express';
|
|
22
|
+
import type { IJobQueue } from '../job-queue.js';
|
|
23
|
+
export declare function createCompatRouter(jobQueue: IJobQueue): Router;
|