webpeel 0.20.2 → 0.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/dist/server/app.d.ts +14 -0
  2. package/dist/server/app.js +384 -0
  3. package/dist/server/auth-store.d.ts +27 -0
  4. package/dist/server/auth-store.js +88 -0
  5. package/dist/server/email-service.d.ts +21 -0
  6. package/dist/server/email-service.js +79 -0
  7. package/dist/server/job-queue.d.ts +100 -0
  8. package/dist/server/job-queue.js +145 -0
  9. package/dist/server/logger.d.ts +10 -0
  10. package/dist/server/logger.js +37 -0
  11. package/dist/server/middleware/auth.d.ts +28 -0
  12. package/dist/server/middleware/auth.js +221 -0
  13. package/dist/server/middleware/rate-limit.d.ts +24 -0
  14. package/dist/server/middleware/rate-limit.js +167 -0
  15. package/dist/server/middleware/url-validator.d.ts +15 -0
  16. package/dist/server/middleware/url-validator.js +186 -0
  17. package/dist/server/openapi.yaml +6418 -0
  18. package/dist/server/pg-auth-store.d.ts +132 -0
  19. package/dist/server/pg-auth-store.js +472 -0
  20. package/dist/server/pg-job-queue.d.ts +59 -0
  21. package/dist/server/pg-job-queue.js +375 -0
  22. package/dist/server/premium/domain-intel.d.ts +16 -0
  23. package/dist/server/premium/domain-intel.js +133 -0
  24. package/dist/server/premium/index.d.ts +17 -0
  25. package/dist/server/premium/index.js +35 -0
  26. package/dist/server/premium/swr-cache.d.ts +14 -0
  27. package/dist/server/premium/swr-cache.js +34 -0
  28. package/dist/server/routes/activity.d.ts +6 -0
  29. package/dist/server/routes/activity.js +74 -0
  30. package/dist/server/routes/answer.d.ts +5 -0
  31. package/dist/server/routes/answer.js +125 -0
  32. package/dist/server/routes/ask.d.ts +28 -0
  33. package/dist/server/routes/ask.js +229 -0
  34. package/dist/server/routes/batch.d.ts +6 -0
  35. package/dist/server/routes/batch.js +493 -0
  36. package/dist/server/routes/cli-usage.d.ts +6 -0
  37. package/dist/server/routes/cli-usage.js +127 -0
  38. package/dist/server/routes/compat.d.ts +23 -0
  39. package/dist/server/routes/compat.js +652 -0
  40. package/dist/server/routes/deep-fetch.d.ts +8 -0
  41. package/dist/server/routes/deep-fetch.js +57 -0
  42. package/dist/server/routes/demo.d.ts +24 -0
  43. package/dist/server/routes/demo.js +517 -0
  44. package/dist/server/routes/do.d.ts +8 -0
  45. package/dist/server/routes/do.js +72 -0
  46. package/dist/server/routes/extract.d.ts +8 -0
  47. package/dist/server/routes/extract.js +235 -0
  48. package/dist/server/routes/fetch.d.ts +7 -0
  49. package/dist/server/routes/fetch.js +999 -0
  50. package/dist/server/routes/health.d.ts +7 -0
  51. package/dist/server/routes/health.js +19 -0
  52. package/dist/server/routes/jobs.d.ts +7 -0
  53. package/dist/server/routes/jobs.js +573 -0
  54. package/dist/server/routes/mcp.d.ts +14 -0
  55. package/dist/server/routes/mcp.js +141 -0
  56. package/dist/server/routes/oauth.d.ts +9 -0
  57. package/dist/server/routes/oauth.js +396 -0
  58. package/dist/server/routes/playground.d.ts +17 -0
  59. package/dist/server/routes/playground.js +283 -0
  60. package/dist/server/routes/screenshot.d.ts +22 -0
  61. package/dist/server/routes/screenshot.js +816 -0
  62. package/dist/server/routes/search.d.ts +6 -0
  63. package/dist/server/routes/search.js +303 -0
  64. package/dist/server/routes/session.d.ts +15 -0
  65. package/dist/server/routes/session.js +397 -0
  66. package/dist/server/routes/stats.d.ts +6 -0
  67. package/dist/server/routes/stats.js +71 -0
  68. package/dist/server/routes/stripe.d.ts +15 -0
  69. package/dist/server/routes/stripe.js +294 -0
  70. package/dist/server/routes/users.d.ts +8 -0
  71. package/dist/server/routes/users.js +1671 -0
  72. package/dist/server/routes/watch.d.ts +15 -0
  73. package/dist/server/routes/watch.js +309 -0
  74. package/dist/server/routes/webhooks.d.ts +26 -0
  75. package/dist/server/routes/webhooks.js +170 -0
  76. package/dist/server/routes/youtube.d.ts +6 -0
  77. package/dist/server/routes/youtube.js +130 -0
  78. package/dist/server/sentry.d.ts +13 -0
  79. package/dist/server/sentry.js +38 -0
  80. package/dist/server/types.d.ts +15 -0
  81. package/dist/server/types.js +7 -0
  82. package/dist/server/utils/response.d.ts +44 -0
  83. package/dist/server/utils/response.js +69 -0
  84. package/dist/server/utils/sse.d.ts +22 -0
  85. package/dist/server/utils/sse.js +38 -0
  86. package/package.json +2 -1
@@ -0,0 +1,72 @@
1
+ /**
2
+ * /v1/do — Intent-based endpoint.
3
+ * One endpoint that understands natural language and routes internally.
4
+ * POST /v1/do { task: "find Stripe fees" }
5
+ * GET /v1/do?task=find+Stripe+fees
6
+ */
7
+ import { Router } from 'express';
8
+ import { parseIntent } from '../../mcp/smart-router.js';
9
+ import { getHandler } from '../../mcp/handlers/index.js';
10
+ export function createDoRouter() {
11
+ const router = Router();
12
+ async function handleDo(req, res) {
13
+ const task = req.body?.task || req.query?.task;
14
+ if (!task?.trim()) {
15
+ res.status(400).json({ success: false, error: { type: 'missing_task', message: 'Provide task= parameter or {"task": "..."}', hint: 'POST {"task": "search for X"} or GET /v1/do?task=search+for+X', docs: 'https://webpeel.dev/docs/errors#missing_task' }, requestId: req.requestId });
16
+ return;
17
+ }
18
+ const startMs = Date.now();
19
+ const intent = parseIntent(task);
20
+ // Map intent to handler
21
+ const toolName = `webpeel_${intent.intent}`;
22
+ const handler = getHandler(toolName);
23
+ if (!handler) {
24
+ res.status(400).json({ success: false, error: { type: 'unknown_intent', message: `Could not understand: "${task}"`, hint: 'Try phrasing as: "fetch https://example.com", "search for AI news", or "screenshot https://example.com"', docs: 'https://webpeel.dev/docs/errors#unknown_intent' }, requestId: req.requestId });
25
+ return;
26
+ }
27
+ // Build args from parsed intent
28
+ const args = { ...intent.params };
29
+ if (intent.url)
30
+ args.url = intent.url;
31
+ if (intent.query)
32
+ args.query = intent.query;
33
+ try {
34
+ const ctx = {
35
+ accountId: req.auth?.keyInfo?.accountId || req.user?.userId,
36
+ };
37
+ const result = await handler(args, ctx);
38
+ // Extract text content from MCP result format
39
+ const firstItem = result.content?.[0];
40
+ const content = firstItem?.type === 'text' ? firstItem.text : undefined;
41
+ let parsed;
42
+ try {
43
+ parsed = JSON.parse(content || '{}');
44
+ }
45
+ catch {
46
+ parsed = { raw: content };
47
+ }
48
+ res.json({
49
+ task,
50
+ intent: intent.intent,
51
+ ...(intent.url ? { url: intent.url } : {}),
52
+ ...(intent.query ? { query: intent.query } : {}),
53
+ result: parsed,
54
+ elapsed: Date.now() - startMs,
55
+ });
56
+ }
57
+ catch (err) {
58
+ res.status(500).json({
59
+ success: false,
60
+ error: {
61
+ type: 'execution_failed',
62
+ message: err.message,
63
+ docs: 'https://webpeel.dev/docs/errors#execution_failed',
64
+ },
65
+ requestId: req.requestId,
66
+ });
67
+ }
68
+ }
69
+ router.get('/', handleDo);
70
+ router.post('/', handleDo);
71
+ return router;
72
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * POST /v1/extract — Firecrawl-compatible JSON Schema extraction endpoint.
3
+ *
4
+ * Body: { url: string, schema?: object, prompt?: string, llmApiKey?: string, model?: string }
5
+ * Returns: { success: true, data: <extracted data> }
6
+ */
7
+ import { Router } from 'express';
8
+ export declare function createExtractRouter(): Router;
@@ -0,0 +1,235 @@
1
+ /**
2
+ * POST /v1/extract — Firecrawl-compatible JSON Schema extraction endpoint.
3
+ *
4
+ * Body: { url: string, schema?: object, prompt?: string, llmApiKey?: string, model?: string }
5
+ * Returns: { success: true, data: <extracted data> }
6
+ */
7
+ import { Router } from 'express';
8
+ import crypto from 'crypto';
9
+ import { peel } from '../../index.js';
10
+ import { extractWithLLM } from '../../core/llm-extract.js';
11
+ export function createExtractRouter() {
12
+ const router = Router();
13
+ router.post('/v1/extract', async (req, res) => {
14
+ try {
15
+ const { url, schema, prompt, llmApiKey, llmProvider, model, baseUrl, } = req.body;
16
+ // Validate URL
17
+ if (!url || typeof url !== 'string') {
18
+ res.status(400).json({
19
+ success: false,
20
+ error: {
21
+ type: 'invalid_request',
22
+ message: 'Missing or invalid "url" field in request body.',
23
+ hint: 'Pass a URL in the request body: { "url": "https://example.com", "schema": { ... } }',
24
+ docs: 'https://webpeel.dev/docs/errors#invalid-request',
25
+ },
26
+ requestId: req.requestId || crypto.randomUUID(),
27
+ });
28
+ return;
29
+ }
30
+ if (url.length > 2048) {
31
+ res.status(400).json({
32
+ success: false,
33
+ error: {
34
+ type: 'invalid_url',
35
+ message: 'URL too long (max 2048 characters)',
36
+ hint: 'Shorten the URL to under 2048 characters.',
37
+ docs: 'https://webpeel.dev/docs/errors#invalid-url',
38
+ },
39
+ requestId: req.requestId || crypto.randomUUID(),
40
+ });
41
+ return;
42
+ }
43
+ // Validate URL format
44
+ let parsedUrl;
45
+ try {
46
+ parsedUrl = new URL(url);
47
+ if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
48
+ res.status(400).json({
49
+ success: false,
50
+ error: {
51
+ type: 'invalid_url',
52
+ message: 'Only HTTP and HTTPS URLs are supported',
53
+ hint: 'Ensure the URL starts with http:// or https://',
54
+ docs: 'https://webpeel.dev/docs/errors#invalid-url',
55
+ },
56
+ requestId: req.requestId || crypto.randomUUID(),
57
+ });
58
+ return;
59
+ }
60
+ }
61
+ catch {
62
+ res.status(400).json({
63
+ success: false,
64
+ error: {
65
+ type: 'invalid_url',
66
+ message: `Invalid URL format: ${url}`,
67
+ hint: 'Ensure the URL is well-formed: https://example.com',
68
+ docs: 'https://webpeel.dev/docs/errors#invalid-url',
69
+ },
70
+ requestId: req.requestId || crypto.randomUUID(),
71
+ });
72
+ return;
73
+ }
74
+ // Require at least schema or prompt
75
+ if (!schema && !prompt) {
76
+ res.status(400).json({
77
+ success: false,
78
+ error: {
79
+ type: 'invalid_request',
80
+ message: 'Either "schema" or "prompt" is required for structured extraction.',
81
+ hint: 'Include a JSON schema or a natural language prompt in the request body.',
82
+ docs: 'https://webpeel.dev/docs/errors#invalid-request',
83
+ },
84
+ requestId: req.requestId || crypto.randomUUID(),
85
+ });
86
+ return;
87
+ }
88
+ // Resolve provider and API key
89
+ const resolvedProvider = (['openai', 'anthropic', 'google'].includes(llmProvider || ''))
90
+ ? llmProvider
91
+ : 'openai';
92
+ // Resolve API key from request body or environment
93
+ const resolvedApiKey = llmApiKey || process.env.OPENAI_API_KEY;
94
+ if (!resolvedApiKey) {
95
+ res.status(400).json({
96
+ success: false,
97
+ error: {
98
+ type: 'missing_api_key',
99
+ message: 'LLM API key required. Provide "llmApiKey" in the request body or set OPENAI_API_KEY on the server.',
100
+ hint: 'Pass your API key: { "llmApiKey": "sk-...", "llmProvider": "openai" }',
101
+ docs: 'https://webpeel.dev/docs/errors#missing-api-key',
102
+ },
103
+ requestId: req.requestId || crypto.randomUUID(),
104
+ });
105
+ return;
106
+ }
107
+ // Fetch the page content
108
+ const peelResult = await peel(url, {
109
+ format: 'markdown',
110
+ timeout: 30000,
111
+ });
112
+ const startTime = Date.now();
113
+ // Extract structured data with LLM
114
+ const extractResult = await extractWithLLM({
115
+ content: peelResult.content,
116
+ instruction: prompt,
117
+ prompt,
118
+ schema,
119
+ apiKey: resolvedApiKey,
120
+ llmApiKey: resolvedApiKey,
121
+ llmProvider: resolvedProvider,
122
+ model: model || process.env.WEBPEEL_LLM_MODEL || undefined,
123
+ llmModel: model || process.env.WEBPEEL_LLM_MODEL || undefined,
124
+ baseUrl: baseUrl || process.env.WEBPEEL_LLM_BASE_URL || 'https://api.openai.com/v1',
125
+ });
126
+ const elapsed = Date.now() - startTime;
127
+ // Return in Firecrawl-compatible format with llm metadata
128
+ res.json({
129
+ success: true,
130
+ data: extractResult.items.length === 1 ? extractResult.items[0] : extractResult.items,
131
+ llm: {
132
+ provider: extractResult.provider || resolvedProvider,
133
+ model: extractResult.model,
134
+ tokens: extractResult.tokensUsed,
135
+ },
136
+ url: peelResult.url,
137
+ elapsed,
138
+ metadata: {
139
+ url: peelResult.url,
140
+ title: peelResult.title,
141
+ tokensUsed: extractResult.tokensUsed,
142
+ model: extractResult.model,
143
+ cost: extractResult.cost,
144
+ elapsed: peelResult.elapsed,
145
+ },
146
+ });
147
+ }
148
+ catch (error) {
149
+ console.error('[/v1/extract] Error:', error instanceof Error ? error.message : String(error));
150
+ const msg = error instanceof Error ? error.message : 'Unknown error';
151
+ if (msg.includes('authentication failed') || msg.includes('401')) {
152
+ res.status(401).json({ success: false, error: { type: 'llm_auth_failed', message: msg }, requestId: req.requestId });
153
+ return;
154
+ }
155
+ if (msg.includes('rate limit') || msg.includes('429')) {
156
+ res.status(429).json({
157
+ success: false,
158
+ error: {
159
+ type: 'llm_rate_limited',
160
+ message: msg,
161
+ hint: 'You have hit the LLM provider rate limit. Try again in a moment.',
162
+ docs: 'https://webpeel.dev/docs/errors#llm-rate-limited',
163
+ },
164
+ requestId: req.requestId || crypto.randomUUID(),
165
+ });
166
+ return;
167
+ }
168
+ res.status(500).json({
169
+ success: false,
170
+ error: {
171
+ type: 'extraction_failed',
172
+ message: msg,
173
+ docs: 'https://webpeel.dev/docs/errors#extraction-failed',
174
+ },
175
+ requestId: req.requestId || crypto.randomUUID(),
176
+ });
177
+ }
178
+ });
179
+ router.get('/v1/extract/auto', async (req, res) => {
180
+ const url = req.query.url;
181
+ if (!url) {
182
+ res.status(400).json({
183
+ success: false,
184
+ error: {
185
+ type: 'missing_url',
186
+ message: 'Missing url parameter',
187
+ hint: 'Pass a URL: GET /v1/extract/auto?url=https://example.com',
188
+ docs: 'https://webpeel.dev/docs/errors#missing-url',
189
+ },
190
+ requestId: req.requestId || crypto.randomUUID(),
191
+ });
192
+ return;
193
+ }
194
+ const { autoExtract } = await import('../../core/auto-extract.js');
195
+ const result = await peel(url, { format: 'html' });
196
+ const extracted = autoExtract(result.content || '', url);
197
+ res.json({ url, pageType: extracted.type, structured: extracted });
198
+ });
199
+ router.post('/v1/extract/auto', async (req, res) => {
200
+ const { url, ...rest } = req.body;
201
+ if (!url || typeof url !== 'string') {
202
+ res.status(400).json({
203
+ success: false,
204
+ error: {
205
+ type: 'missing_url',
206
+ message: 'Missing or invalid url field in request body',
207
+ hint: 'Pass a URL in the request body: { "url": "https://example.com" }',
208
+ docs: 'https://webpeel.dev/docs/errors#missing-url',
209
+ },
210
+ requestId: req.requestId || crypto.randomUUID(),
211
+ });
212
+ return;
213
+ }
214
+ try {
215
+ const { autoExtract } = await import('../../core/auto-extract.js');
216
+ const result = await peel(url, { format: 'html', ...rest });
217
+ const extracted = autoExtract(result.content || '', url);
218
+ res.json({ url, pageType: extracted.type, structured: extracted });
219
+ }
220
+ catch (error) {
221
+ const msg = error instanceof Error ? error.message : 'Unknown error';
222
+ console.error('[/v1/extract/auto POST] Error:', msg);
223
+ res.status(500).json({
224
+ success: false,
225
+ error: {
226
+ type: 'extraction_failed',
227
+ message: msg,
228
+ docs: 'https://webpeel.dev/docs/errors#extraction-failed',
229
+ },
230
+ requestId: req.requestId || crypto.randomUUID(),
231
+ });
232
+ }
233
+ });
234
+ return router;
235
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Fetch endpoint with caching
3
+ */
4
+ import { Router } from 'express';
5
+ import '../types.js';
6
+ import { AuthStore } from '../auth-store.js';
7
+ export declare function createFetchRouter(authStore: AuthStore): Router;