n8n-nodes-alterlab 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/credentials/AlterLabOAuth2Api.credentials.d.ts +2 -1
- package/dist/credentials/AlterLabOAuth2Api.credentials.d.ts.map +1 -1
- package/dist/credentials/AlterLabOAuth2Api.credentials.js +7 -0
- package/dist/credentials/AlterLabOAuth2Api.credentials.js.map +1 -1
- package/dist/nodes/AlterLab/AlterLab.node.d.ts +1 -1
- package/dist/nodes/AlterLab/AlterLab.node.d.ts.map +1 -1
- package/dist/nodes/AlterLab/AlterLab.node.js +652 -263
- package/dist/nodes/AlterLab/AlterLab.node.js.map +1 -1
- package/dist/nodes/AlterLab/AlterLab.node.json +35 -30
- package/dist/nodes/AlterLab/alterlab.svg +10 -3
- package/package.json +60 -61
|
@@ -2,137 +2,195 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.AlterLab = void 0;
|
|
4
4
|
const n8n_workflow_1 = require("n8n-workflow");
|
|
5
|
-
const UTM =
|
|
5
|
+
const UTM = "utm_source=n8n&utm_medium=integration&utm_campaign=community_node";
|
|
6
|
+
const BASE_URL = "https://api.alterlab.io";
|
|
7
|
+
function sleep(ms) {
|
|
8
|
+
return new Promise((resolve) => {
|
|
9
|
+
globalThis.setTimeout(resolve, ms);
|
|
10
|
+
});
|
|
11
|
+
}
|
|
6
12
|
class AlterLab {
|
|
7
13
|
constructor() {
|
|
8
14
|
this.description = {
|
|
9
|
-
displayName:
|
|
10
|
-
name:
|
|
11
|
-
icon:
|
|
12
|
-
group: [
|
|
15
|
+
displayName: "AlterLab",
|
|
16
|
+
name: "alterLab",
|
|
17
|
+
icon: "file:alterlab.svg",
|
|
18
|
+
group: ["transform"],
|
|
13
19
|
version: 1,
|
|
14
|
-
subtitle: '={{$parameter["mode"] + " scrape"}}',
|
|
15
|
-
description:
|
|
20
|
+
subtitle: '={{$parameter["operation"] === "estimateCost" ? "cost estimate" : $parameter["operation"] === "batchScrape" ? "batch scrape" : $parameter["mode"] + " scrape"}}',
|
|
21
|
+
description: "Scrape any website with anti-bot bypass, JS rendering, structured extraction, OCR, and more",
|
|
16
22
|
defaults: {
|
|
17
|
-
name:
|
|
23
|
+
name: "AlterLab",
|
|
18
24
|
},
|
|
19
|
-
inputs: [
|
|
20
|
-
outputs: [
|
|
25
|
+
inputs: ["main"],
|
|
26
|
+
outputs: ["main"],
|
|
21
27
|
credentials: [
|
|
22
28
|
{
|
|
23
|
-
name:
|
|
24
|
-
displayName:
|
|
29
|
+
name: "alterLabApi",
|
|
30
|
+
displayName: "API Key",
|
|
25
31
|
},
|
|
26
32
|
{
|
|
27
|
-
name:
|
|
28
|
-
displayName:
|
|
33
|
+
name: "alterLabOAuth2Api",
|
|
34
|
+
displayName: "OAuth2 (Recommended)",
|
|
29
35
|
},
|
|
30
36
|
],
|
|
37
|
+
requestDefaults: {
|
|
38
|
+
baseURL: BASE_URL,
|
|
39
|
+
},
|
|
31
40
|
properties: [
|
|
41
|
+
// ── Operation ────────────────────────────────────────
|
|
42
|
+
{
|
|
43
|
+
displayName: "Operation",
|
|
44
|
+
name: "operation",
|
|
45
|
+
type: "options",
|
|
46
|
+
noDataExpression: true,
|
|
47
|
+
default: "scrape",
|
|
48
|
+
options: [
|
|
49
|
+
{
|
|
50
|
+
name: "Scrape",
|
|
51
|
+
value: "scrape",
|
|
52
|
+
description: "Scrape a URL and return its content",
|
|
53
|
+
action: "Scrape a URL",
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: "Batch Scrape",
|
|
57
|
+
value: "batchScrape",
|
|
58
|
+
description: "Scrape up to 100 URLs in a single batch request",
|
|
59
|
+
action: "Scrape a batch of URLs",
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
name: "Estimate Cost",
|
|
63
|
+
value: "estimateCost",
|
|
64
|
+
description: "Estimate the cost of scraping a URL without actually scraping it",
|
|
65
|
+
action: "Estimate scraping cost",
|
|
66
|
+
},
|
|
67
|
+
],
|
|
68
|
+
description: "The operation to perform",
|
|
69
|
+
},
|
|
32
70
|
// ── Primary ──────────────────────────────────────────
|
|
33
71
|
{
|
|
34
|
-
displayName:
|
|
35
|
-
name:
|
|
36
|
-
type:
|
|
37
|
-
default:
|
|
72
|
+
displayName: "URL",
|
|
73
|
+
name: "url",
|
|
74
|
+
type: "string",
|
|
75
|
+
default: "",
|
|
38
76
|
required: true,
|
|
39
|
-
placeholder:
|
|
40
|
-
description:
|
|
77
|
+
placeholder: "https://www.example.com/page",
|
|
78
|
+
description: "The URL to scrape",
|
|
41
79
|
},
|
|
42
80
|
{
|
|
43
|
-
displayName:
|
|
44
|
-
name:
|
|
45
|
-
type:
|
|
46
|
-
default:
|
|
81
|
+
displayName: "Mode",
|
|
82
|
+
name: "mode",
|
|
83
|
+
type: "options",
|
|
84
|
+
default: "auto",
|
|
47
85
|
options: [
|
|
48
86
|
{
|
|
49
|
-
name:
|
|
50
|
-
value:
|
|
51
|
-
description:
|
|
87
|
+
name: "Auto",
|
|
88
|
+
value: "auto",
|
|
89
|
+
description: "Automatically choose the best scraping method",
|
|
52
90
|
},
|
|
53
91
|
{
|
|
54
|
-
name:
|
|
55
|
-
value:
|
|
56
|
-
description:
|
|
92
|
+
name: "HTML",
|
|
93
|
+
value: "html",
|
|
94
|
+
description: "Fast HTTP-only scraping for static pages",
|
|
57
95
|
},
|
|
58
96
|
{
|
|
59
|
-
name:
|
|
60
|
-
value:
|
|
61
|
-
description:
|
|
97
|
+
name: "JavaScript",
|
|
98
|
+
value: "js",
|
|
99
|
+
description: "Render JavaScript with headless browser",
|
|
62
100
|
},
|
|
63
101
|
{
|
|
64
|
-
name:
|
|
65
|
-
value:
|
|
66
|
-
description:
|
|
102
|
+
name: "PDF",
|
|
103
|
+
value: "pdf",
|
|
104
|
+
description: "Extract text from PDF documents",
|
|
67
105
|
},
|
|
68
106
|
{
|
|
69
|
-
name:
|
|
70
|
-
value:
|
|
71
|
-
description:
|
|
107
|
+
name: "OCR",
|
|
108
|
+
value: "ocr",
|
|
109
|
+
description: "Extract text from images",
|
|
72
110
|
},
|
|
73
111
|
],
|
|
74
|
-
description:
|
|
112
|
+
description: "Scraping mode to use",
|
|
75
113
|
},
|
|
76
114
|
// ── Output Options ───────────────────────────────────
|
|
77
115
|
{
|
|
78
|
-
displayName:
|
|
79
|
-
name:
|
|
80
|
-
type:
|
|
81
|
-
placeholder:
|
|
116
|
+
displayName: "Output Options",
|
|
117
|
+
name: "outputOptions",
|
|
118
|
+
type: "collection",
|
|
119
|
+
placeholder: "Add Option",
|
|
82
120
|
default: {},
|
|
121
|
+
displayOptions: {
|
|
122
|
+
show: {
|
|
123
|
+
operation: ["scrape", "batchScrape"],
|
|
124
|
+
},
|
|
125
|
+
},
|
|
83
126
|
options: [
|
|
84
127
|
{
|
|
85
|
-
displayName:
|
|
86
|
-
name:
|
|
87
|
-
type:
|
|
88
|
-
default: [
|
|
128
|
+
displayName: "Formats",
|
|
129
|
+
name: "formats",
|
|
130
|
+
type: "multiOptions",
|
|
131
|
+
default: ["markdown", "json"],
|
|
89
132
|
options: [
|
|
90
|
-
{ name:
|
|
91
|
-
{ name:
|
|
92
|
-
{
|
|
93
|
-
|
|
133
|
+
{ name: "Markdown", value: "markdown" },
|
|
134
|
+
{ name: "JSON", value: "json" },
|
|
135
|
+
{
|
|
136
|
+
name: "JSON v2 (Structured)",
|
|
137
|
+
value: "json_v2",
|
|
138
|
+
description: "Deterministic extraction with section tree, classified links, and structured tables",
|
|
139
|
+
},
|
|
140
|
+
{ name: "HTML", value: "html" },
|
|
141
|
+
{ name: "Text", value: "text" },
|
|
142
|
+
{
|
|
143
|
+
name: "RAG (Chunked)",
|
|
144
|
+
value: "rag",
|
|
145
|
+
description: "Chunked markdown with token counts and metadata for vector DB ingestion",
|
|
146
|
+
},
|
|
94
147
|
],
|
|
95
|
-
description:
|
|
148
|
+
description: "Output formats. JSON v2 provides structured extraction with section trees. RAG produces chunked output optimized for vector databases.",
|
|
96
149
|
},
|
|
97
150
|
{
|
|
98
|
-
displayName:
|
|
99
|
-
name:
|
|
100
|
-
type:
|
|
151
|
+
displayName: "Include Raw HTML",
|
|
152
|
+
name: "includeRawHtml",
|
|
153
|
+
type: "boolean",
|
|
101
154
|
default: false,
|
|
102
|
-
description:
|
|
155
|
+
description: "Whether to include the raw HTML in the response",
|
|
103
156
|
},
|
|
104
157
|
{
|
|
105
|
-
displayName:
|
|
106
|
-
name:
|
|
107
|
-
type:
|
|
158
|
+
displayName: "Timeout (Seconds)",
|
|
159
|
+
name: "timeout",
|
|
160
|
+
type: "number",
|
|
108
161
|
default: 90,
|
|
109
162
|
typeOptions: { minValue: 1, maxValue: 300 },
|
|
110
|
-
description:
|
|
163
|
+
description: "Request timeout in seconds (1-300)",
|
|
111
164
|
},
|
|
112
165
|
],
|
|
113
166
|
},
|
|
114
167
|
// ── Execution Mode ───────────────────────────────────
|
|
115
168
|
{
|
|
116
|
-
displayName:
|
|
117
|
-
name:
|
|
118
|
-
type:
|
|
119
|
-
placeholder:
|
|
169
|
+
displayName: "Execution Mode",
|
|
170
|
+
name: "executionMode",
|
|
171
|
+
type: "collection",
|
|
172
|
+
placeholder: "Add Option",
|
|
120
173
|
default: {},
|
|
174
|
+
displayOptions: {
|
|
175
|
+
show: {
|
|
176
|
+
operation: ["scrape", "batchScrape"],
|
|
177
|
+
},
|
|
178
|
+
},
|
|
121
179
|
options: [
|
|
122
180
|
{
|
|
123
|
-
displayName:
|
|
124
|
-
name:
|
|
125
|
-
type:
|
|
181
|
+
displayName: "Cache",
|
|
182
|
+
name: "cache",
|
|
183
|
+
type: "boolean",
|
|
126
184
|
default: false,
|
|
127
|
-
description:
|
|
185
|
+
description: "Whether to enable response caching",
|
|
128
186
|
},
|
|
129
187
|
{
|
|
130
|
-
displayName:
|
|
131
|
-
name:
|
|
132
|
-
type:
|
|
188
|
+
displayName: "Cache TTL (Seconds)",
|
|
189
|
+
name: "cacheTtl",
|
|
190
|
+
type: "number",
|
|
133
191
|
default: 900,
|
|
134
192
|
typeOptions: { minValue: 60, maxValue: 86400 },
|
|
135
|
-
description:
|
|
193
|
+
description: "Cache time-to-live in seconds (60-86400)",
|
|
136
194
|
displayOptions: {
|
|
137
195
|
show: {
|
|
138
196
|
cache: [true],
|
|
@@ -143,25 +201,25 @@ class AlterLab {
|
|
|
143
201
|
},
|
|
144
202
|
// ── Advanced Options ─────────────────────────────────
|
|
145
203
|
{
|
|
146
|
-
displayName:
|
|
147
|
-
name:
|
|
148
|
-
type:
|
|
149
|
-
placeholder:
|
|
204
|
+
displayName: "Advanced Options",
|
|
205
|
+
name: "advancedOptions",
|
|
206
|
+
type: "collection",
|
|
207
|
+
placeholder: "Add Option",
|
|
150
208
|
default: {},
|
|
151
209
|
options: [
|
|
152
210
|
{
|
|
153
|
-
displayName:
|
|
154
|
-
name:
|
|
155
|
-
type:
|
|
211
|
+
displayName: "Render JavaScript",
|
|
212
|
+
name: "renderJs",
|
|
213
|
+
type: "boolean",
|
|
156
214
|
default: false,
|
|
157
|
-
description:
|
|
215
|
+
description: "Whether to render JavaScript with a headless browser (+$0.0006)",
|
|
158
216
|
},
|
|
159
217
|
{
|
|
160
|
-
displayName:
|
|
161
|
-
name:
|
|
162
|
-
type:
|
|
218
|
+
displayName: "Screenshot",
|
|
219
|
+
name: "screenshot",
|
|
220
|
+
type: "boolean",
|
|
163
221
|
default: false,
|
|
164
|
-
description:
|
|
222
|
+
description: "Whether to capture a full-page screenshot (+$0.0002, requires Render JavaScript)",
|
|
165
223
|
displayOptions: {
|
|
166
224
|
show: {
|
|
167
225
|
renderJs: [true],
|
|
@@ -169,11 +227,11 @@ class AlterLab {
|
|
|
169
227
|
},
|
|
170
228
|
},
|
|
171
229
|
{
|
|
172
|
-
displayName:
|
|
173
|
-
name:
|
|
174
|
-
type:
|
|
230
|
+
displayName: "Generate PDF",
|
|
231
|
+
name: "generatePdf",
|
|
232
|
+
type: "boolean",
|
|
175
233
|
default: false,
|
|
176
|
-
description:
|
|
234
|
+
description: "Whether to generate a PDF of the rendered page (+$0.0004, requires Render JavaScript)",
|
|
177
235
|
displayOptions: {
|
|
178
236
|
show: {
|
|
179
237
|
renderJs: [true],
|
|
@@ -181,26 +239,26 @@ class AlterLab {
|
|
|
181
239
|
},
|
|
182
240
|
},
|
|
183
241
|
{
|
|
184
|
-
displayName:
|
|
185
|
-
name:
|
|
186
|
-
type:
|
|
242
|
+
displayName: "OCR",
|
|
243
|
+
name: "ocr",
|
|
244
|
+
type: "boolean",
|
|
187
245
|
default: false,
|
|
188
|
-
description:
|
|
246
|
+
description: "Whether to extract text from images using OCR (+$0.001, refunded if no images found)",
|
|
189
247
|
},
|
|
190
248
|
{
|
|
191
|
-
displayName:
|
|
192
|
-
name:
|
|
193
|
-
type:
|
|
249
|
+
displayName: "Use Proxy",
|
|
250
|
+
name: "useProxy",
|
|
251
|
+
type: "boolean",
|
|
194
252
|
default: false,
|
|
195
|
-
description:
|
|
253
|
+
description: "Whether to route through a premium proxy (+$0.0002)",
|
|
196
254
|
},
|
|
197
255
|
{
|
|
198
|
-
displayName:
|
|
199
|
-
name:
|
|
200
|
-
type:
|
|
201
|
-
default:
|
|
202
|
-
placeholder:
|
|
203
|
-
description:
|
|
256
|
+
displayName: "Proxy Country",
|
|
257
|
+
name: "proxyCountry",
|
|
258
|
+
type: "string",
|
|
259
|
+
default: "",
|
|
260
|
+
placeholder: "US",
|
|
261
|
+
description: "Preferred proxy country code for geo-targeting (e.g. US, DE, GB)",
|
|
204
262
|
displayOptions: {
|
|
205
263
|
show: {
|
|
206
264
|
useProxy: [true],
|
|
@@ -208,28 +266,28 @@ class AlterLab {
|
|
|
208
266
|
},
|
|
209
267
|
},
|
|
210
268
|
{
|
|
211
|
-
displayName:
|
|
212
|
-
name:
|
|
213
|
-
type:
|
|
214
|
-
default:
|
|
269
|
+
displayName: "Wait Condition",
|
|
270
|
+
name: "waitCondition",
|
|
271
|
+
type: "options",
|
|
272
|
+
default: "networkidle",
|
|
215
273
|
options: [
|
|
216
274
|
{
|
|
217
|
-
name:
|
|
218
|
-
value:
|
|
219
|
-
description:
|
|
275
|
+
name: "Network Idle",
|
|
276
|
+
value: "networkidle",
|
|
277
|
+
description: "Wait until network is idle",
|
|
220
278
|
},
|
|
221
279
|
{
|
|
222
|
-
name:
|
|
223
|
-
value:
|
|
224
|
-
description:
|
|
280
|
+
name: "DOM Content Loaded",
|
|
281
|
+
value: "domcontentloaded",
|
|
282
|
+
description: "Wait until DOM content is loaded",
|
|
225
283
|
},
|
|
226
284
|
{
|
|
227
|
-
name:
|
|
228
|
-
value:
|
|
229
|
-
description:
|
|
285
|
+
name: "Load",
|
|
286
|
+
value: "load",
|
|
287
|
+
description: "Wait until page load event",
|
|
230
288
|
},
|
|
231
289
|
],
|
|
232
|
-
description:
|
|
290
|
+
description: "When to consider the page ready (JS rendering only)",
|
|
233
291
|
displayOptions: {
|
|
234
292
|
show: {
|
|
235
293
|
renderJs: [true],
|
|
@@ -237,156 +295,263 @@ class AlterLab {
|
|
|
237
295
|
},
|
|
238
296
|
},
|
|
239
297
|
{
|
|
240
|
-
displayName:
|
|
241
|
-
name:
|
|
242
|
-
type:
|
|
298
|
+
displayName: "Remove Cookie Banners",
|
|
299
|
+
name: "removeCookieBanners",
|
|
300
|
+
type: "boolean",
|
|
243
301
|
default: true,
|
|
244
|
-
description:
|
|
302
|
+
description: "Whether to remove cookie consent banners before content extraction",
|
|
245
303
|
},
|
|
246
304
|
],
|
|
247
305
|
},
|
|
248
306
|
// ── Extraction ───────────────────────────────────────
|
|
249
307
|
{
|
|
250
|
-
displayName:
|
|
251
|
-
name:
|
|
252
|
-
type:
|
|
253
|
-
placeholder:
|
|
308
|
+
displayName: "Extraction",
|
|
309
|
+
name: "extraction",
|
|
310
|
+
type: "collection",
|
|
311
|
+
placeholder: "Add Option",
|
|
254
312
|
default: {},
|
|
313
|
+
displayOptions: {
|
|
314
|
+
show: {
|
|
315
|
+
operation: ["scrape", "batchScrape"],
|
|
316
|
+
},
|
|
317
|
+
},
|
|
255
318
|
options: [
|
|
256
319
|
{
|
|
257
|
-
displayName:
|
|
258
|
-
name:
|
|
259
|
-
type:
|
|
260
|
-
default:
|
|
320
|
+
displayName: "Extraction Profile",
|
|
321
|
+
name: "extractionProfile",
|
|
322
|
+
type: "options",
|
|
323
|
+
default: "auto",
|
|
261
324
|
options: [
|
|
262
|
-
{ name:
|
|
263
|
-
{ name:
|
|
264
|
-
{ name:
|
|
265
|
-
{ name:
|
|
266
|
-
{ name:
|
|
267
|
-
{ name:
|
|
268
|
-
{ name:
|
|
325
|
+
{ name: "Auto", value: "auto" },
|
|
326
|
+
{ name: "Product", value: "product" },
|
|
327
|
+
{ name: "Article", value: "article" },
|
|
328
|
+
{ name: "Job Posting", value: "job_posting" },
|
|
329
|
+
{ name: "FAQ", value: "faq" },
|
|
330
|
+
{ name: "Recipe", value: "recipe" },
|
|
331
|
+
{ name: "Event", value: "event" },
|
|
269
332
|
],
|
|
270
|
-
description:
|
|
333
|
+
description: "Pre-defined extraction profile for structured data",
|
|
271
334
|
},
|
|
272
335
|
{
|
|
273
|
-
displayName:
|
|
274
|
-
name:
|
|
275
|
-
type:
|
|
336
|
+
displayName: "Extraction Prompt",
|
|
337
|
+
name: "extractionPrompt",
|
|
338
|
+
type: "string",
|
|
276
339
|
typeOptions: { rows: 4 },
|
|
277
|
-
default:
|
|
278
|
-
placeholder:
|
|
279
|
-
description:
|
|
340
|
+
default: "",
|
|
341
|
+
placeholder: "Extract the product name, price, and rating...",
|
|
342
|
+
description: "Natural language instructions for what data to extract",
|
|
280
343
|
},
|
|
281
344
|
{
|
|
282
|
-
displayName:
|
|
283
|
-
name:
|
|
284
|
-
type:
|
|
285
|
-
default:
|
|
345
|
+
displayName: "Extraction Schema (JSON)",
|
|
346
|
+
name: "extractionSchema",
|
|
347
|
+
type: "json",
|
|
348
|
+
default: "",
|
|
286
349
|
placeholder: '{"name": "string", "price": "number"}',
|
|
287
|
-
description:
|
|
350
|
+
description: "JSON Schema to filter and structure extracted data",
|
|
288
351
|
},
|
|
289
352
|
{
|
|
290
|
-
displayName:
|
|
291
|
-
name:
|
|
292
|
-
type:
|
|
353
|
+
displayName: "Promote Schema.org",
|
|
354
|
+
name: "promoteSchemaOrg",
|
|
355
|
+
type: "boolean",
|
|
293
356
|
default: true,
|
|
294
|
-
description:
|
|
357
|
+
description: "Whether to use Schema.org structured data as primary output when available",
|
|
295
358
|
},
|
|
296
359
|
{
|
|
297
|
-
displayName:
|
|
298
|
-
name:
|
|
299
|
-
type:
|
|
360
|
+
displayName: "Evidence",
|
|
361
|
+
name: "evidence",
|
|
362
|
+
type: "boolean",
|
|
300
363
|
default: false,
|
|
301
|
-
description:
|
|
364
|
+
description: "Whether to include provenance/evidence for extracted fields",
|
|
302
365
|
},
|
|
303
366
|
],
|
|
304
367
|
},
|
|
305
368
|
// ── Cost Controls ────────────────────────────────────
|
|
306
369
|
{
|
|
307
|
-
displayName:
|
|
308
|
-
name:
|
|
309
|
-
type:
|
|
310
|
-
placeholder:
|
|
370
|
+
displayName: "Cost Controls",
|
|
371
|
+
name: "costControls",
|
|
372
|
+
type: "collection",
|
|
373
|
+
placeholder: "Add Option",
|
|
311
374
|
default: {},
|
|
312
375
|
options: [
|
|
313
376
|
{
|
|
314
|
-
displayName:
|
|
315
|
-
name:
|
|
316
|
-
type:
|
|
377
|
+
displayName: "Max Spend",
|
|
378
|
+
name: "maxCredits",
|
|
379
|
+
type: "number",
|
|
317
380
|
default: 0,
|
|
318
381
|
typeOptions: { minValue: 0 },
|
|
319
|
-
description:
|
|
382
|
+
description: "Maximum to spend per request in microcents (0 = no limit)",
|
|
320
383
|
},
|
|
321
384
|
{
|
|
322
|
-
displayName:
|
|
323
|
-
name:
|
|
324
|
-
type:
|
|
325
|
-
default:
|
|
385
|
+
displayName: "Force Tier",
|
|
386
|
+
name: "forceTier",
|
|
387
|
+
type: "options",
|
|
388
|
+
default: "",
|
|
326
389
|
options: [
|
|
327
|
-
{ name:
|
|
328
|
-
{ name:
|
|
329
|
-
{ name:
|
|
330
|
-
{ name:
|
|
331
|
-
{ name:
|
|
332
|
-
{ name:
|
|
390
|
+
{ name: "None", value: "" },
|
|
391
|
+
{ name: "T1 Curl — $0.0002", value: "1" },
|
|
392
|
+
{ name: "T2 HTTP — $0.0003", value: "2" },
|
|
393
|
+
{ name: "T3 Stealth — $0.0005", value: "3" },
|
|
394
|
+
{ name: "T3.5 Light JS — $0.0007", value: "3.5" },
|
|
395
|
+
{ name: "T4 Browser — $0.001", value: "4" },
|
|
333
396
|
],
|
|
334
|
-
description:
|
|
397
|
+
description: "Force a specific scraping tier (skip escalation)",
|
|
335
398
|
},
|
|
336
399
|
{
|
|
337
|
-
displayName:
|
|
338
|
-
name:
|
|
339
|
-
type:
|
|
340
|
-
default:
|
|
400
|
+
displayName: "Max Tier",
|
|
401
|
+
name: "maxTier",
|
|
402
|
+
type: "options",
|
|
403
|
+
default: "",
|
|
341
404
|
options: [
|
|
342
|
-
{ name:
|
|
343
|
-
{ name:
|
|
344
|
-
{ name:
|
|
345
|
-
{ name:
|
|
346
|
-
{ name:
|
|
347
|
-
{ name:
|
|
405
|
+
{ name: "None", value: "" },
|
|
406
|
+
{ name: "T1 Curl — $0.0002", value: "1" },
|
|
407
|
+
{ name: "T2 HTTP — $0.0003", value: "2" },
|
|
408
|
+
{ name: "T3 Stealth — $0.0005", value: "3" },
|
|
409
|
+
{ name: "T3.5 Light JS — $0.0007", value: "3.5" },
|
|
410
|
+
{ name: "T4 Browser — $0.001", value: "4" },
|
|
348
411
|
],
|
|
349
|
-
description:
|
|
412
|
+
description: "Maximum tier to escalate to",
|
|
350
413
|
},
|
|
351
414
|
{
|
|
352
|
-
displayName:
|
|
353
|
-
name:
|
|
354
|
-
type:
|
|
415
|
+
displayName: "Prefer Cost",
|
|
416
|
+
name: "preferCost",
|
|
417
|
+
type: "boolean",
|
|
355
418
|
default: false,
|
|
356
|
-
description:
|
|
419
|
+
description: "Whether to optimize for lower cost (try cheaper tiers first)",
|
|
357
420
|
},
|
|
358
421
|
{
|
|
359
|
-
displayName:
|
|
360
|
-
name:
|
|
361
|
-
type:
|
|
422
|
+
displayName: "Prefer Speed",
|
|
423
|
+
name: "preferSpeed",
|
|
424
|
+
type: "boolean",
|
|
362
425
|
default: false,
|
|
363
|
-
description:
|
|
426
|
+
description: "Whether to optimize for speed (skip to reliable tier)",
|
|
364
427
|
},
|
|
365
428
|
{
|
|
366
|
-
displayName:
|
|
367
|
-
name:
|
|
368
|
-
type:
|
|
429
|
+
displayName: "Fail Fast",
|
|
430
|
+
name: "failFast",
|
|
431
|
+
type: "boolean",
|
|
369
432
|
default: false,
|
|
370
|
-
description:
|
|
433
|
+
description: "Whether to return an error instead of escalating to expensive tiers",
|
|
371
434
|
},
|
|
372
435
|
],
|
|
373
436
|
},
|
|
437
|
+
// ── Batch Options ────────────────────────────────────
|
|
438
|
+
{
|
|
439
|
+
displayName: "Webhook URL",
|
|
440
|
+
name: "webhookUrl",
|
|
441
|
+
type: "string",
|
|
442
|
+
default: "",
|
|
443
|
+
placeholder: "https://your-server.com/webhook",
|
|
444
|
+
description: "Optional URL to receive a webhook notification when the batch completes",
|
|
445
|
+
displayOptions: {
|
|
446
|
+
show: {
|
|
447
|
+
operation: ["batchScrape"],
|
|
448
|
+
},
|
|
449
|
+
},
|
|
450
|
+
},
|
|
451
|
+
{
|
|
452
|
+
displayName: "Polling Timeout (Seconds)",
|
|
453
|
+
name: "batchPollingTimeout",
|
|
454
|
+
type: "number",
|
|
455
|
+
default: 300,
|
|
456
|
+
typeOptions: { minValue: 30, maxValue: 900 },
|
|
457
|
+
description: "Maximum time to wait for the batch to complete (30-900 seconds)",
|
|
458
|
+
displayOptions: {
|
|
459
|
+
show: {
|
|
460
|
+
operation: ["batchScrape"],
|
|
461
|
+
},
|
|
462
|
+
},
|
|
463
|
+
},
|
|
374
464
|
],
|
|
375
465
|
};
|
|
376
466
|
}
|
|
377
467
|
async execute() {
|
|
378
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1;
|
|
468
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0, _1, _2, _3, _4, _5, _6, _7, _8, _9;
|
|
379
469
|
const items = this.getInputData();
|
|
470
|
+
const operation = this.getNodeParameter("operation", 0);
|
|
471
|
+
// Detect credential type once for all items
|
|
472
|
+
let authName = "alterLabApi";
|
|
473
|
+
try {
|
|
474
|
+
await this.getCredentials("alterLabOAuth2Api");
|
|
475
|
+
authName = "alterLabOAuth2Api";
|
|
476
|
+
}
|
|
477
|
+
catch {
|
|
478
|
+
// OAuth2 not configured, fall back to API key
|
|
479
|
+
}
|
|
480
|
+
// ── Batch Scrape operation ──────────────────────────
|
|
481
|
+
if (operation === "batchScrape") {
|
|
482
|
+
return executeBatchScrape(this, items, authName);
|
|
483
|
+
}
|
|
380
484
|
const results = [];
|
|
381
485
|
for (let i = 0; i < items.length; i++) {
|
|
382
486
|
try {
|
|
383
|
-
const url = this.getNodeParameter(
|
|
384
|
-
const mode = this.getNodeParameter(
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
487
|
+
const url = this.getNodeParameter("url", i);
|
|
488
|
+
const mode = this.getNodeParameter("mode", i);
|
|
489
|
+
// ── Estimate Cost operation ───────────────────────
|
|
490
|
+
if (operation === "estimateCost") {
|
|
491
|
+
const advancedOptions = this.getNodeParameter("advancedOptions", i, {});
|
|
492
|
+
const costControls = this.getNodeParameter("costControls", i, {});
|
|
493
|
+
const body = { url, mode };
|
|
494
|
+
const advanced = {};
|
|
495
|
+
if (advancedOptions.renderJs)
|
|
496
|
+
advanced.render_js = true;
|
|
497
|
+
if (advancedOptions.useProxy)
|
|
498
|
+
advanced.use_proxy = true;
|
|
499
|
+
if (advancedOptions.proxyCountry) {
|
|
500
|
+
advanced.proxy_country = advancedOptions.proxyCountry;
|
|
501
|
+
}
|
|
502
|
+
if (Object.keys(advanced).length > 0) {
|
|
503
|
+
body.advanced = advanced;
|
|
504
|
+
}
|
|
505
|
+
const costCtrl = {};
|
|
506
|
+
if (costControls.maxCredits && costControls.maxCredits > 0) {
|
|
507
|
+
costCtrl.max_credits = costControls.maxCredits;
|
|
508
|
+
}
|
|
509
|
+
if (costControls.forceTier)
|
|
510
|
+
costCtrl.force_tier = costControls.forceTier;
|
|
511
|
+
if (costControls.maxTier)
|
|
512
|
+
costCtrl.max_tier = costControls.maxTier;
|
|
513
|
+
if (costControls.preferCost)
|
|
514
|
+
costCtrl.prefer_cost = true;
|
|
515
|
+
if (costControls.preferSpeed)
|
|
516
|
+
costCtrl.prefer_speed = true;
|
|
517
|
+
if (costControls.failFast)
|
|
518
|
+
costCtrl.fail_fast = true;
|
|
519
|
+
if (Object.keys(costCtrl).length > 0) {
|
|
520
|
+
body.cost_controls = costCtrl;
|
|
521
|
+
}
|
|
522
|
+
const response = await this.helpers.httpRequestWithAuthentication.call(this, authName, {
|
|
523
|
+
method: "POST",
|
|
524
|
+
url: `${BASE_URL}/api/v1/scrape/estimate`,
|
|
525
|
+
body,
|
|
526
|
+
json: true,
|
|
527
|
+
returnFullResponse: true,
|
|
528
|
+
ignoreHttpStatusErrors: true,
|
|
529
|
+
});
|
|
530
|
+
const statusCode = response.statusCode;
|
|
531
|
+
const responseBody = response
|
|
532
|
+
.body;
|
|
533
|
+
if (statusCode >= 400) {
|
|
534
|
+
handleApiError(this, statusCode, responseBody, i);
|
|
535
|
+
}
|
|
536
|
+
const data = responseBody;
|
|
537
|
+
results.push({
|
|
538
|
+
json: {
|
|
539
|
+
url: (_a = data.url) !== null && _a !== void 0 ? _a : "",
|
|
540
|
+
estimatedTier: (_b = data.estimated_tier) !== null && _b !== void 0 ? _b : "unknown",
|
|
541
|
+
estimatedCredits: (_c = data.estimated_credits) !== null && _c !== void 0 ? _c : 0,
|
|
542
|
+
confidence: (_d = data.confidence) !== null && _d !== void 0 ? _d : "low",
|
|
543
|
+
maxPossibleCredits: (_e = data.max_possible_credits) !== null && _e !== void 0 ? _e : 0,
|
|
544
|
+
reasoning: (_f = data.reasoning) !== null && _f !== void 0 ? _f : "",
|
|
545
|
+
},
|
|
546
|
+
});
|
|
547
|
+
continue;
|
|
548
|
+
}
|
|
549
|
+
// ── Scrape operation ──────────────────────────────
|
|
550
|
+
const outputOptions = this.getNodeParameter("outputOptions", i, {});
|
|
551
|
+
const executionMode = this.getNodeParameter("executionMode", i, {});
|
|
552
|
+
const advancedOptions = this.getNodeParameter("advancedOptions", i, {});
|
|
553
|
+
const extraction = this.getNodeParameter("extraction", i, {});
|
|
554
|
+
const costControls = this.getNodeParameter("costControls", i, {});
|
|
390
555
|
// Build request body
|
|
391
556
|
const body = {
|
|
392
557
|
url,
|
|
@@ -394,7 +559,7 @@ class AlterLab {
|
|
|
394
559
|
sync: true,
|
|
395
560
|
};
|
|
396
561
|
// Output options
|
|
397
|
-
if ((
|
|
562
|
+
if ((_g = outputOptions.formats) === null || _g === void 0 ? void 0 : _g.length) {
|
|
398
563
|
body.formats = outputOptions.formats;
|
|
399
564
|
}
|
|
400
565
|
if (outputOptions.includeRawHtml) {
|
|
@@ -425,7 +590,8 @@ class AlterLab {
|
|
|
425
590
|
if (advancedOptions.proxyCountry) {
|
|
426
591
|
advanced.proxy_country = advancedOptions.proxyCountry;
|
|
427
592
|
}
|
|
428
|
-
if (advancedOptions.waitCondition &&
|
|
593
|
+
if (advancedOptions.waitCondition &&
|
|
594
|
+
advancedOptions.waitCondition !== "networkidle") {
|
|
429
595
|
advanced.wait_condition = advancedOptions.waitCondition;
|
|
430
596
|
}
|
|
431
597
|
if (advancedOptions.removeCookieBanners === false) {
|
|
@@ -435,7 +601,8 @@ class AlterLab {
|
|
|
435
601
|
body.advanced = advanced;
|
|
436
602
|
}
|
|
437
603
|
// Extraction
|
|
438
|
-
if (extraction.extractionProfile &&
|
|
604
|
+
if (extraction.extractionProfile &&
|
|
605
|
+
extraction.extractionProfile !== "auto") {
|
|
439
606
|
body.extraction_profile = extraction.extractionProfile;
|
|
440
607
|
}
|
|
441
608
|
if (extraction.extractionPrompt) {
|
|
@@ -444,12 +611,12 @@ class AlterLab {
|
|
|
444
611
|
if (extraction.extractionSchema) {
|
|
445
612
|
try {
|
|
446
613
|
body.extraction_schema =
|
|
447
|
-
typeof extraction.extractionSchema ===
|
|
614
|
+
typeof extraction.extractionSchema === "string"
|
|
448
615
|
? JSON.parse(extraction.extractionSchema)
|
|
449
616
|
: extraction.extractionSchema;
|
|
450
617
|
}
|
|
451
618
|
catch {
|
|
452
|
-
throw new n8n_workflow_1.NodeOperationError(this.getNode(),
|
|
619
|
+
throw new n8n_workflow_1.NodeOperationError(this.getNode(), "Invalid JSON in Extraction Schema", { itemIndex: i });
|
|
453
620
|
}
|
|
454
621
|
}
|
|
455
622
|
if (extraction.promoteSchemaOrg === false) {
|
|
@@ -478,19 +645,10 @@ class AlterLab {
|
|
|
478
645
|
if (Object.keys(costCtrl).length > 0) {
|
|
479
646
|
body.cost_controls = costCtrl;
|
|
480
647
|
}
|
|
481
|
-
// ── Detect credential type ────────────────────────
|
|
482
|
-
let authName = 'alterLabApi';
|
|
483
|
-
try {
|
|
484
|
-
await this.getCredentials('alterLabOAuth2Api');
|
|
485
|
-
authName = 'alterLabOAuth2Api';
|
|
486
|
-
}
|
|
487
|
-
catch {
|
|
488
|
-
// OAuth2 not configured, fall back to API key
|
|
489
|
-
}
|
|
490
648
|
// ── Make the API call ─────────────────────────────
|
|
491
649
|
let response = await this.helpers.httpRequestWithAuthentication.call(this, authName, {
|
|
492
|
-
method:
|
|
493
|
-
url:
|
|
650
|
+
method: "POST",
|
|
651
|
+
url: `${BASE_URL}/api/v1/scrape`,
|
|
494
652
|
body,
|
|
495
653
|
json: true,
|
|
496
654
|
returnFullResponse: true,
|
|
@@ -503,20 +661,22 @@ class AlterLab {
|
|
|
503
661
|
const jobId = responseBody.job_id;
|
|
504
662
|
let delay = 500;
|
|
505
663
|
const maxDelay = 5000;
|
|
506
|
-
const maxPollTime = (((
|
|
664
|
+
const maxPollTime = (((_h = outputOptions.timeout) !== null && _h !== void 0 ? _h : 90) + 30) * 1000; // timeout + 30s buffer
|
|
507
665
|
const pollStart = Date.now();
|
|
508
666
|
while (Date.now() - pollStart < maxPollTime) {
|
|
509
|
-
await
|
|
667
|
+
await sleep(delay);
|
|
510
668
|
delay = Math.min(delay * 2, maxDelay);
|
|
511
669
|
const pollResponse = await this.helpers.httpRequestWithAuthentication.call(this, authName, {
|
|
512
|
-
method:
|
|
513
|
-
url:
|
|
670
|
+
method: "GET",
|
|
671
|
+
url: `${BASE_URL}/api/v1/jobs/${jobId}`,
|
|
514
672
|
json: true,
|
|
515
673
|
returnFullResponse: true,
|
|
516
674
|
ignoreHttpStatusErrors: true,
|
|
517
675
|
});
|
|
518
|
-
const pollStatus = pollResponse
|
|
519
|
-
|
|
676
|
+
const pollStatus = pollResponse
|
|
677
|
+
.statusCode;
|
|
678
|
+
const pollBody = pollResponse
|
|
679
|
+
.body;
|
|
520
680
|
if (pollStatus === 200 && (pollBody === null || pollBody === void 0 ? void 0 : pollBody.status_code)) {
|
|
521
681
|
statusCode = 200;
|
|
522
682
|
responseBody = pollBody;
|
|
@@ -530,7 +690,7 @@ class AlterLab {
|
|
|
530
690
|
// Still processing (200 with status: "processing") — continue polling
|
|
531
691
|
}
|
|
532
692
|
if (statusCode === 202) {
|
|
533
|
-
throw new n8n_workflow_1.NodeOperationError(this.getNode(),
|
|
693
|
+
throw new n8n_workflow_1.NodeOperationError(this.getNode(), "Scrape job timed out while waiting for results. Try increasing the timeout or using a simpler scraping mode.", { itemIndex: i });
|
|
534
694
|
}
|
|
535
695
|
}
|
|
536
696
|
// ── Handle errors ─────────────────────────────────
|
|
@@ -541,40 +701,43 @@ class AlterLab {
|
|
|
541
701
|
const data = responseBody;
|
|
542
702
|
const content = data.content;
|
|
543
703
|
const output = {
|
|
544
|
-
url: (
|
|
545
|
-
statusCode: (
|
|
546
|
-
title: (
|
|
547
|
-
author: (
|
|
548
|
-
publishedAt: (
|
|
549
|
-
cached: (
|
|
550
|
-
responseTimeMs: (
|
|
551
|
-
sizeBytes: (
|
|
704
|
+
url: (_j = data.url) !== null && _j !== void 0 ? _j : "",
|
|
705
|
+
statusCode: (_k = data.status_code) !== null && _k !== void 0 ? _k : 0,
|
|
706
|
+
title: (_l = data.title) !== null && _l !== void 0 ? _l : null,
|
|
707
|
+
author: (_m = data.author) !== null && _m !== void 0 ? _m : null,
|
|
708
|
+
publishedAt: (_o = data.published_at) !== null && _o !== void 0 ? _o : null,
|
|
709
|
+
cached: (_p = data.cached) !== null && _p !== void 0 ? _p : false,
|
|
710
|
+
responseTimeMs: (_q = data.response_time_ms) !== null && _q !== void 0 ? _q : 0,
|
|
711
|
+
sizeBytes: (_r = data.size_bytes) !== null && _r !== void 0 ? _r : 0,
|
|
552
712
|
};
|
|
553
713
|
// Flatten multi-format content
|
|
554
|
-
if (content && typeof content ===
|
|
555
|
-
output.markdown =
|
|
556
|
-
|
|
557
|
-
output.
|
|
558
|
-
output.
|
|
714
|
+
if (content && typeof content === "object") {
|
|
715
|
+
output.markdown =
|
|
716
|
+
(_s = content.markdown) !== null && _s !== void 0 ? _s : null;
|
|
717
|
+
output.text = (_t = content.text) !== null && _t !== void 0 ? _t : null;
|
|
718
|
+
output.json = (_u = content.json) !== null && _u !== void 0 ? _u : null;
|
|
719
|
+
output.jsonV2 = (_v = content.json_v2) !== null && _v !== void 0 ? _v : null;
|
|
720
|
+
output.html = (_w = content.html) !== null && _w !== void 0 ? _w : null;
|
|
721
|
+
output.rag = (_x = content.rag) !== null && _x !== void 0 ? _x : null;
|
|
559
722
|
}
|
|
560
723
|
else {
|
|
561
724
|
output.markdown = content !== null && content !== void 0 ? content : null;
|
|
562
725
|
}
|
|
563
726
|
// Extraction results
|
|
564
|
-
output.filteredContent = (
|
|
565
|
-
output.extractionMethod = (
|
|
727
|
+
output.filteredContent = (_y = data.filtered_content) !== null && _y !== void 0 ? _y : null;
|
|
728
|
+
output.extractionMethod = (_z = data.extraction_method) !== null && _z !== void 0 ? _z : null;
|
|
566
729
|
// Advanced outputs
|
|
567
|
-
output.screenshotUrl = (
|
|
568
|
-
output.pdfUrl = (
|
|
569
|
-
output.ocrResults = (
|
|
570
|
-
output.rawHtml = (
|
|
730
|
+
output.screenshotUrl = (_0 = data.screenshot_url) !== null && _0 !== void 0 ? _0 : null;
|
|
731
|
+
output.pdfUrl = (_1 = data.pdf_url) !== null && _1 !== void 0 ? _1 : null;
|
|
732
|
+
output.ocrResults = (_2 = data.ocr_results) !== null && _2 !== void 0 ? _2 : null;
|
|
733
|
+
output.rawHtml = (_3 = data.raw_html) !== null && _3 !== void 0 ? _3 : null;
|
|
571
734
|
// Billing breakdown (flattened)
|
|
572
735
|
const billing = data.billing;
|
|
573
736
|
output.billing = {
|
|
574
|
-
cost: (
|
|
575
|
-
tier: (
|
|
576
|
-
savings: (
|
|
577
|
-
suggestion: (
|
|
737
|
+
cost: (_5 = (_4 = billing === null || billing === void 0 ? void 0 : billing.total_credits) !== null && _4 !== void 0 ? _4 : data.credits_used) !== null && _5 !== void 0 ? _5 : 0,
|
|
738
|
+
tier: (_7 = (_6 = billing === null || billing === void 0 ? void 0 : billing.tier_used) !== null && _6 !== void 0 ? _6 : data.tier_used) !== null && _7 !== void 0 ? _7 : "unknown",
|
|
739
|
+
savings: (_8 = billing === null || billing === void 0 ? void 0 : billing.savings) !== null && _8 !== void 0 ? _8 : 0,
|
|
740
|
+
suggestion: (_9 = billing === null || billing === void 0 ? void 0 : billing.optimization_suggestion) !== null && _9 !== void 0 ? _9 : null,
|
|
578
741
|
};
|
|
579
742
|
results.push({ json: output });
|
|
580
743
|
}
|
|
@@ -593,43 +756,269 @@ class AlterLab {
|
|
|
593
756
|
}
|
|
594
757
|
}
|
|
595
758
|
exports.AlterLab = AlterLab;
|
|
759
|
+
/**
 * Format a single scrape result into n8n output format.
 *
 * Maps the snake_case fields of a scrape payload onto the camelCase item
 * shape emitted by the node. Fields that are absent (null/undefined) fall
 * back to neutral defaults; present-but-falsy values such as 0, "" and
 * false are preserved.
 *
 * @param {object|null|undefined} data - Scrape payload. A missing payload is
 *   treated as an empty object, so the result is an all-defaults item rather
 *   than a TypeError.
 * @returns {object} Output object with page metadata, the flattened content
 *   formats, extraction results, advanced outputs and a `billing` summary.
 */
function formatScrapeResult(data) {
    // Nullish fallback: only null/undefined are replaced.
    const orDefault = (value, fallback) => value === null || value === undefined ? fallback : value;
    const source = orDefault(data, {});
    const content = source.content;
    const output = {
        url: orDefault(source.url, ""),
        statusCode: orDefault(source.status_code, 0),
        title: orDefault(source.title, null),
        author: orDefault(source.author, null),
        publishedAt: orDefault(source.published_at, null),
        cached: orDefault(source.cached, false),
        responseTimeMs: orDefault(source.response_time_ms, 0),
        sizeBytes: orDefault(source.size_bytes, 0),
    };
    if (content && typeof content === "object") {
        // Multi-format response: expose every known format as its own field.
        output.markdown = orDefault(content.markdown, null);
        output.text = orDefault(content.text, null);
        output.json = orDefault(content.json, null);
        output.jsonV2 = orDefault(content.json_v2, null);
        output.html = orDefault(content.html, null);
        output.rag = orDefault(content.rag, null);
    }
    else {
        // Non-object content is passed through as-is in the markdown field.
        output.markdown = orDefault(content, null);
    }
    // Extraction results
    output.filteredContent = orDefault(source.filtered_content, null);
    output.extractionMethod = orDefault(source.extraction_method, null);
    // Advanced outputs
    output.screenshotUrl = orDefault(source.screenshot_url, null);
    output.pdfUrl = orDefault(source.pdf_url, null);
    output.ocrResults = orDefault(source.ocr_results, null);
    output.rawHtml = orDefault(source.raw_html, null);
    // Billing breakdown: prefer the nested `billing` object, then the
    // top-level fields of the payload, then a neutral default.
    const billing = orDefault(source.billing, {});
    output.billing = {
        cost: orDefault(orDefault(billing.total_credits, source.credits_used), 0),
        tier: orDefault(orDefault(billing.tier_used, source.tier_used), "unknown"),
        savings: orDefault(billing.savings, 0),
        suggestion: orDefault(billing.optimization_suggestion, null),
    };
    return output;
}
|
|
801
|
+
/**
 * Build a per-URL request body for the batch API from node parameters.
 *
 * Reads the `url`, `mode`, `outputOptions`, `executionMode`,
 * `advancedOptions`, `extraction` and `costControls` parameters for the given
 * item and translates them into the snake_case request shape. Options left at
 * the values this function treats as defaults (timeout 90, wait condition
 * "networkidle", extraction profile "auto") are omitted from the body, and the
 * `advanced` / `cost_controls` sections are only attached when non-empty.
 *
 * @param {object} ctx - Execution context providing `getNodeParameter` and `getNode`.
 * @param {number} itemIndex - Index of the input item whose parameters are read.
 * @returns {object} Request body for one batch entry.
 * @throws {NodeOperationError} When the extraction schema is a string that is
 *   not valid JSON; the parser's message is attached as the error description.
 */
function buildBatchItemBody(ctx, itemIndex) {
    const url = ctx.getNodeParameter("url", itemIndex);
    const mode = ctx.getNodeParameter("mode", itemIndex);
    const outputOptions = ctx.getNodeParameter("outputOptions", itemIndex, {});
    const executionMode = ctx.getNodeParameter("executionMode", itemIndex, {});
    const advancedOptions = ctx.getNodeParameter("advancedOptions", itemIndex, {});
    const extraction = ctx.getNodeParameter("extraction", itemIndex, {});
    const costControls = ctx.getNodeParameter("costControls", itemIndex, {});
    const body = { url, mode };
    // Output options
    const formats = outputOptions.formats;
    if (formats !== null && formats !== undefined && formats.length) {
        body.formats = formats;
    }
    if (outputOptions.includeRawHtml) {
        body.include_raw_html = true;
    }
    if (outputOptions.timeout && outputOptions.timeout !== 90) {
        body.timeout = outputOptions.timeout;
    }
    if (executionMode.cache) {
        body.cache = true;
    }
    // Advanced options: boolean switches are only sent when enabled.
    const advanced = {};
    const advancedFlags = [
        ["renderJs", "render_js"],
        ["screenshot", "screenshot"],
        ["generatePdf", "generate_pdf"],
        ["ocr", "ocr"],
        ["useProxy", "use_proxy"],
    ];
    for (const [option, field] of advancedFlags) {
        if (advancedOptions[option]) {
            advanced[field] = true;
        }
    }
    if (advancedOptions.proxyCountry) {
        advanced.proxy_country = advancedOptions.proxyCountry;
    }
    if (advancedOptions.waitCondition &&
        advancedOptions.waitCondition !== "networkidle") {
        advanced.wait_condition = advancedOptions.waitCondition;
    }
    // Only an explicit `false` is forwarded; an unset option is left out.
    if (advancedOptions.removeCookieBanners === false) {
        advanced.remove_cookie_banners = false;
    }
    if (Object.keys(advanced).length > 0) {
        body.advanced = advanced;
    }
    // Extraction
    if (extraction.extractionProfile && extraction.extractionProfile !== "auto") {
        body.extraction_profile = extraction.extractionProfile;
    }
    if (extraction.extractionPrompt) {
        body.extraction_prompt = extraction.extractionPrompt;
    }
    if (extraction.extractionSchema) {
        try {
            body.extraction_schema =
                typeof extraction.extractionSchema === "string"
                    ? JSON.parse(extraction.extractionSchema)
                    : extraction.extractionSchema;
        }
        catch (error) {
            // Surface the parser's message so the user can locate the syntax error.
            throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), "Invalid JSON in Extraction Schema", {
                itemIndex,
                description: error instanceof Error ? error.message : String(error),
            });
        }
    }
    // Cost controls
    const costCtrl = {};
    if (costControls.maxCredits && costControls.maxCredits > 0) {
        costCtrl.max_credits = costControls.maxCredits;
    }
    if (costControls.forceTier) {
        costCtrl.force_tier = costControls.forceTier;
    }
    if (costControls.maxTier) {
        costCtrl.max_tier = costControls.maxTier;
    }
    const costFlags = [
        ["preferCost", "prefer_cost"],
        ["preferSpeed", "prefer_speed"],
        ["failFast", "fail_fast"],
    ];
    for (const [option, field] of costFlags) {
        if (costControls[option]) {
            costCtrl[field] = true;
        }
    }
    if (Object.keys(costCtrl).length > 0) {
        body.cost_controls = costCtrl;
    }
    return body;
}
|
|
886
|
+
/**
 * Execute batch scrape: collect all input items into one batch API call,
 * poll for completion, and return one output item per URL result.
 *
 * Flow: build one request body per input item, POST them to `/api/v1/batch`,
 * then poll `/api/v1/batch/{id}` with a doubling delay (1s start, 5s cap)
 * until the batch reports "completed", "partial" or "failed", or until the
 * `batchPollingTimeout` parameter (seconds, default 300) has elapsed. A summary
 * of the batch is attached to the first output item as `batchSummary`.
 *
 * @param {object} ctx - Execution context (`getNode`, `getNodeParameter`, `helpers`).
 * @param {Array} items - Input items; each one becomes one URL in the batch.
 * @param {string} authName - Credential name passed to `httpRequestWithAuthentication`.
 * @returns {Promise<Array<Array<{json: object}>>>} Single output branch with one item per batch result.
 * @throws {NodeOperationError} When more than 100 items are supplied, when the
 *   submit response has no `batch_id`, or when polling times out.
 */
async function executeBatchScrape(ctx, items, authName) {
    const orDefault = (value, fallback) => value === null || value === undefined ? fallback : value;
    if (items.length > 100) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), `Batch scrape supports up to 100 URLs, but ${items.length} items were provided. Split your data into smaller batches upstream.`);
    }
    // Nothing to scrape: avoid reading item-0 parameters and submitting an empty batch.
    if (items.length === 0) {
        return [[]];
    }
    // Build batch request body from all input items
    const body = { urls: items.map((_item, index) => buildBatchItemBody(ctx, index)) };
    const webhookUrl = ctx.getNodeParameter("webhookUrl", 0, "");
    if (webhookUrl) {
        body.webhook_url = webhookUrl;
    }
    // Submit batch
    const response = await ctx.helpers.httpRequestWithAuthentication.call(ctx, authName, {
        method: "POST",
        url: "/api/v1/batch",
        body,
        json: true,
        returnFullResponse: true,
        ignoreHttpStatusErrors: true,
    });
    const statusCode = response.statusCode;
    const responseBody = response.body;
    if (statusCode >= 400) {
        handleApiError(ctx, statusCode, responseBody, 0);
    }
    const batchId = orDefault(responseBody, {}).batch_id;
    // Without an id every poll would hit "/api/v1/batch/undefined"; fail early instead.
    if (batchId === undefined || batchId === null || batchId === "") {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), "Batch submission did not return a batch_id, so the batch cannot be polled for results.");
    }
    // Poll for completion with exponential backoff
    const pollingTimeout = ctx.getNodeParameter("batchPollingTimeout", 0, 300);
    const maxPollTime = pollingTimeout * 1000;
    const maxDelay = 5000;
    const pollStart = Date.now();
    const terminalStatuses = ["completed", "partial", "failed"];
    let delay = 1000;
    let batchResult;
    for (;;) {
        const remaining = maxPollTime - (Date.now() - pollStart);
        if (remaining <= 0) {
            break;
        }
        // Never sleep past the deadline; the last poll happens right at it.
        await sleep(Math.min(delay, remaining));
        delay = Math.min(delay * 2, maxDelay);
        const pollResponse = await ctx.helpers.httpRequestWithAuthentication.call(ctx, authName, {
            method: "GET",
            url: `/api/v1/batch/${encodeURIComponent(batchId)}`,
            json: true,
            returnFullResponse: true,
            ignoreHttpStatusErrors: true,
        });
        const pollStatusCode = pollResponse.statusCode;
        const pollBody = pollResponse.body;
        if (pollStatusCode >= 400) {
            handleApiError(ctx, pollStatusCode, pollBody, 0);
        }
        if (terminalStatuses.includes(orDefault(pollBody, {}).status)) {
            batchResult = pollBody;
            break;
        }
    }
    if (!batchResult) {
        throw new n8n_workflow_1.NodeOperationError(ctx.getNode(), `Batch ${batchId} timed out after ${pollingTimeout}s. Use a webhook URL to receive results asynchronously, or increase the polling timeout.`);
    }
    // Map results to output items
    const results = orDefault(batchResult.items, []).map((batchItem) => {
        if (batchItem.status === "succeeded" && batchItem.result) {
            const output = formatScrapeResult(batchItem.result);
            output.batchId = batchId;
            output.jobId = orDefault(batchItem.job_id, "");
            return { json: output };
        }
        // Failed or unfinished entries still yield an item so no URL is dropped silently.
        return {
            json: {
                url: orDefault(batchItem.url, ""),
                jobId: orDefault(batchItem.job_id, ""),
                batchId,
                status: orDefault(batchItem.status, "unknown"),
                error: orDefault(batchItem.error, "Scrape failed"),
            },
        };
    });
    // Add batch summary to first result
    if (results.length > 0) {
        results[0].json.batchSummary = {
            batchId,
            total: batchResult.total,
            completed: batchResult.completed,
            failed: batchResult.failed,
            status: batchResult.status,
        };
    }
    return [results];
}
|
|
596
985
|
function handleApiError(ctx, statusCode, body, itemIndex) {
|
|
597
986
|
var _a, _b;
|
|
598
|
-
const detail = (_b = (_a = body === null || body === void 0 ? void 0 : body.detail) !== null && _a !== void 0 ? _a : body === null || body === void 0 ? void 0 : body.message) !== null && _b !== void 0 ? _b :
|
|
987
|
+
const detail = (_b = (_a = body === null || body === void 0 ? void 0 : body.detail) !== null && _a !== void 0 ? _a : body === null || body === void 0 ? void 0 : body.message) !== null && _b !== void 0 ? _b : "Unknown error";
|
|
599
988
|
switch (statusCode) {
|
|
600
989
|
case 401:
|
|
601
990
|
throw new n8n_workflow_1.NodeApiError(ctx.getNode(), body, {
|
|
602
|
-
message:
|
|
991
|
+
message: "Invalid API key",
|
|
603
992
|
description: `${detail}. Check your API key or get a new one at https://app.alterlab.io/dashboard/keys?${UTM}`,
|
|
604
|
-
httpCode:
|
|
993
|
+
httpCode: "401",
|
|
605
994
|
itemIndex,
|
|
606
995
|
});
|
|
607
996
|
case 402:
|
|
608
997
|
throw new n8n_workflow_1.NodeApiError(ctx.getNode(), body, {
|
|
609
|
-
message:
|
|
998
|
+
message: "Insufficient balance",
|
|
610
999
|
description: `${detail}. Top up your balance at https://app.alterlab.io/dashboard/billing?${UTM}`,
|
|
611
|
-
httpCode:
|
|
1000
|
+
httpCode: "402",
|
|
612
1001
|
itemIndex,
|
|
613
1002
|
});
|
|
614
1003
|
case 429:
|
|
615
1004
|
throw new n8n_workflow_1.NodeApiError(ctx.getNode(), body, {
|
|
616
|
-
message:
|
|
1005
|
+
message: "Rate limit exceeded",
|
|
617
1006
|
description: `${detail}. Upgrade your plan for higher rate limits at https://alterlab.io/pricing?${UTM}`,
|
|
618
|
-
httpCode:
|
|
1007
|
+
httpCode: "429",
|
|
619
1008
|
itemIndex,
|
|
620
1009
|
});
|
|
621
1010
|
case 403:
|
|
622
1011
|
throw new n8n_workflow_1.NodeApiError(ctx.getNode(), body, {
|
|
623
|
-
message:
|
|
1012
|
+
message: "Blocked by anti-bot protection",
|
|
624
1013
|
description: `${detail}. Try enabling "Use Proxy" in Advanced Options, or use a higher tier via Cost Controls.`,
|
|
625
|
-
httpCode:
|
|
1014
|
+
httpCode: "403",
|
|
626
1015
|
itemIndex,
|
|
627
1016
|
});
|
|
628
1017
|
case 504:
|
|
629
1018
|
throw new n8n_workflow_1.NodeApiError(ctx.getNode(), body, {
|
|
630
|
-
message:
|
|
1019
|
+
message: "Request timed out",
|
|
631
1020
|
description: `${detail}. Try increasing the timeout, using async mode, or a simpler scraping mode.`,
|
|
632
|
-
httpCode:
|
|
1021
|
+
httpCode: "504",
|
|
633
1022
|
itemIndex,
|
|
634
1023
|
});
|
|
635
1024
|
default:
|