n8n-nodes-crawl4ai-onuro 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/LICENSE.md +19 -0
- package/README.md +129 -0
- package/dist/credentials/Crawl4aiApi.credentials.d.ts +7 -0
- package/dist/credentials/Crawl4aiApi.credentials.js +228 -0
- package/dist/credentials/Crawl4aiApi.credentials.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.d.ts +5 -0
- package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js +37 -0
- package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js +421 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js +422 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.d.ts +8 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js +67 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js +148 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/router.d.ts +2 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js +37 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/crawl4ai.svg +6 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.d.ts +15 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js +271 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.d.ts +5 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js +96 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.d.ts +119 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js +3 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.d.ts +8 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js +80 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.d.ts +5 -0
- package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js +38 -0
- package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js +295 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js +328 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js +417 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/operations.d.ts +8 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js +67 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/router.d.ts +2 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/router.js +37 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/router.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/crawl4ai.svg +6 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.d.ts +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js +7 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.d.ts +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js +8 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.d.ts +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js +3 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.d.ts +9 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js +93 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/index.js +14 -0
- package/package.json +68 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.description = void 0;
|
|
4
|
+
exports.execute = execute;
|
|
5
|
+
const n8n_workflow_1 = require("n8n-workflow");
|
|
6
|
+
const utils_1 = require("../helpers/utils");
|
|
7
|
+
const formatters_1 = require("../helpers/formatters");
|
|
8
|
+
exports.description = [
|
|
9
|
+
{
|
|
10
|
+
displayName: 'URL',
|
|
11
|
+
name: 'url',
|
|
12
|
+
type: 'string',
|
|
13
|
+
required: true,
|
|
14
|
+
default: '',
|
|
15
|
+
placeholder: 'https://example.com',
|
|
16
|
+
description: 'The URL to crawl',
|
|
17
|
+
displayOptions: {
|
|
18
|
+
show: {
|
|
19
|
+
operation: ['crawlSingleUrl'],
|
|
20
|
+
},
|
|
21
|
+
},
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
displayName: 'Browser Options',
|
|
25
|
+
name: 'browserOptions',
|
|
26
|
+
type: 'collection',
|
|
27
|
+
placeholder: 'Add Option',
|
|
28
|
+
default: {},
|
|
29
|
+
displayOptions: {
|
|
30
|
+
show: {
|
|
31
|
+
operation: ['crawlSingleUrl'],
|
|
32
|
+
},
|
|
33
|
+
},
|
|
34
|
+
options: [
|
|
35
|
+
{
|
|
36
|
+
displayName: 'Enable JavaScript',
|
|
37
|
+
name: 'javaScriptEnabled',
|
|
38
|
+
type: 'boolean',
|
|
39
|
+
default: true,
|
|
40
|
+
description: 'Whether to enable JavaScript execution',
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
displayName: 'Headless Mode',
|
|
44
|
+
name: 'headless',
|
|
45
|
+
type: 'boolean',
|
|
46
|
+
default: true,
|
|
47
|
+
description: 'Whether to run browser in headless mode',
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
displayName: 'Timeout (Ms)',
|
|
51
|
+
name: 'timeout',
|
|
52
|
+
type: 'number',
|
|
53
|
+
default: 30000,
|
|
54
|
+
description: 'Maximum time to wait for the browser to load the page',
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
displayName: 'User Agent',
|
|
58
|
+
name: 'userAgent',
|
|
59
|
+
type: 'string',
|
|
60
|
+
default: '',
|
|
61
|
+
placeholder: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...',
|
|
62
|
+
description: 'The user agent to use (leave empty for default)',
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
displayName: 'Viewport Height',
|
|
66
|
+
name: 'viewportHeight',
|
|
67
|
+
type: 'number',
|
|
68
|
+
default: 800,
|
|
69
|
+
description: 'The height of the browser viewport',
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
displayName: 'Viewport Width',
|
|
73
|
+
name: 'viewportWidth',
|
|
74
|
+
type: 'number',
|
|
75
|
+
default: 1280,
|
|
76
|
+
description: 'The width of the browser viewport',
|
|
77
|
+
},
|
|
78
|
+
],
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
displayName: 'Crawler Options',
|
|
82
|
+
name: 'crawlerOptions',
|
|
83
|
+
type: 'collection',
|
|
84
|
+
placeholder: 'Add Option',
|
|
85
|
+
default: {},
|
|
86
|
+
displayOptions: {
|
|
87
|
+
show: {
|
|
88
|
+
operation: ['crawlSingleUrl'],
|
|
89
|
+
},
|
|
90
|
+
},
|
|
91
|
+
options: [
|
|
92
|
+
{
|
|
93
|
+
displayName: 'Cache Mode',
|
|
94
|
+
name: 'cacheMode',
|
|
95
|
+
type: 'options',
|
|
96
|
+
options: [
|
|
97
|
+
{
|
|
98
|
+
name: 'Enabled (Read/Write)',
|
|
99
|
+
value: 'enabled',
|
|
100
|
+
description: 'Use cache if available, save new results to cache',
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
name: 'Bypass (Force Fresh)',
|
|
104
|
+
value: 'bypass',
|
|
105
|
+
description: 'Ignore cache, always fetch fresh content',
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
name: 'Only (Read Only)',
|
|
109
|
+
value: 'only',
|
|
110
|
+
description: 'Only use cache, do not make new requests',
|
|
111
|
+
},
|
|
112
|
+
],
|
|
113
|
+
default: 'enabled',
|
|
114
|
+
description: 'How to use the cache when crawling',
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
displayName: 'Check Robots.txt',
|
|
118
|
+
name: 'checkRobotsTxt',
|
|
119
|
+
type: 'boolean',
|
|
120
|
+
default: false,
|
|
121
|
+
description: 'Whether to respect robots.txt rules',
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
displayName: 'CSS Selector',
|
|
125
|
+
name: 'cssSelector',
|
|
126
|
+
type: 'string',
|
|
127
|
+
default: '',
|
|
128
|
+
placeholder: 'article.content',
|
|
129
|
+
description: 'CSS selector to focus on specific content (leave empty for full page)',
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
displayName: 'Exclude External Links',
|
|
133
|
+
name: 'excludeExternalLinks',
|
|
134
|
+
type: 'boolean',
|
|
135
|
+
default: false,
|
|
136
|
+
description: 'Whether to exclude external links from the result',
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
displayName: 'Excluded Tags',
|
|
140
|
+
name: 'excludedTags',
|
|
141
|
+
type: 'string',
|
|
142
|
+
default: '',
|
|
143
|
+
placeholder: 'nav,footer,aside',
|
|
144
|
+
description: 'Comma-separated list of HTML tags to exclude from processing',
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
displayName: 'JavaScript Code',
|
|
148
|
+
name: 'jsCode',
|
|
149
|
+
type: 'string',
|
|
150
|
+
typeOptions: {
|
|
151
|
+
rows: 4,
|
|
152
|
+
},
|
|
153
|
+
default: '',
|
|
154
|
+
placeholder: 'document.querySelector("button.load-more").click();',
|
|
155
|
+
description: 'JavaScript code to execute on the page after load',
|
|
156
|
+
},
|
|
157
|
+
{
|
|
158
|
+
displayName: 'JavaScript Only Mode',
|
|
159
|
+
name: 'jsOnly',
|
|
160
|
+
type: 'boolean',
|
|
161
|
+
default: false,
|
|
162
|
+
description: 'Whether to only execute JavaScript without crawling',
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
displayName: 'Max Retries',
|
|
166
|
+
name: 'maxRetries',
|
|
167
|
+
type: 'number',
|
|
168
|
+
default: 3,
|
|
169
|
+
description: 'Maximum number of retries for failed requests',
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
displayName: 'Page Timeout (Ms)',
|
|
173
|
+
name: 'pageTimeout',
|
|
174
|
+
type: 'number',
|
|
175
|
+
default: 30000,
|
|
176
|
+
description: 'Maximum time to wait for the page to load',
|
|
177
|
+
},
|
|
178
|
+
{
|
|
179
|
+
displayName: 'Request Timeout (Ms)',
|
|
180
|
+
name: 'requestTimeout',
|
|
181
|
+
type: 'number',
|
|
182
|
+
default: 30000,
|
|
183
|
+
description: 'Maximum time to wait for network requests',
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
displayName: 'Session ID',
|
|
187
|
+
name: 'sessionId',
|
|
188
|
+
type: 'string',
|
|
189
|
+
default: '',
|
|
190
|
+
placeholder: 'my-session-ID',
|
|
191
|
+
description: 'ID to maintain browser state across multiple crawls (for multi-step crawling)',
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
displayName: 'Word Count Threshold',
|
|
195
|
+
name: 'wordCountThreshold',
|
|
196
|
+
type: 'number',
|
|
197
|
+
default: 0,
|
|
198
|
+
description: 'Minimum number of words for content to be included',
|
|
199
|
+
},
|
|
200
|
+
{
|
|
201
|
+
displayName: 'Exclude Domains',
|
|
202
|
+
name: 'excludeDomains',
|
|
203
|
+
type: 'string',
|
|
204
|
+
default: '',
|
|
205
|
+
placeholder: 'ads.example.com,tracker.example.com',
|
|
206
|
+
description: 'Comma-separated list of domains to exclude from links',
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
displayName: 'Exclude External Images',
|
|
210
|
+
name: 'excludeExternalImages',
|
|
211
|
+
type: 'boolean',
|
|
212
|
+
default: false,
|
|
213
|
+
description: 'Whether to exclude images not hosted on the same domain',
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
displayName: 'Exclude Internal Links',
|
|
217
|
+
name: 'excludeInternalLinks',
|
|
218
|
+
type: 'boolean',
|
|
219
|
+
default: false,
|
|
220
|
+
description: 'Whether to exclude links pointing to the same domain',
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
displayName: 'Exclude Social Media Links',
|
|
224
|
+
name: 'excludeSocialMediaLinks',
|
|
225
|
+
type: 'boolean',
|
|
226
|
+
default: false,
|
|
227
|
+
description: 'Whether to remove links to social media platforms',
|
|
228
|
+
},
|
|
229
|
+
{
|
|
230
|
+
displayName: 'Process Iframes',
|
|
231
|
+
name: 'processIframes',
|
|
232
|
+
type: 'boolean',
|
|
233
|
+
default: false,
|
|
234
|
+
description: 'Whether to extract content from embedded iframes',
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
displayName: 'Remove Overlay Elements',
|
|
238
|
+
name: 'removeOverlayElements',
|
|
239
|
+
type: 'boolean',
|
|
240
|
+
default: false,
|
|
241
|
+
description: 'Whether to remove popups, modals, and cookie banners',
|
|
242
|
+
},
|
|
243
|
+
{
|
|
244
|
+
displayName: 'Target Elements',
|
|
245
|
+
name: 'targetElements',
|
|
246
|
+
type: 'string',
|
|
247
|
+
default: '',
|
|
248
|
+
placeholder: 'article.content,div.main',
|
|
249
|
+
description: 'Comma-separated CSS selectors for focused extraction',
|
|
250
|
+
},
|
|
251
|
+
],
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
displayName: 'LLM Extraction',
|
|
255
|
+
name: 'llmExtraction',
|
|
256
|
+
type: 'collection',
|
|
257
|
+
placeholder: 'Add LLM Option',
|
|
258
|
+
default: {},
|
|
259
|
+
displayOptions: {
|
|
260
|
+
show: {
|
|
261
|
+
operation: ['crawlSingleUrl'],
|
|
262
|
+
},
|
|
263
|
+
},
|
|
264
|
+
options: [
|
|
265
|
+
{
|
|
266
|
+
displayName: 'LLM Provider',
|
|
267
|
+
name: 'provider',
|
|
268
|
+
type: 'options',
|
|
269
|
+
options: [
|
|
270
|
+
{ name: 'DeepSeek', value: 'deepseek' },
|
|
271
|
+
{ name: 'OpenAI', value: 'openai' },
|
|
272
|
+
{ name: 'Anthropic', value: 'anthropic' },
|
|
273
|
+
{ name: 'Ollama (Local)', value: 'ollama' },
|
|
274
|
+
],
|
|
275
|
+
default: 'deepseek',
|
|
276
|
+
description: 'The LLM provider to use for extraction',
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
displayName: 'API Key',
|
|
280
|
+
name: 'apiKey',
|
|
281
|
+
type: 'string',
|
|
282
|
+
typeOptions: { password: true },
|
|
283
|
+
default: '',
|
|
284
|
+
description: 'API key for the LLM provider',
|
|
285
|
+
},
|
|
286
|
+
{
|
|
287
|
+
displayName: 'Base URL',
|
|
288
|
+
name: 'baseUrl',
|
|
289
|
+
type: 'string',
|
|
290
|
+
default: '',
|
|
291
|
+
placeholder: 'https://api.deepseek.com/v1',
|
|
292
|
+
description: 'Base URL for the LLM API (required for DeepSeek/Ollama)',
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
displayName: 'Extraction Instruction',
|
|
296
|
+
name: 'instruction',
|
|
297
|
+
type: 'string',
|
|
298
|
+
typeOptions: { rows: 3 },
|
|
299
|
+
default: '',
|
|
300
|
+
placeholder: 'Extract all product names and prices from this page',
|
|
301
|
+
description: 'Instructions for the LLM on what to extract',
|
|
302
|
+
},
|
|
303
|
+
{
|
|
304
|
+
displayName: 'Schema (JSON)',
|
|
305
|
+
name: 'schema',
|
|
306
|
+
type: 'string',
|
|
307
|
+
typeOptions: { rows: 4 },
|
|
308
|
+
default: '',
|
|
309
|
+
placeholder: '{"type": "object", "properties": {"title": {"type": "string"}}}',
|
|
310
|
+
description: 'Optional JSON schema for structured output',
|
|
311
|
+
},
|
|
312
|
+
],
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
displayName: 'Options',
|
|
316
|
+
name: 'options',
|
|
317
|
+
type: 'collection',
|
|
318
|
+
placeholder: 'Add Option',
|
|
319
|
+
default: {},
|
|
320
|
+
displayOptions: {
|
|
321
|
+
show: {
|
|
322
|
+
operation: ['crawlSingleUrl'],
|
|
323
|
+
},
|
|
324
|
+
},
|
|
325
|
+
options: [
|
|
326
|
+
{
|
|
327
|
+
displayName: 'Output Format',
|
|
328
|
+
name: 'outputFormat',
|
|
329
|
+
type: 'options',
|
|
330
|
+
options: [
|
|
331
|
+
{
|
|
332
|
+
name: 'Markdown',
|
|
333
|
+
value: 'markdown',
|
|
334
|
+
description: 'Clean markdown format (default, best for LLMs)',
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
name: 'Fit Markdown',
|
|
338
|
+
value: 'fitMarkdown',
|
|
339
|
+
description: 'Shorter markdown optimized for LLM token limits',
|
|
340
|
+
},
|
|
341
|
+
{
|
|
342
|
+
name: 'Plain Text',
|
|
343
|
+
value: 'text',
|
|
344
|
+
description: 'Plain text with no formatting',
|
|
345
|
+
},
|
|
346
|
+
{
|
|
347
|
+
name: 'Cleaned HTML',
|
|
348
|
+
value: 'cleanedHtml',
|
|
349
|
+
description: 'Cleaned HTML with unnecessary elements removed',
|
|
350
|
+
},
|
|
351
|
+
{
|
|
352
|
+
name: 'Raw HTML',
|
|
353
|
+
value: 'html',
|
|
354
|
+
description: 'Original raw HTML from the page',
|
|
355
|
+
},
|
|
356
|
+
],
|
|
357
|
+
default: 'markdown',
|
|
358
|
+
description: 'Format for the content output',
|
|
359
|
+
},
|
|
360
|
+
{
|
|
361
|
+
displayName: 'Include Media Data',
|
|
362
|
+
name: 'includeMedia',
|
|
363
|
+
type: 'boolean',
|
|
364
|
+
default: false,
|
|
365
|
+
description: 'Whether to include media data in output (images, videos)',
|
|
366
|
+
},
|
|
367
|
+
{
|
|
368
|
+
displayName: 'Verbose Response',
|
|
369
|
+
name: 'verboseResponse',
|
|
370
|
+
type: 'boolean',
|
|
371
|
+
default: false,
|
|
372
|
+
description: 'Whether to include detailed data in output (HTML, status codes, etc.)',
|
|
373
|
+
},
|
|
374
|
+
],
|
|
375
|
+
},
|
|
376
|
+
];
|
|
377
|
+
/**
 * Runs the "Crawl Single URL" operation: one crawl per input item.
 *
 * Must be called with `this` bound to the n8n execution context, because it uses
 * `this.getNodeParameter`, `this.getNode` and `this.continueOnFail`.
 *
 * @param {Array<{json: object}>} items - Input items; item `i` supplies the parameters for crawl `i`.
 * @param {object} nodeOptions - Accepted for signature compatibility; not read by this operation.
 * @returns {Promise<Array<object>>} One output item per input item. Successful items carry the
 *   formatted crawl result in `json`; failed items (only when "continue on fail" is enabled)
 *   carry the original input `json` plus an `error`. Every item has `pairedItem: { item: i }`.
 * @throws {NodeOperationError} When the URL is empty or fails `isValidUrl`, unless
 *   "continue on fail" is enabled. Any other failure is rethrown unchanged in that case.
 */
async function execute(items, nodeOptions) {
    const allResults = [];
    // Created on the first item that reaches the crawl step and reused afterwards:
    // the factory only receives the execution context, never the item index.
    // A failed creation leaves this unset, so the next item simply retries.
    let crawler;
    for (let i = 0; i < items.length; i++) {
        try {
            const url = this.getNodeParameter('url', i, '');
            const browserOptions = this.getNodeParameter('browserOptions', i, {});
            const crawlerOptions = this.getNodeParameter('crawlerOptions', i, {});
            const llmExtraction = this.getNodeParameter('llmExtraction', i, {});
            const options = this.getNodeParameter('options', i, {});
            if (!url) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'URL cannot be empty.', { itemIndex: i });
            }
            if (!(0, utils_1.isValidUrl)(url)) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), `Invalid URL: ${url}`, { itemIndex: i });
            }
            const browserConfig = (0, utils_1.createBrowserConfig)(browserOptions);
            // Browser settings are spread last, so they win over same-named crawler options.
            const crawlerConfig = (0, utils_1.createCrawlerRunConfig)({
                ...crawlerOptions,
                ...browserConfig,
            });
            if (!crawler) {
                crawler = await (0, utils_1.getCrawl4aiClient)(this);
            }
            const result = await crawler.crawlUrl(url, crawlerConfig, llmExtraction);
            const formattedResult = (0, formatters_1.formatCrawlResult)(result, options.includeMedia, options.verboseResponse, options.outputFormat || 'markdown');
            allResults.push({
                json: formattedResult,
                pairedItem: { item: i },
            });
        }
        catch (error) {
            if (!this.continueOnFail()) {
                throw error;
            }
            // Anything can be thrown in JavaScript (strings, null, plain objects), so only
            // read `message` / `itemIndex` when the thrown value can actually carry them.
            const hasDetails = error !== null && error !== undefined;
            const message = hasDetails && error.message !== undefined ? error.message : String(error);
            const reportedIndex = hasDetails ? error.itemIndex : undefined;
            const errorItemIndex = reportedIndex !== null && reportedIndex !== undefined ? reportedIndex : i;
            allResults.push({
                json: items[i].json,
                error: new n8n_workflow_1.NodeOperationError(this.getNode(), message, { itemIndex: errorItemIndex }),
                pairedItem: { item: i },
            });
        }
    }
    return allResults;
}
|
|
422
|
+
//# sourceMappingURL=crawlSingleUrl.operation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawlSingleUrl.operation.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.ts"],"names":[],"mappings":";;;AAsYA,0BAqEC;AArcD,+CAAkD;AAIlD,4CAK0B;AAC1B,sDAA0D;AAG7C,QAAA,WAAW,GAAsB;IAC5C;QACE,WAAW,EAAE,KAAK;QAClB,IAAI,EAAE,KAAK;QACX,IAAI,EAAE,QAAQ;QACd,QAAQ,EAAE,IAAI;QACd,OAAO,EAAE,EAAE;QACX,WAAW,EAAE,qBAAqB;QAClC,WAAW,EAAE,kBAAkB;QAC/B,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,gBAAgB,CAAC;aAC9B;SACF;KACF;IACD;QACE,WAAW,EAAE,iBAAiB;QAC9B,IAAI,EAAE,gBAAgB;QACtB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,gBAAgB,CAAC;aAC9B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,mBAAmB;gBAChC,IAAI,EAAE,mBAAmB;gBACzB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,wCAAwC;aACtD;YACD;gBACE,WAAW,EAAE,eAAe;gBAC5B,IAAI,EAAE,UAAU;gBAChB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,yCAAyC;aACvD;YACD;gBACE,WAAW,EAAE,cAAc;gBAC3B,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,uDAAuD;aACrE;YACD;gBACE,WAAW,EAAE,YAAY;gBACzB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,+CAA+C;gBAC5D,WAAW,EAAE,iDAAiD;aAC/D;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;gBACZ,WAAW,EAAE,oCAAoC;aAClD;YACD;gBACE,WAAW,EAAE,gBAAgB;gBAC7B,IAAI,EAAE,eAAe;gBACrB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,mCAAmC;aACjD;SACF;KACF;IACD;QACE,WAAW,EAAE,iBAAiB;QAC9B,IAAI,EAAE,gBAAgB;QACtB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,gBAAgB,CAAC;aAC9B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,YAAY;gBACzB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,sBAAsB;wBAC5B,KAAK,EAAE,SAAS;wBAChB,WAAW,EAAE,mDAAmD;qBACjE;oBACD;wBACE,IAAI,EAAE,sBAAsB;wBAC5B,KAAK,EAAE,QAAQ;wBACf,WAAW,EAAE,0CAA0C;qBACxD;oBACD;wBACE,IAAI,EAAE,kBAAkB;wBACxB,KAAK,EAAE,MAAM;wBACb,WAAW,EAAE,0CAA0C;qBACxD;iBACF;gBACD,OAAO
,EAAE,SAAS;gBAClB,WAAW,EAAE,oCAAoC;aAClD;YACD;gBACE,WAAW,EAAE,kBAAkB;gBAC/B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,qCAAqC;aACnD;YACD;gBACE,WAAW,EAAE,cAAc;gBAC3B,IAAI,EAAE,aAAa;gBACnB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,iBAAiB;gBAC9B,WAAW,EAAE,uEAAuE;aACrF;YACD;gBACE,WAAW,EAAE,wBAAwB;gBACrC,IAAI,EAAE,sBAAsB;gBAC5B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,mDAAmD;aACjE;YACD;gBACE,WAAW,EAAE,eAAe;gBAC5B,IAAI,EAAE,cAAc;gBACpB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,kBAAkB;gBAC/B,WAAW,EAAE,8DAA8D;aAC5E;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE;oBACX,IAAI,EAAE,CAAC;iBACR;gBACD,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,qDAAqD;gBAClE,WAAW,EAAE,mDAAmD;aACjE;YACD;gBACE,WAAW,EAAE,sBAAsB;gBACnC,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,qDAAqD;aACnE;YACD;gBACE,WAAW,EAAE,aAAa;gBAC1B,IAAI,EAAE,YAAY;gBAClB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,CAAC;gBACV,WAAW,EAAE,+CAA+C;aAC7D;YACD;gBACE,WAAW,EAAE,mBAAmB;gBAChC,IAAI,EAAE,aAAa;gBACnB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,2CAA2C;aACzD;YACD;gBACE,WAAW,EAAE,sBAAsB;gBACnC,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,2CAA2C;aACzD;YACD;gBACE,WAAW,EAAE,YAAY;gBACzB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,eAAe;gBAC5B,WAAW,EAAE,+EAA+E;aAC7F;YACD;gBACE,WAAW,EAAE,sBAAsB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,CAAC;gBACV,WAAW,EAAE,oDAAoD;aAClE;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,qCAAqC;gBAClD,WAAW,EAAE,uDAAuD;aACrE;YACD;gBACE,WAAW,EAAE,yBAAyB;gBACtC,IAAI,EAAE,uBAAuB;gBAC7B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,yDAAyD;aACvE;YACD;gBACE,WAAW,EAAE,wBAAwB;gBACrC,IAAI,EAAE,sBAAsB;gBAC5B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,sDAAsD;aACpE;YACD;gBACE,WAAW,EAAE,4BAA4B;gBACzC,IAAI,EAAE,yBAAyB;gBAC/B,IAAI,EAAE,SAAS;gBACf,OAAO,E
AAE,KAAK;gBACd,WAAW,EAAE,mDAAmD;aACjE;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,kDAAkD;aAChE;YACD;gBACE,WAAW,EAAE,yBAAyB;gBACtC,IAAI,EAAE,uBAAuB;gBAC7B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,sDAAsD;aACpE;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,0BAA0B;gBACvC,WAAW,EAAE,sDAAsD;aACpE;SACF;KACF;IACD;QACE,WAAW,EAAE,gBAAgB;QAC7B,IAAI,EAAE,eAAe;QACrB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,gBAAgB;QAC7B,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,gBAAgB,CAAC;aAC9B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,cAAc;gBAC3B,IAAI,EAAE,UAAU;gBAChB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE;oBACP,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,UAAU,EAAE;oBACvC,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE;oBACnC,EAAE,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,WAAW,EAAE;oBACzC,EAAE,IAAI,EAAE,gBAAgB,EAAE,KAAK,EAAE,QAAQ,EAAE;iBAC5C;gBACD,OAAO,EAAE,UAAU;gBACnB,WAAW,EAAE,wCAAwC;aACtD;YACD;gBACE,WAAW,EAAE,SAAS;gBACtB,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;gBAC/B,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,8BAA8B;aAC5C;YACD;gBACE,WAAW,EAAE,UAAU;gBACvB,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,6BAA6B;gBAC1C,WAAW,EAAE,yDAAyD;aACvE;YACD;gBACE,WAAW,EAAE,wBAAwB;gBACrC,IAAI,EAAE,aAAa;gBACnB,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;gBACxB,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,qDAAqD;gBAClE,WAAW,EAAE,6CAA6C;aAC3D;YACD;gBACE,WAAW,EAAE,eAAe;gBAC5B,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;gBACxB,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,iEAAiE;gBAC9E,WAAW,EAAE,4CAA4C;aAC1D;SACF;KACF;IACD;QACE,WAAW,EAAE,SAAS;QACtB,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,gBAAgB,CAAC;aAC9B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,eAAe;gBAC5B,IAAI,EAAE,cAAc;gBACpB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,UAAU;wBAC
hB,KAAK,EAAE,UAAU;wBACjB,WAAW,EAAE,gDAAgD;qBAC9D;oBACD;wBACE,IAAI,EAAE,cAAc;wBACpB,KAAK,EAAE,aAAa;wBACpB,WAAW,EAAE,iDAAiD;qBAC/D;oBACD;wBACE,IAAI,EAAE,YAAY;wBAClB,KAAK,EAAE,MAAM;wBACb,WAAW,EAAE,+BAA+B;qBAC7C;oBACD;wBACE,IAAI,EAAE,cAAc;wBACpB,KAAK,EAAE,aAAa;wBACpB,WAAW,EAAE,gDAAgD;qBAC9D;oBACD;wBACE,IAAI,EAAE,UAAU;wBAChB,KAAK,EAAE,MAAM;wBACb,WAAW,EAAE,iCAAiC;qBAC/C;iBACF;gBACD,OAAO,EAAE,UAAU;gBACnB,WAAW,EAAE,+BAA+B;aAC7C;YACD;gBACE,WAAW,EAAE,oBAAoB;gBACjC,IAAI,EAAE,cAAc;gBACpB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,0DAA0D;aACxE;YACD;gBACE,WAAW,EAAE,kBAAkB;gBAC/B,IAAI,EAAE,iBAAiB;gBACvB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,uEAAuE;aACrF;SACF;KACF;CACF,CAAC;AAGK,KAAK,UAAU,OAAO,CAE3B,KAA2B,EAC3B,WAAgC;;IAEhC,MAAM,UAAU,GAAyB,EAAE,CAAC;IAE5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,CAAC;YAEH,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAW,CAAC;YAC1D,MAAM,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YACrF,MAAM,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YACrF,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,CAAC,eAAe,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YACnF,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YAEvE,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,sBAAsB,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACzF,CAAC;YAED,IAAI,CAAC,IAAA,kBAAU,EAAC,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,gBAAgB,GAAG,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACxF,CAAC;YAGD,MAAM,aAAa,GAAG,IAAA,2BAAmB,EAAC,cAAc,CAAC,CAAC;YAC1D,MAAM,aAAa,GAAG,IAAA,8BAAsB,EAAC;gBAC3C,GAAG,cAAc;gBACjB,GAAG,aAAa;aACjB,CAAC,CAAC;YAGH,MAAM,OAAO,GAAG,MAAM,IAAA,yBAAiB,EAAC,IAAI,CAAC,CAAC;YAG9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,GAAG,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;YAGzE,MAAM,eAAe,GAAG,IAAA,8BAAiB,EACvC,MAAM,EACN,OAAO,CAAC,YAAuB,EAC/B,OAAO,CAAC,eAA0B,EACjC,OAAO,CAAC,YAAuB,IAAI,UAAU,CAC/C,CAAC;YAGF,UAAU,C
AAC,IAAI,CAAC;gBACd,IAAI,EAAE,eAAe;gBACrB,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;aACxB,CAAC,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAEf,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;gBAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC5B,MAAM,cAAc,GAAG,MAAC,KAAa,CAAC,SAAS,mCAAI,CAAC,CAAC;gBACrD,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI;oBACnB,KAAK,EAAE,IAAI,iCAAkB,CAAC,IAAI,EAAG,KAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC;oBAC5F,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;iBACxB,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { INodeProperties, IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
|
|
2
|
+
import type { Crawl4aiNodeOptions } from '../helpers/interfaces';
|
|
3
|
+
/**
 * Signature of an operation executor: called with `this` bound to the n8n
 * `IExecuteFunctions` context, it receives the input items plus the node options
 * and resolves to the output items.
 */
type OperationExecuteFunction = (this: IExecuteFunctions, items: INodeExecutionData[], nodeOptions: Crawl4aiNodeOptions) => Promise<INodeExecutionData[]>;
|
|
4
|
+
/** Operation executors, looked up by a string key. */
export declare const operations: {
|
|
5
|
+
[key: string]: OperationExecuteFunction;
|
|
6
|
+
};
|
|
7
|
+
/** Node property definitions exported by this module. */
export declare const description: INodeProperties[];
|
|
8
|
+
export {};
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-style CommonJS interop helpers. Each one reuses an implementation already
// present on `this` (when there is one) and otherwise defines its own.

// Re-exports `source[key]` on `target` under `alias` (or `key` when no alias is given).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    const exportedAs = alias === undefined ? key : alias;
    let descriptor = Object.getOwnPropertyDescriptor(source, key);
    // Decide whether the source descriptor can be reused as-is or must be replaced
    // by a forwarding getter that always reads the current value from `source`.
    let needsGetter;
    if (!descriptor) {
        needsGetter = true;
    }
    else if ("get" in descriptor) {
        needsGetter = !source.__esModule;
    }
    else {
        needsGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, exportedAs, descriptor);
}) : (function (target, source, key, alias) {
    // Fallback without property descriptors: plain value copy.
    target[alias === undefined ? key : alias] = source[key];
}));
// Installs `value` as the `default` export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});
// Builds a namespace object for `import * as ns`: modules flagged `__esModule` are
// returned untouched; anything else gets its own enumerable keys re-bound plus a
// `default` export pointing at the original value.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    const namespace = {};
    if (mod != null) {
        for (const key in mod) {
            if (key === "default" || !Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.description = exports.operations = void 0;
|
|
27
|
+
const crawlSingleUrl = __importStar(require("./crawlSingleUrl.operation"));
|
|
28
|
+
const crawlMultipleUrls = __importStar(require("./crawlMultipleUrls.operation"));
|
|
29
|
+
const processRawHtml = __importStar(require("./processRawHtml.operation"));
|
|
30
|
+
// Dispatch table: maps each value of the `operation` parameter to the function that runs it.
const executorsByOperation = {};
executorsByOperation.crawlSingleUrl = crawlSingleUrl.execute;
executorsByOperation.crawlMultipleUrls = crawlMultipleUrls.execute;
executorsByOperation.processRawHtml = processRawHtml.execute;
exports.operations = executorsByOperation;
|
|
35
|
+
exports.description = [
|
|
36
|
+
{
|
|
37
|
+
displayName: 'Operation',
|
|
38
|
+
name: 'operation',
|
|
39
|
+
type: 'options',
|
|
40
|
+
noDataExpression: true,
|
|
41
|
+
options: [
|
|
42
|
+
{
|
|
43
|
+
name: 'Crawl Single URL',
|
|
44
|
+
value: 'crawlSingleUrl',
|
|
45
|
+
description: 'Crawl a single URL and extract content',
|
|
46
|
+
action: 'Crawl a single URL',
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
name: 'Crawl Multiple URLs',
|
|
50
|
+
value: 'crawlMultipleUrls',
|
|
51
|
+
description: 'Crawl multiple URLs and extract content',
|
|
52
|
+
action: 'Crawl multiple ur ls',
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
name: 'Process Raw HTML',
|
|
56
|
+
value: 'processRawHtml',
|
|
57
|
+
description: 'Process provided HTML content without crawling',
|
|
58
|
+
action: 'Process raw html',
|
|
59
|
+
},
|
|
60
|
+
],
|
|
61
|
+
default: 'crawlSingleUrl',
|
|
62
|
+
},
|
|
63
|
+
...crawlSingleUrl.description,
|
|
64
|
+
...crawlMultipleUrls.description,
|
|
65
|
+
...processRawHtml.description,
|
|
66
|
+
];
|
|
67
|
+
//# sourceMappingURL=operations.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"operations.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiBasicCrawler/actions/operations.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAIA,2EAA6D;AAC7D,iFAAmE;AACnE,2EAA6D;AAWhD,QAAA,UAAU,GAAgD;IACrE,cAAc,EAAE,cAAc,CAAC,OAAO;IACtC,iBAAiB,EAAE,iBAAiB,CAAC,OAAO;IAC5C,cAAc,EAAE,cAAc,CAAC,OAAO;CACvC,CAAC;AAGW,QAAA,WAAW,GAAsB;IAC5C;QACE,WAAW,EAAE,WAAW;QACxB,IAAI,EAAE,WAAW;QACjB,IAAI,EAAE,SAAS;QACf,gBAAgB,EAAE,IAAI;QACtB,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,kBAAkB;gBACxB,KAAK,EAAE,gBAAgB;gBACvB,WAAW,EAAE,wCAAwC;gBACrD,MAAM,EAAE,oBAAoB;aAC7B;YACD;gBACE,IAAI,EAAE,qBAAqB;gBAC3B,KAAK,EAAE,mBAAmB;gBAC1B,WAAW,EAAE,yCAAyC;gBACtD,MAAM,EAAE,sBAAsB;aAC/B;YACD;gBACE,IAAI,EAAE,kBAAkB;gBACxB,KAAK,EAAE,gBAAgB;gBACvB,WAAW,EAAE,gDAAgD;gBAC7D,MAAM,EAAE,kBAAkB;aAC3B;SACF;QACD,OAAO,EAAE,gBAAgB;KAC1B;IAGD,GAAG,cAAc,CAAC,WAAW;IAC7B,GAAG,iBAAiB,CAAC,WAAW;IAChC,GAAG,cAAc,CAAC,WAAW;CAC9B,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow';
|
|
2
|
+
import type { Crawl4aiNodeOptions } from '../helpers/interfaces';
|
|
3
|
+
/** Parameter definitions (`INodeProperties` entries) exported by this operation module. */
export declare const description: INodeProperties[];
|
|
4
|
+
/**
 * Entry point of this operation module.
 *
 * @param this - n8n execution context (`IExecuteFunctions`) the function must be called with.
 * @param items - Input items to process.
 * @param nodeOptions - Node-level options (`Crawl4aiNodeOptions`) supplied by the caller.
 * @returns A promise resolving to the output items.
 */
export declare function execute(this: IExecuteFunctions, items: INodeExecutionData[], nodeOptions: Crawl4aiNodeOptions): Promise<INodeExecutionData[]>;
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.description = void 0;
|
|
4
|
+
exports.execute = execute;
|
|
5
|
+
const n8n_workflow_1 = require("n8n-workflow");
|
|
6
|
+
const utils_1 = require("../helpers/utils");
|
|
7
|
+
const formatters_1 = require("../helpers/formatters");
|
|
8
|
+
exports.description = [
|
|
9
|
+
{
|
|
10
|
+
displayName: 'HTML Content',
|
|
11
|
+
name: 'html',
|
|
12
|
+
type: 'string',
|
|
13
|
+
typeOptions: {
|
|
14
|
+
rows: 8,
|
|
15
|
+
},
|
|
16
|
+
required: true,
|
|
17
|
+
default: '',
|
|
18
|
+
placeholder: '<html><body><h1>Example</h1><p>Content</p></body></html>',
|
|
19
|
+
description: 'The raw HTML content to process',
|
|
20
|
+
displayOptions: {
|
|
21
|
+
show: {
|
|
22
|
+
operation: ['processRawHtml'],
|
|
23
|
+
},
|
|
24
|
+
},
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
displayName: 'Base URL',
|
|
28
|
+
name: 'baseUrl',
|
|
29
|
+
type: 'string',
|
|
30
|
+
default: 'https://example.com',
|
|
31
|
+
description: 'The base URL to use for resolving relative links',
|
|
32
|
+
displayOptions: {
|
|
33
|
+
show: {
|
|
34
|
+
operation: ['processRawHtml'],
|
|
35
|
+
},
|
|
36
|
+
},
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
displayName: 'Crawler Options',
|
|
40
|
+
name: 'crawlerOptions',
|
|
41
|
+
type: 'collection',
|
|
42
|
+
placeholder: 'Add Option',
|
|
43
|
+
default: {},
|
|
44
|
+
displayOptions: {
|
|
45
|
+
show: {
|
|
46
|
+
operation: ['processRawHtml'],
|
|
47
|
+
},
|
|
48
|
+
},
|
|
49
|
+
options: [
|
|
50
|
+
{
|
|
51
|
+
displayName: 'CSS Selector',
|
|
52
|
+
name: 'cssSelector',
|
|
53
|
+
type: 'string',
|
|
54
|
+
default: '',
|
|
55
|
+
placeholder: 'article.content',
|
|
56
|
+
description: 'CSS selector to focus on specific content (leave empty for full page)',
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
displayName: 'Exclude External Links',
|
|
60
|
+
name: 'excludeExternalLinks',
|
|
61
|
+
type: 'boolean',
|
|
62
|
+
default: false,
|
|
63
|
+
description: 'Whether to exclude external links from the result',
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
displayName: 'Excluded Tags',
|
|
67
|
+
name: 'excludedTags',
|
|
68
|
+
type: 'string',
|
|
69
|
+
default: '',
|
|
70
|
+
placeholder: 'nav,footer,aside',
|
|
71
|
+
description: 'Comma-separated list of HTML tags to exclude from processing',
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
displayName: 'Word Count Threshold',
|
|
75
|
+
name: 'wordCountThreshold',
|
|
76
|
+
type: 'number',
|
|
77
|
+
default: 0,
|
|
78
|
+
description: 'Minimum number of words for content to be included',
|
|
79
|
+
},
|
|
80
|
+
],
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
displayName: 'Options',
|
|
84
|
+
name: 'options',
|
|
85
|
+
type: 'collection',
|
|
86
|
+
placeholder: 'Add Option',
|
|
87
|
+
default: {},
|
|
88
|
+
displayOptions: {
|
|
89
|
+
show: {
|
|
90
|
+
operation: ['processRawHtml'],
|
|
91
|
+
},
|
|
92
|
+
},
|
|
93
|
+
options: [
|
|
94
|
+
{
|
|
95
|
+
displayName: 'Include Media Data',
|
|
96
|
+
name: 'includeMedia',
|
|
97
|
+
type: 'boolean',
|
|
98
|
+
default: false,
|
|
99
|
+
description: 'Whether to include media data in output (images, videos)',
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
displayName: 'Verbose Response',
|
|
103
|
+
name: 'verboseResponse',
|
|
104
|
+
type: 'boolean',
|
|
105
|
+
default: false,
|
|
106
|
+
description: 'Whether to include detailed data in output (HTML, status codes, etc.)',
|
|
107
|
+
},
|
|
108
|
+
],
|
|
109
|
+
},
|
|
110
|
+
];
|
|
111
|
+
/**
 * Runs the "Process Raw HTML" operation once for every input item.
 *
 * For each item the `html`, `baseUrl`, `crawlerOptions` and `options` node
 * parameters are read, the HTML is handed to the Crawl4AI client, and the
 * formatted result is emitted paired with the originating item.
 *
 * Must be invoked with the n8n execution context as `this`.
 *
 * @param {Array} items - Input items; one result (or error entry) is produced per item.
 * @param {object} nodeOptions - Part of the shared signature; not read by this function.
 * @returns {Promise<Array>} Output items, in input order.
 * @throws Rethrows the first per-item error unless `continueOnFail()` is true,
 *         in which case the item's original JSON is emitted with an `error` attached.
 */
async function execute(items, nodeOptions) {
    const output = [];
    for (let index = 0; index < items.length; index += 1) {
        try {
            const rawHtml = this.getNodeParameter('html', index, '');
            const linkBase = this.getNodeParameter('baseUrl', index, 'https://example.com');
            const runOptions = this.getNodeParameter('crawlerOptions', index, {});
            const outputOptions = this.getNodeParameter('options', index, {});
            if (!rawHtml) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'HTML content cannot be empty.', { itemIndex: index });
            }
            const runConfig = (0, utils_1.createCrawlerRunConfig)(runOptions);
            // The client is obtained inside the try block so that a failure
            // here is still subject to the continue-on-fail handling below.
            const client = await (0, utils_1.getCrawl4aiClient)(this);
            const crawlResult = await client.processRawHtml(rawHtml, linkBase, runConfig);
            const payload = (0, formatters_1.formatCrawlResult)(crawlResult, outputOptions.includeMedia, outputOptions.verboseResponse);
            output.push({
                json: payload,
                pairedItem: { item: index },
            });
        }
        catch (error) {
            if (!this.continueOnFail()) {
                throw error;
            }
            const currentNode = this.getNode();
            // Prefer the index carried by the error; fall back to the loop index.
            const reportedIndex = error.itemIndex;
            const failedIndex = reportedIndex !== null && reportedIndex !== void 0 ? reportedIndex : index;
            output.push({
                json: items[index].json,
                error: new n8n_workflow_1.NodeOperationError(currentNode, error.message, { itemIndex: failedIndex }),
                pairedItem: { item: index },
            });
        }
    }
    return output;
}
|
|
148
|
+
//# sourceMappingURL=processRawHtml.operation.js.map
|