n8n-nodes-crawl4ai-onuro 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/LICENSE.md +19 -0
  3. package/README.md +129 -0
  4. package/dist/credentials/Crawl4aiApi.credentials.d.ts +7 -0
  5. package/dist/credentials/Crawl4aiApi.credentials.js +228 -0
  6. package/dist/credentials/Crawl4aiApi.credentials.js.map +1 -0
  7. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.d.ts +5 -0
  8. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js +37 -0
  9. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js.map +1 -0
  10. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.d.ts +4 -0
  11. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js +421 -0
  12. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js.map +1 -0
  13. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.d.ts +4 -0
  14. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js +422 -0
  15. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js.map +1 -0
  16. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.d.ts +8 -0
  17. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js +67 -0
  18. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js.map +1 -0
  19. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.d.ts +4 -0
  20. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js +148 -0
  21. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js.map +1 -0
  22. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.d.ts +2 -0
  23. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js +37 -0
  24. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js.map +1 -0
  25. package/dist/nodes/Crawl4aiBasicCrawler/crawl4ai.svg +6 -0
  26. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.d.ts +15 -0
  27. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js +271 -0
  28. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js.map +1 -0
  29. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.d.ts +5 -0
  30. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js +96 -0
  31. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js.map +1 -0
  32. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.d.ts +119 -0
  33. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js +3 -0
  34. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js.map +1 -0
  35. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.d.ts +8 -0
  36. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js +80 -0
  37. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js.map +1 -0
  38. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.d.ts +5 -0
  39. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js +38 -0
  40. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js.map +1 -0
  41. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.d.ts +4 -0
  42. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js +295 -0
  43. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js.map +1 -0
  44. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.d.ts +4 -0
  45. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js +328 -0
  46. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js.map +1 -0
  47. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.d.ts +4 -0
  48. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js +417 -0
  49. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js.map +1 -0
  50. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.d.ts +8 -0
  51. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js +67 -0
  52. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js.map +1 -0
  53. package/dist/nodes/Crawl4aiContentExtractor/actions/router.d.ts +2 -0
  54. package/dist/nodes/Crawl4aiContentExtractor/actions/router.js +37 -0
  55. package/dist/nodes/Crawl4aiContentExtractor/actions/router.js.map +1 -0
  56. package/dist/nodes/Crawl4aiContentExtractor/crawl4ai.svg +6 -0
  57. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.d.ts +1 -0
  58. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js +7 -0
  59. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js.map +1 -0
  60. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.d.ts +1 -0
  61. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js +8 -0
  62. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js.map +1 -0
  63. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.d.ts +1 -0
  64. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js +3 -0
  65. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js.map +1 -0
  66. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.d.ts +9 -0
  67. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js +93 -0
  68. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js.map +1 -0
  69. package/dist/tsconfig.tsbuildinfo +1 -0
  70. package/index.js +14 -0
  71. package/package.json +68 -0
@@ -0,0 +1,80 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getCrawl4aiClient = getCrawl4aiClient;
4
+ exports.createBrowserConfig = createBrowserConfig;
5
+ exports.createCrawlerRunConfig = createCrawlerRunConfig;
6
+ exports.safeJsonParse = safeJsonParse;
7
+ exports.cleanText = cleanText;
8
+ exports.isValidUrl = isValidUrl;
9
+ const apiClient_1 = require("./apiClient");
10
/**
 * Resolve the `crawl4aiApi` credentials from the n8n execution context and
 * build a crawler client from them.
 *
 * @param executeFunctions n8n execution context exposing `getCredentials`.
 * @returns Whatever `createCrawlerInstance` returns for the resolved credentials.
 * @throws Error when the credential lookup yields a falsy value.
 */
async function getCrawl4aiClient(executeFunctions) {
    const crawl4aiCredentials = await executeFunctions.getCredentials('crawl4aiApi');
    if (crawl4aiCredentials) {
        // Detach the factory from its module object before calling it,
        // so it is invoked without a `this` binding.
        const { createCrawlerInstance } = apiClient_1;
        return createCrawlerInstance(crawl4aiCredentials);
    }
    throw new Error('Crawl4AI credentials are not configured!');
}
17
/**
 * Build the browser configuration object from the raw node options.
 *
 * Defaults applied here:
 * - `headless` is true unless the option is exactly `false`;
 * - `javaScriptEnabled` is true only when the option is exactly `true`;
 * - viewport is 1280x800 and `timeout` is 30000;
 * - `userAgent` is `undefined` when no (truthy) value is supplied.
 *
 * Numeric options may be numbers or numeric strings. A value that is missing,
 * not numeric, not finite, or not strictly positive falls back to its default,
 * so the returned config never carries `NaN` or a negative size/timeout.
 *
 * @param options Raw option bag read from the node parameters.
 * @returns Normalized browser configuration.
 */
function createBrowserConfig(options) {
    // Accept only numbers / numeric strings that parse to a finite value > 0.
    const positiveNumberOr = (raw, fallback) => {
        if (typeof raw !== 'number' && typeof raw !== 'string') {
            return fallback;
        }
        const parsed = Number(raw);
        return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
    };
    return {
        headless: options.headless !== false,
        javaScriptEnabled: options.javaScriptEnabled === true,
        viewport: {
            width: positiveNumberOr(options.viewportWidth, 1280),
            height: positiveNumberOr(options.viewportHeight, 800),
        },
        timeout: positiveNumberOr(options.timeout, 30000),
        userAgent: options.userAgent ? String(options.userAgent) : undefined,
    };
}
29
/**
 * Build the crawler run configuration object from the raw node options.
 *
 * - `excludedTags` may be a comma-separated string (split, trimmed, empty
 *   entries dropped) or an array (used as given); anything else yields `[]`.
 * - Boolean flags are true only when the option is exactly `true`.
 * - `cacheMode` defaults to `'enabled'`.
 * - `pageTimeout` / `requestTimeout` default to 30000, `wordCountThreshold`
 *   to 0 and `maxRetries` to 3.
 *
 * Numeric options may be numbers or numeric strings. Values that are missing,
 * empty, or do not parse to a finite number fall back to the default instead
 * of leaking `NaN` into the config. An explicit `0` is honoured for
 * `maxRetries` (meaning "do not retry"); for the timeouts `0` still selects
 * the default, as before.
 *
 * @param options Raw option bag read from the node parameters.
 * @returns Normalized crawler run configuration.
 */
function createCrawlerRunConfig(options) {
    // Parse a number-like option; `allowZero` decides whether 0 is a real value
    // or just "unset".
    const numberOr = (raw, fallback, allowZero = false) => {
        if ((typeof raw !== 'number' && typeof raw !== 'string') || raw === '') {
            return fallback;
        }
        const parsed = Number(raw);
        if (!Number.isFinite(parsed)) {
            return fallback;
        }
        return parsed === 0 && !allowZero ? fallback : parsed;
    };
    let excludedTags = [];
    if (typeof options.excludedTags === 'string') {
        excludedTags = options.excludedTags
            .split(',')
            .map(tag => tag.trim())
            .filter(tag => tag);
    }
    else if (Array.isArray(options.excludedTags)) {
        excludedTags = options.excludedTags;
    }
    return {
        cacheMode: options.cacheMode || 'enabled',
        streamEnabled: options.streamEnabled === true,
        pageTimeout: numberOr(options.pageTimeout, 30000),
        requestTimeout: numberOr(options.requestTimeout, 30000),
        jsCode: options.jsCode ? String(options.jsCode) : undefined,
        jsOnly: options.jsOnly === true,
        cssSelector: options.cssSelector ? String(options.cssSelector) : undefined,
        excludedTags,
        excludeExternalLinks: options.excludeExternalLinks === true,
        checkRobotsTxt: options.checkRobotsTxt === true,
        wordCountThreshold: numberOr(options.wordCountThreshold, 0),
        sessionId: options.sessionId ? String(options.sessionId) : undefined,
        // 0 is a legitimate setting here, so it must not be replaced by the default.
        maxRetries: numberOr(options.maxRetries, 3, true),
    };
}
58
/**
 * Parse a JSON string without throwing.
 *
 * @param jsonString Text to hand to `JSON.parse`.
 * @param defaultValue Value returned when parsing throws (defaults to `null`).
 * @returns The parsed value, or `defaultValue` on any parse error.
 */
function safeJsonParse(jsonString, defaultValue = null) {
    let parsed;
    try {
        parsed = JSON.parse(jsonString);
    }
    catch (_parseError) {
        // Parse failures are intentionally swallowed; the caller gets the fallback.
        return defaultValue;
    }
    return parsed;
}
66
/**
 * Normalize whitespace in a string: every run of whitespace becomes a single
 * space and leading/trailing whitespace is removed.
 *
 * @param text String to normalize.
 * @returns The normalized string.
 */
function cleanText(text) {
    // Splitting on whitespace runs and dropping the empty edge tokens is the
    // same as collapsing the runs to one space and trimming.
    const words = text.split(/\s+/).filter(word => word.length > 0);
    return words.join(' ');
}
71
/**
 * Check whether a string can be parsed by the `URL` constructor.
 *
 * @param url Candidate URL string.
 * @returns `true` when `new URL(url)` succeeds, `false` when it throws.
 */
function isValidUrl(url) {
    let parses = true;
    try {
        // Only the success or failure of construction matters; the instance is discarded.
        void new URL(url);
    }
    catch (_invalidUrl) {
        parses = false;
    }
    return parses;
}
80
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiBasicCrawler/helpers/utils.ts"],"names":[],"mappings":";;AAOA,8CAYC;AAOD,kDAWC;AAOD,wDA6BC;AAKD,sCAMC;AAKD,8BAIC;AAKD,gCAOC;AAvGD,2CAAoD;AAK7C,KAAK,UAAU,iBAAiB,CACrC,gBAAmC;IAGnC,MAAM,WAAW,GAAG,MAAM,gBAAgB,CAAC,cAAc,CAAC,aAAa,CAAsC,CAAC;IAE9G,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;IAC9D,CAAC;IAGD,OAAO,IAAA,iCAAqB,EAAC,WAAW,CAAC,CAAC;AAC5C,CAAC;AAOD,SAAgB,mBAAmB,CAAC,OAAoB;IACtD,OAAO;QACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,KAAK,KAAK;QACpC,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,KAAK,IAAI;QACrD,QAAQ,EAAE;YACR,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI;YACnE,MAAM,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,GAAG;SACtE;QACD,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK;QAC1D,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;KACrE,CAAC;AACJ,CAAC;AAOD,SAAgB,sBAAsB,CAAC,OAAoB;IAEzD,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QACzB,IAAI,OAAO,OAAO,CAAC,YAAY,KAAK,QAAQ,EAAE,CAAC;YAC7C,YAAY,GAAI,OAAO,CAAC,YAAuB;iBAC5C,KAAK,CAAC,GAAG,CAAC;iBACV,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;iBACtB,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;QACxB,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/C,YAAY,GAAG,OAAO,CAAC,YAAwB,CAAC;QAClD,CAAC;IACH,CAAC;IAED,OAAO;QACL,SAAS,EAAE,OAAO,CAAC,SAA0C,IAAI,SAAS;QAC1E,aAAa,EAAE,OAAO,CAAC,aAAa,KAAK,IAAI;QAC7C,WAAW,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,KAAK;QACtE,cAAc,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK;QAC/E,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS;QAC3D,MAAM,EAAE,OAAO,CAAC,MAAM,KAAK,IAAI;QAC/B,WAAW,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS;QAC1E,YAAY;QACZ,oBAAoB,
EAAE,OAAO,CAAC,oBAAoB,KAAK,IAAI;QAC3D,cAAc,EAAE,OAAO,CAAC,cAAc,KAAK,IAAI;QAC/C,kBAAkB,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;QACvF,SAAS,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;QACpE,UAAU,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;KAChE,CAAC;AACJ,CAAC;AAKD,SAAgB,aAAa,CAAC,UAAkB,EAAE,eAAoB,IAAI;IACxE,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAChC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,YAAY,CAAC;IACtB,CAAC;AACH,CAAC;AAKD,SAAgB,SAAS,CAAC,IAAY;IACpC,OAAO,IAAI;SACR,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAKD,SAAgB,UAAU,CAAC,GAAW;IACpC,IAAI,CAAC;QACH,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACb,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
@@ -0,0 +1,5 @@
1
+ import { IExecuteFunctions, INodeExecutionData, INodeType, INodeTypeDescription } from 'n8n-workflow';
2
+ export declare class Crawl4aiContentExtractor implements INodeType {
3
+ description: INodeTypeDescription;
4
+ execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]>;
5
+ }
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Crawl4aiContentExtractor = void 0;
4
+ const router_1 = require("./actions/router");
5
+ const operations_1 = require("./actions/operations");
6
/**
 * n8n node definition for the "Crawl4AI: Content Extractor" node.
 *
 * The constructor assembles the node description (metadata, the required
 * `crawl4aiApi` credential, and the properties contributed by the operations
 * module). `execute` delegates all work to the router.
 */
class Crawl4aiContentExtractor {
    constructor() {
        const nodeLabel = 'Crawl4AI: Content Extractor';
        const requiredCredentials = [
            {
                name: 'crawl4aiApi',
                required: true,
            },
        ];
        // Copy the operations' property list into a fresh array owned by this instance.
        const nodeProperties = [...operations_1.description];
        this.description = {
            displayName: nodeLabel,
            name: 'crawl4aiContentExtractor',
            icon: 'file:crawl4ai.svg',
            group: ['transform'],
            version: 1,
            subtitle: '={{$parameter["operation"]}}',
            description: 'Extract structured content from web pages using Crawl4AI',
            defaults: {
                name: nodeLabel,
            },
            inputs: ['main'],
            outputs: ['main'],
            usableAsTool: true,
            credentials: requiredCredentials,
            properties: nodeProperties,
        };
    }
    /**
     * Run the node by invoking the router with this execution context as `this`.
     *
     * @returns The value the router resolves to.
     */
    async execute() {
        const routed = await router_1.router.call(this);
        return routed;
    }
}
37
+ exports.Crawl4aiContentExtractor = Crawl4aiContentExtractor;
38
+ //# sourceMappingURL=Crawl4aiContentExtractor.node.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Crawl4aiContentExtractor.node.js","sourceRoot":"","sources":["../../../nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.ts"],"names":[],"mappings":";;;AAQA,6CAA0C;AAC1C,qDAA4E;AAG5E,MAAa,wBAAwB;IAArC;QACC,gBAAW,GAAyB;YACnC,WAAW,EAAE,6BAA6B;YAC1C,IAAI,EAAE,0BAA0B;YAChC,IAAI,EAAE,mBAAmB;YACzB,KAAK,EAAE,CAAC,WAAW,CAAC;YACpB,OAAO,EAAE,CAAC;YACV,QAAQ,EAAE,8BAA8B;YACxC,WAAW,EAAE,0DAA0D;YACvE,QAAQ,EAAE;gBACT,IAAI,EAAE,6BAA6B;aACnC;YAED,MAAM,EAAE,CAAC,MAAM,CAAC;YAEhB,OAAO,EAAE,CAAC,MAAM,CAAC;YAEjB,YAAY,EAAE,IAAI;YAClB,WAAW,EAAE;gBACZ;oBACC,IAAI,EAAE,aAAa;oBACnB,QAAQ,EAAE,IAAI;iBACd;aACD;YACD,UAAU,EAAE;gBACX,GAAG,wBAAqB;aACxB;SACD,CAAC;IAMH,CAAC;IAHA,KAAK,CAAC,OAAO;QACZ,OAAO,MAAM,eAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;CACD;AAjCD,4DAiCC"}
@@ -0,0 +1,4 @@
1
+ import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow';
2
+ import type { Crawl4aiNodeOptions } from '../helpers/interfaces';
3
+ export declare const description: INodeProperties[];
4
+ export declare function execute(this: IExecuteFunctions, items: INodeExecutionData[], nodeOptions: Crawl4aiNodeOptions): Promise<INodeExecutionData[]>;
@@ -0,0 +1,295 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.description = void 0;
4
+ exports.execute = execute;
5
+ const n8n_workflow_1 = require("n8n-workflow");
6
+ const utils_1 = require("../helpers/utils");
7
+ const formatters_1 = require("../../Crawl4aiBasicCrawler/helpers/formatters");
8
+ exports.description = [
9
+ {
10
+ displayName: 'URL',
11
+ name: 'url',
12
+ type: 'string',
13
+ required: true,
14
+ default: '',
15
+ placeholder: 'https://example.com',
16
+ description: 'The URL to extract content from',
17
+ displayOptions: {
18
+ show: {
19
+ operation: ['cssExtractor'],
20
+ },
21
+ },
22
+ },
23
+ {
24
+ displayName: 'Base Selector',
25
+ name: 'baseSelector',
26
+ type: 'string',
27
+ required: true,
28
+ default: '',
29
+ placeholder: 'div.product-item',
30
+ description: 'CSS selector for the repeating element (e.g., product items, article cards)',
31
+ displayOptions: {
32
+ show: {
33
+ operation: ['cssExtractor'],
34
+ },
35
+ },
36
+ },
37
+ {
38
+ displayName: 'Fields',
39
+ name: 'fields',
40
+ placeholder: 'Add Field',
41
+ type: 'fixedCollection',
42
+ typeOptions: {
43
+ multipleValues: true,
44
+ },
45
+ default: {},
46
+ displayOptions: {
47
+ show: {
48
+ operation: ['cssExtractor'],
49
+ },
50
+ },
51
+ options: [
52
+ {
53
+ name: 'fieldsValues',
54
+ displayName: 'Fields',
55
+ values: [
56
+ {
57
+ displayName: 'Field Name',
58
+ name: 'name',
59
+ type: 'string',
60
+ required: true,
61
+ default: '',
62
+ placeholder: 'title',
63
+ description: 'Name of the field to extract',
64
+ },
65
+ {
66
+ displayName: 'CSS Selector',
67
+ name: 'selector',
68
+ type: 'string',
69
+ required: true,
70
+ default: '',
71
+ placeholder: 'h3.title',
72
+ description: 'CSS selector relative to the base selector',
73
+ },
74
+ {
75
+ displayName: 'Field Type',
76
+ name: 'fieldType',
77
+ type: 'options',
78
+ options: [
79
+ {
80
+ name: 'Text',
81
+ value: 'text',
82
+ description: 'Extract text content',
83
+ },
84
+ {
85
+ name: 'HTML',
86
+ value: 'html',
87
+ description: 'Extract HTML content',
88
+ },
89
+ {
90
+ name: 'Attribute',
91
+ value: 'attribute',
92
+ description: 'Extract an attribute value',
93
+ },
94
+ ],
95
+ default: 'text',
96
+ description: 'Type of data to extract',
97
+ },
98
+ {
99
+ displayName: 'Attribute Name',
100
+ name: 'attribute',
101
+ type: 'string',
102
+ displayOptions: {
103
+ show: {
104
+ fieldType: ['attribute'],
105
+ },
106
+ },
107
+ default: 'href',
108
+ placeholder: 'href',
109
+ description: 'Name of the attribute to extract',
110
+ },
111
+ ],
112
+ },
113
+ ],
114
+ },
115
+ {
116
+ displayName: 'Browser Options',
117
+ name: 'browserOptions',
118
+ type: 'collection',
119
+ placeholder: 'Add Option',
120
+ default: {},
121
+ displayOptions: {
122
+ show: {
123
+ operation: ['cssExtractor'],
124
+ },
125
+ },
126
+ options: [
127
+ {
128
+ displayName: 'Enable JavaScript',
129
+ name: 'javaScriptEnabled',
130
+ type: 'boolean',
131
+ default: true,
132
+ description: 'Whether to enable JavaScript execution',
133
+ },
134
+ {
135
+ displayName: 'Headless Mode',
136
+ name: 'headless',
137
+ type: 'boolean',
138
+ default: true,
139
+ description: 'Whether to run browser in headless mode',
140
+ },
141
+ {
142
+ displayName: 'JavaScript Code',
143
+ name: 'jsCode',
144
+ type: 'string',
145
+ typeOptions: {
146
+ rows: 4,
147
+ },
148
+ default: '',
149
+ placeholder: 'document.querySelector("button.load-more").click();',
150
+ description: 'JavaScript code to execute before extraction (e.g., to click buttons, scroll)',
151
+ },
152
+ {
153
+ displayName: 'Timeout (Ms)',
154
+ name: 'timeout',
155
+ type: 'number',
156
+ default: 30000,
157
+ description: 'Maximum time to wait for the browser to load the page',
158
+ },
159
+ {
160
+ displayName: 'Viewport Height',
161
+ name: 'viewportHeight',
162
+ type: 'number',
163
+ default: 800,
164
+ description: 'The height of the browser viewport',
165
+ },
166
+ {
167
+ displayName: 'Viewport Width',
168
+ name: 'viewportWidth',
169
+ type: 'number',
170
+ default: 1280,
171
+ description: 'The width of the browser viewport',
172
+ },
173
+ ],
174
+ },
175
+ {
176
+ displayName: 'Options',
177
+ name: 'options',
178
+ type: 'collection',
179
+ placeholder: 'Add Option',
180
+ default: {},
181
+ displayOptions: {
182
+ show: {
183
+ operation: ['cssExtractor'],
184
+ },
185
+ },
186
+ options: [
187
+ {
188
+ displayName: 'Cache Mode',
189
+ name: 'cacheMode',
190
+ type: 'options',
191
+ options: [
192
+ {
193
+ name: 'Enabled (Read/Write)',
194
+ value: 'enabled',
195
+ description: 'Use cache if available, save new results to cache',
196
+ },
197
+ {
198
+ name: 'Bypass (Force Fresh)',
199
+ value: 'bypass',
200
+ description: 'Ignore cache, always fetch fresh content',
201
+ },
202
+ {
203
+ name: 'Only (Read Only)',
204
+ value: 'only',
205
+ description: 'Only use cache, do not make new requests',
206
+ },
207
+ ],
208
+ default: 'enabled',
209
+ description: 'How to use the cache when crawling',
210
+ },
211
+ {
212
+ displayName: 'Include Original Text',
213
+ name: 'includeFullText',
214
+ type: 'boolean',
215
+ default: false,
216
+ description: 'Whether to include the original webpage text in output',
217
+ },
218
+ {
219
+ displayName: 'Clean Text',
220
+ name: 'cleanText',
221
+ type: 'boolean',
222
+ default: true,
223
+ description: 'Whether to clean and normalize extracted text (remove extra spaces, newlines)',
224
+ },
225
+ ],
226
+ },
227
+ ];
228
/**
 * Execute the "cssExtractor" operation once per input item.
 *
 * For each item the function reads the `url`, `baseSelector`,
 * `fields.fieldsValues`, `browserOptions` and `options` parameters, validates
 * them, builds an extraction schema named `extracted_items`, runs the crawler
 * via `arun`, and pushes the formatted result paired to the input item.
 * When `options.cleanText` is exactly `true` and data was extracted, the
 * result's `data` is replaced by the cleaned data.
 *
 * Error handling: if `continueOnFail()` is true, a failing item yields an
 * output entry carrying the input JSON and a `NodeOperationError`; otherwise
 * the original error is rethrown and processing stops.
 *
 * Must be called with an n8n execution context as `this`.
 *
 * @param items Input items to process.
 * @param nodeOptions Accepted for signature compatibility; not read here.
 * @returns One output entry per successfully processed (or tolerated failing) item.
 */
async function execute(items, nodeOptions) {
    const { isValidUrl, createBrowserConfig, createCssSelectorExtractionStrategy, getCrawl4aiClient, cleanExtractedData, } = utils_1;
    const { parseExtractedJson, formatExtractionResult } = formatters_1;
    // Raise a validation error tagged with the index of the offending item.
    const reject = (message, itemIndex) => {
        throw new n8n_workflow_1.NodeOperationError(this.getNode(), message, { itemIndex });
    };
    const collected = [];
    for (let index = 0; index < items.length; index += 1) {
        try {
            const url = this.getNodeParameter('url', index, '');
            const baseSelector = this.getNodeParameter('baseSelector', index, '');
            const fieldDefinitions = this.getNodeParameter('fields.fieldsValues', index, []);
            const browserOptions = this.getNodeParameter('browserOptions', index, {});
            const options = this.getNodeParameter('options', index, {});

            // Validate inputs in the same order the messages are documented.
            if (!url) {
                reject('URL cannot be empty.', index);
            }
            if (!isValidUrl(url)) {
                reject(`Invalid URL: ${url}`, index);
            }
            if (!baseSelector) {
                reject('Base selector cannot be empty.', index);
            }
            if (!fieldDefinitions || fieldDefinitions.length === 0) {
                reject('At least one field must be defined.', index);
            }

            // Translate the UI field definitions into the extraction schema.
            const schemaFields = fieldDefinitions.map(definition => {
                const { name, selector, fieldType, attribute } = definition;
                return { name, selector, type: fieldType, attribute };
            });
            const schema = {
                name: 'extracted_items',
                baseSelector,
                fields: schemaFields,
            };

            const browserConfig = createBrowserConfig(browserOptions);
            const extractionStrategy = createCssSelectorExtractionStrategy(schema);
            const crawler = await getCrawl4aiClient(this);
            const runOptions = {
                browserConfig,
                extractionStrategy,
                cacheMode: options.cacheMode || 'enabled',
                jsCode: browserOptions.jsCode,
            };
            const crawlResult = await crawler.arun(url, runOptions);

            const extracted = parseExtractedJson(crawlResult);
            const payload = formatExtractionResult(crawlResult, extracted, options.includeFullText);
            if (options.cleanText === true && extracted) {
                payload.data = cleanExtractedData(extracted);
            }
            collected.push({
                json: payload,
                pairedItem: { item: index },
            });
        }
        catch (error) {
            if (!this.continueOnFail()) {
                throw error;
            }
            const node = this.getNode();
            // Prefer the item index recorded on the error, if any, over the loop index.
            const reportedIndex = error.itemIndex;
            const errorItemIndex = reportedIndex !== null && reportedIndex !== undefined ? reportedIndex : index;
            collected.push({
                json: items[index].json,
                error: new n8n_workflow_1.NodeOperationError(node, error.message, { itemIndex: errorItemIndex }),
                pairedItem: { item: index },
            });
        }
    }
    return collected;
}
295
+ //# sourceMappingURL=cssExtractor.operation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cssExtractor.operation.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.ts"],"names":[],"mappings":";;;AAqPA,0BAoGC;AAnVD,+CAAkD;AAIlD,4CAM0B;AAC1B,8EAGuD;AAG1C,QAAA,WAAW,GAAsB;IAC5C;QACE,WAAW,EAAE,KAAK;QAClB,IAAI,EAAE,KAAK;QACX,IAAI,EAAE,QAAQ;QACd,QAAQ,EAAE,IAAI;QACd,OAAO,EAAE,EAAE;QACX,WAAW,EAAE,qBAAqB;QAClC,WAAW,EAAE,iCAAiC;QAC9C,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;KACF;IACD;QACE,WAAW,EAAE,eAAe;QAC5B,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,QAAQ;QACd,QAAQ,EAAE,IAAI;QACd,OAAO,EAAE,EAAE;QACX,WAAW,EAAE,kBAAkB;QAC/B,WAAW,EAAE,6EAA6E;QAC1F,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;KACF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,IAAI,EAAE,QAAQ;QACd,WAAW,EAAE,WAAW;QACxB,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE;YACX,cAAc,EAAE,IAAI;SACrB;QACD,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;QACD,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,cAAc;gBACpB,WAAW,EAAE,QAAQ;gBACrB,MAAM,EAAE;oBACN;wBACE,WAAW,EAAE,YAAY;wBACzB,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,QAAQ;wBACd,QAAQ,EAAE,IAAI;wBACd,OAAO,EAAE,EAAE;wBACX,WAAW,EAAE,OAAO;wBACpB,WAAW,EAAE,8BAA8B;qBAC5C;oBACD;wBACE,WAAW,EAAE,cAAc;wBAC3B,IAAI,EAAE,UAAU;wBAChB,IAAI,EAAE,QAAQ;wBACd,QAAQ,EAAE,IAAI;wBACd,OAAO,EAAE,EAAE;wBACX,WAAW,EAAE,UAAU;wBACvB,WAAW,EAAE,4CAA4C;qBAC1D;oBACD;wBACE,WAAW,EAAE,YAAY;wBACzB,IAAI,EAAE,WAAW;wBACjB,IAAI,EAAE,SAAS;wBACf,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,KAAK,EAAE,MAAM;gCACb,WAAW,EAAE,sBAAsB;6BACpC;4BACD;gCACE,IAAI,EAAE,MAAM;gCACZ,KAAK,EAAE,MAAM;gCACb,WAAW,EAAE,sBAAsB;6BACpC;4BACD;gCACE,IAAI,EAAE,WAAW;gCACjB,KAAK,EAAE,WAAW;gCAClB,WAAW,EAAE,4BAA4B;6BAC1C;yBACF;wBACD,OAAO,EAAE,MAAM;wBACf,WAAW,EAAE,yBAAyB;qBACvC;oBACD;wBACE,WAAW,EAAE,gBAAgB;wBAC7B,IAAI,EAAE,WAAW;wBACjB,IAAI,EAAE,QAAQ;wBACd,cAAc,EAAE;4BACd,IAAI,EAAE;gCACJ,SAAS,EAAE,CAAC,WAAW,CAAC;6BACzB;yBACF;wBACD,OAAO,EAAE,MAAM;wBACf,WAAW,EAAE,MAAM;wBACnB,WAAW,EAAE,kCAAkC;qBAChD;iBACF;aACF;SACF;KACF;IACD;QACE,WAAW,EAAE,iBAAiB;Q
AC9B,IAAI,EAAE,gBAAgB;QACtB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,mBAAmB;gBAChC,IAAI,EAAE,mBAAmB;gBACzB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,wCAAwC;aACtD;YACD;gBACE,WAAW,EAAE,eAAe;gBAC5B,IAAI,EAAE,UAAU;gBAChB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,yCAAyC;aACvD;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE;oBACX,IAAI,EAAE,CAAC;iBACR;gBACD,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,qDAAqD;gBAClE,WAAW,EAAE,+EAA+E;aAC7F;YACD;gBACE,WAAW,EAAE,cAAc;gBAC3B,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,uDAAuD;aACrE;YACD;gBACE,WAAW,EAAE,iBAAiB;gBAC9B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,GAAG;gBACZ,WAAW,EAAE,oCAAoC;aAClD;YACD;gBACE,WAAW,EAAE,gBAAgB;gBAC7B,IAAI,EAAE,eAAe;gBACrB,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,mCAAmC;aACjD;SACF;KACF;IACD;QACE,WAAW,EAAE,SAAS;QACtB,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,YAAY;gBACzB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,sBAAsB;wBAC5B,KAAK,EAAE,SAAS;wBAChB,WAAW,EAAE,mDAAmD;qBACjE;oBACD;wBACE,IAAI,EAAE,sBAAsB;wBAC5B,KAAK,EAAE,QAAQ;wBACf,WAAW,EAAE,0CAA0C;qBACxD;oBACD;wBACE,IAAI,EAAE,kBAAkB;wBACxB,KAAK,EAAE,MAAM;wBACb,WAAW,EAAE,0CAA0C;qBACxD;iBACF;gBACD,OAAO,EAAE,SAAS;gBAClB,WAAW,EAAE,oCAAoC;aAClD;YACD;gBACE,WAAW,EAAE,uBAAuB;gBACpC,IAAI,EAAE,iBAAiB;gBACvB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,wDAAwD;aACtE;YACD;gBACE,WAAW,EAAE,YAAY;gBACzB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,+EAA+E;aAC7F;SACF;KACF;CACF,CAAC;AAGK,KAAK,UAAU,OAAO,CAE3B,KAA2B,EAC3B,WAAgC;;IAEhC,MAAM,UAAU,GAAyB,EAAE,CAAC;IAE5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,C
AAC;YAEH,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAW,CAAC;YAC1D,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,cAAc,EAAE,CAAC,EAAE,EAAE,CAAW,CAAC;YAC5E,MAAM,YAAY,GAAG,IAAI,CAAC,gBAAgB,CAAC,qBAAqB,EAAE,CAAC,EAAE,EAAE,CAAkB,CAAC;YAC1F,MAAM,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YACrF,MAAM,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YAEvE,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,sBAAsB,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACzF,CAAC;YAED,IAAI,CAAC,IAAA,kBAAU,EAAC,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,gBAAgB,GAAG,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACxF,CAAC;YAED,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,gCAAgC,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACnG,CAAC;YAED,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC/C,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,qCAAqC,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACxG,CAAC;YAGD,MAAM,MAAM,GAAsB;gBAChC,IAAI,EAAE,iBAAiB;gBACvB,YAAY;gBACZ,MAAM,EAAE,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;oBACjC,IAAI,EAAE,KAAK,CAAC,IAAc;oBAC1B,QAAQ,EAAE,KAAK,CAAC,QAAkB;oBAClC,IAAI,EAAE,KAAK,CAAC,SAA0C;oBACtD,SAAS,EAAE,KAAK,CAAC,SAAmB;iBACrC,CAAC,CAAC;aACJ,CAAC;YAGF,MAAM,aAAa,GAAG,IAAA,2BAAmB,EAAC,cAAc,CAAC,CAAC;YAG1D,MAAM,kBAAkB,GAAG,IAAA,2CAAmC,EAAC,MAAM,CAAC,CAAC;YAGvE,MAAM,OAAO,GAAG,MAAM,IAAA,yBAAiB,EAAC,IAAI,CAAC,CAAC;YAG9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE;gBACrC,aAAa;gBACb,kBAAkB;gBAClB,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,SAAS;gBACzC,MAAM,EAAE,cAAc,CAAC,MAAM;aAC9B,CAAC,CAAC;YAGH,MAAM,aAAa,GAAG,IAAA,+BAAkB,EAAC,MAAM,CAAC,CAAC;YAGjD,MAAM,eAAe,GAAG,IAAA,mCAAsB,EAC5C,MAAM,EACN,aAAa,EACb,OAAO,CAAC,eAA0B,CACnC,CAAC;YAGF,IAAI,OAAO,CAAC,SAAS,KAAK,IAAI,IAAI,aAAa,EAAE,CAAC;gBAChD,eAAe,CAAC,IAAI,GAAG,IAAA,0BAAkB,EAAC,aAAa,CAAC,CAAC;YAC3D,CAAC;YAGD,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,eAAe;gBACrB,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;aACxB,CA
AC,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAEf,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;gBAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC5B,MAAM,cAAc,GAAG,MAAC,KAAa,CAAC,SAAS,mCAAI,CAAC,CAAC;gBACrD,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI;oBACnB,KAAK,EAAE,IAAI,iCAAkB,CAAC,IAAI,EAAG,KAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC;oBAC5F,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;iBACxB,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { IExecuteFunctions, INodeExecutionData, INodeProperties } from 'n8n-workflow';
2
+ import type { Crawl4aiNodeOptions } from '../helpers/interfaces';
3
+ export declare const description: INodeProperties[];
4
+ export declare function execute(this: IExecuteFunctions, items: INodeExecutionData[], nodeOptions: Crawl4aiNodeOptions): Promise<INodeExecutionData[]>;