n8n-nodes-crawl4ai-plus 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/LICENSE +23 -0
  2. package/LICENSE.md +19 -0
  3. package/README.md +157 -0
  4. package/dist/credentials/Crawl4aiApi.credentials.d.ts +7 -0
  5. package/dist/credentials/Crawl4aiApi.credentials.js +242 -0
  6. package/dist/credentials/Crawl4aiApi.credentials.js.map +1 -0
  7. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.d.ts +5 -0
  8. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js +37 -0
  9. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js.map +1 -0
  10. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.d.ts +4 -0
  11. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js +299 -0
  12. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js.map +1 -0
  13. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.d.ts +4 -0
  14. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js +324 -0
  15. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js.map +1 -0
  16. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.d.ts +8 -0
  17. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js +67 -0
  18. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js.map +1 -0
  19. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.d.ts +4 -0
  20. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js +148 -0
  21. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js.map +1 -0
  22. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.d.ts +2 -0
  23. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js +37 -0
  24. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js.map +1 -0
  25. package/dist/nodes/Crawl4aiBasicCrawler/crawl4ai.svg +17 -0
  26. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.d.ts +15 -0
  27. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js +226 -0
  28. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js.map +1 -0
  29. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.d.ts +5 -0
  30. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js +81 -0
  31. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js.map +1 -0
  32. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.d.ts +189 -0
  33. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js +3 -0
  34. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js.map +1 -0
  35. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.d.ts +8 -0
  36. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js +97 -0
  37. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js.map +1 -0
  38. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.d.ts +5 -0
  39. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js +38 -0
  40. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js.map +1 -0
  41. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.d.ts +4 -0
  42. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js +336 -0
  43. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js.map +1 -0
  44. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.d.ts +4 -0
  45. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js +369 -0
  46. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js.map +1 -0
  47. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.d.ts +4 -0
  48. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js +786 -0
  49. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js.map +1 -0
  50. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.d.ts +8 -0
  51. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js +76 -0
  52. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js.map +1 -0
  53. package/dist/nodes/Crawl4aiContentExtractor/actions/regexExtractor.operation.d.ts +4 -0
  54. package/dist/nodes/Crawl4aiContentExtractor/actions/regexExtractor.operation.js +437 -0
  55. package/dist/nodes/Crawl4aiContentExtractor/actions/regexExtractor.operation.js.map +1 -0
  56. package/dist/nodes/Crawl4aiContentExtractor/actions/router.d.ts +2 -0
  57. package/dist/nodes/Crawl4aiContentExtractor/actions/router.js +37 -0
  58. package/dist/nodes/Crawl4aiContentExtractor/actions/router.js.map +1 -0
  59. package/dist/nodes/Crawl4aiContentExtractor/crawl4ai.svg +17 -0
  60. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.d.ts +1 -0
  61. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js +7 -0
  62. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js.map +1 -0
  63. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.d.ts +1 -0
  64. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js +8 -0
  65. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js.map +1 -0
  66. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.d.ts +1 -0
  67. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js +3 -0
  68. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js.map +1 -0
  69. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.d.ts +6 -0
  70. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js +89 -0
  71. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js.map +1 -0
  72. package/dist/tsconfig.tsbuildinfo +1 -0
  73. package/index.js +14 -0
  74. package/package.json +70 -0
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createCrawlerInstance = exports.Crawl4aiClient = void 0;
4
+ var apiClient_1 = require("../../Crawl4aiBasicCrawler/helpers/apiClient");
5
+ Object.defineProperty(exports, "Crawl4aiClient", { enumerable: true, get: function () { return apiClient_1.Crawl4aiClient; } });
6
+ Object.defineProperty(exports, "createCrawlerInstance", { enumerable: true, get: function () { return apiClient_1.createCrawlerInstance; } });
7
+ //# sourceMappingURL=apiClient.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"apiClient.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/helpers/apiClient.ts"],"names":[],"mappings":";;;AACA,0EAGsD;AAFpD,2GAAA,cAAc,OAAA;AACd,kHAAA,qBAAqB,OAAA"}
@@ -0,0 +1 @@
1
+ export { formatCrawlResult, parseExtractedJson, formatExtractionResult } from '../../Crawl4aiBasicCrawler/helpers/formatters';
@@ -0,0 +1,8 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.formatExtractionResult = exports.parseExtractedJson = exports.formatCrawlResult = void 0;
4
+ var formatters_1 = require("../../Crawl4aiBasicCrawler/helpers/formatters");
5
+ Object.defineProperty(exports, "formatCrawlResult", { enumerable: true, get: function () { return formatters_1.formatCrawlResult; } });
6
+ Object.defineProperty(exports, "parseExtractedJson", { enumerable: true, get: function () { return formatters_1.parseExtractedJson; } });
7
+ Object.defineProperty(exports, "formatExtractionResult", { enumerable: true, get: function () { return formatters_1.formatExtractionResult; } });
8
+ //# sourceMappingURL=formatters.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"formatters.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/helpers/formatters.ts"],"names":[],"mappings":";;;AACA,4EAIuD;AAHrD,+GAAA,iBAAiB,OAAA;AACjB,gHAAA,kBAAkB,OAAA;AAClB,oHAAA,sBAAsB,OAAA"}
@@ -0,0 +1 @@
1
+ export { Crawl4aiApiCredentials, Crawl4aiNodeOptions, BrowserConfig, CrawlerRunConfig, CrawlResult, Link, Media, CssSelectorField, CssSelectorSchema, LlmSchemaField, LlmSchema } from '../../Crawl4aiBasicCrawler/helpers/interfaces';
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=interfaces.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interfaces.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/helpers/interfaces.ts"],"names":[],"mappings":""}
@@ -0,0 +1,6 @@
1
+ export { getCrawl4aiClient, createBrowserConfig, createCrawlerRunConfig, safeJsonParse, cleanText, isValidUrl } from '../../Crawl4aiBasicCrawler/helpers/utils';
2
+ import { IDataObject } from 'n8n-workflow';
3
+ import { CssSelectorSchema, LlmSchema } from './interfaces';
4
+ export declare function createCssSelectorExtractionStrategy(schema: CssSelectorSchema): any;
5
+ export declare function createLlmExtractionStrategy(schema: LlmSchema, instruction: string, provider: string, apiKey?: string, baseUrl?: string): any;
6
+ export declare function cleanExtractedData(data: IDataObject): IDataObject;
@@ -0,0 +1,89 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isValidUrl = exports.cleanText = exports.safeJsonParse = exports.createCrawlerRunConfig = exports.createBrowserConfig = exports.getCrawl4aiClient = void 0;
4
+ exports.createCssSelectorExtractionStrategy = createCssSelectorExtractionStrategy;
5
+ exports.createLlmExtractionStrategy = createLlmExtractionStrategy;
6
+ exports.cleanExtractedData = cleanExtractedData;
7
+ var utils_1 = require("../../Crawl4aiBasicCrawler/helpers/utils");
8
+ Object.defineProperty(exports, "getCrawl4aiClient", { enumerable: true, get: function () { return utils_1.getCrawl4aiClient; } });
9
+ Object.defineProperty(exports, "createBrowserConfig", { enumerable: true, get: function () { return utils_1.createBrowserConfig; } });
10
+ Object.defineProperty(exports, "createCrawlerRunConfig", { enumerable: true, get: function () { return utils_1.createCrawlerRunConfig; } });
11
+ Object.defineProperty(exports, "safeJsonParse", { enumerable: true, get: function () { return utils_1.safeJsonParse; } });
12
+ Object.defineProperty(exports, "cleanText", { enumerable: true, get: function () { return utils_1.cleanText; } });
13
+ Object.defineProperty(exports, "isValidUrl", { enumerable: true, get: function () { return utils_1.isValidUrl; } });
14
+ const utils_2 = require("../../Crawl4aiBasicCrawler/helpers/utils");
15
/**
 * Build the `JsonCssExtractionStrategy` payload for a CSS-selector schema.
 *
 * Only the `name`, `selector`, `type` and `attribute` properties of each field
 * are copied into the payload; any other field properties are dropped.
 *
 * @param {object} schema - Schema with `name`, `baseSelector` and a `fields` array.
 * @returns {object} Strategy descriptor of the form `{ type, params: { schema: { type: 'dict', value } } }`.
 */
function createCssSelectorExtractionStrategy(schema) {
    // A schema without a usable `fields` array yields an empty field list
    // instead of a "Cannot read properties of undefined (reading 'map')" TypeError.
    const fields = Array.isArray(schema.fields) ? schema.fields : [];
    return {
        type: 'JsonCssExtractionStrategy',
        params: {
            schema: {
                type: 'dict',
                value: {
                    name: schema.name,
                    baseSelector: schema.baseSelector,
                    fields: fields.map(field => ({
                        name: field.name,
                        selector: field.selector,
                        type: field.type,
                        attribute: field.attribute,
                    })),
                },
            },
        },
    };
}
35
/**
 * Build the `LLMExtractionStrategy` payload, including its nested `LLMConfig`.
 *
 * @param {object} schema - Extraction schema, embedded unchanged as a `dict` value.
 * @param {string} instruction - Instruction text placed in the strategy params.
 * @param {string} provider - Provider identifier; falls back to `'openai/gpt-4o'` when falsy.
 * @param {string} [apiKey] - Forwarded as `api_token`.
 * @param {string} [baseUrl] - Optional endpoint override, forwarded as `api_base`
 *   when it is a string that is non-empty after trimming.
 * @returns {object} Strategy descriptor of the form `{ type, params }`.
 */
function createLlmExtractionStrategy(schema, instruction, provider, apiKey, baseUrl) {
    const llmConfigParams = {
        provider: provider || 'openai/gpt-4o',
        api_token: apiKey,
    };
    // Forward the trimmed value: the emptiness check is made on the trimmed
    // string, so the surrounding whitespace should not reach `api_base`.
    // Non-string values are ignored rather than throwing on `.trim()`.
    const trimmedBaseUrl = typeof baseUrl === 'string' ? baseUrl.trim() : '';
    if (trimmedBaseUrl !== '') {
        llmConfigParams.api_base = trimmedBaseUrl;
    }
    return {
        type: 'LLMExtractionStrategy',
        params: {
            llm_config: {
                type: 'LLMConfig',
                params: llmConfigParams,
            },
            instruction,
            schema: {
                type: 'dict',
                value: schema,
            },
            extraction_type: 'schema',
            apply_chunking: false,
            force_json_response: true,
        },
    };
}
61
/**
 * Return a cleaned copy of an extraction result.
 *
 * Every string found in the structure is passed through the `cleanText`
 * helper imported from the basic crawler utils. Nested objects and arrays are
 * walked recursively; all other values (numbers, booleans, null, ...) are
 * copied as-is. The input object is not modified.
 *
 * @param {object} data - Extracted data; a falsy value yields an empty object.
 * @returns {object} New object with the same keys and cleaned values.
 */
function cleanExtractedData(data) {
    if (!data)
        return {};
    // Single recursive walker shared by object values and array items. Arrays
    // are checked before the generic object branch so that an array nested
    // inside another array stays an array instead of being rebuilt as an
    // index-keyed plain object.
    const cleanValue = (value) => {
        if (typeof value === 'string') {
            return (0, utils_2.cleanText)(value);
        }
        if (Array.isArray(value)) {
            return value.map(item => cleanValue(item));
        }
        if (typeof value === 'object' && value !== null) {
            return cleanExtractedData(value);
        }
        return value;
    };
    const cleanedData = {};
    Object.entries(data).forEach(([key, value]) => {
        cleanedData[key] = cleanValue(value);
    });
    return cleanedData;
}
89
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/helpers/utils.ts"],"names":[],"mappings":";;;AAmBA,kFAmBC;AAWD,kEAkCC;AAKD,gDAyBC;AAhHD,kEAOkD;AANhD,0GAAA,iBAAiB,OAAA;AACjB,4GAAA,mBAAmB,OAAA;AACnB,+GAAA,sBAAsB,OAAA;AACtB,sGAAA,aAAa,OAAA;AACb,kGAAA,SAAS,OAAA;AACT,mGAAA,UAAU,OAAA;AAIZ,oEAAqE;AAQrE,SAAgB,mCAAmC,CAAC,MAAyB;IAC3E,OAAO;QACL,IAAI,EAAE,2BAA2B;QACjC,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,YAAY,EAAE,MAAM,CAAC,YAAY;oBACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAClC,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,SAAS,EAAE,KAAK,CAAC,SAAS;qBAC3B,CAAC,CAAC;iBACJ;aACF;SACF;KACF,CAAC;AACJ,CAAC;AAWD,SAAgB,2BAA2B,CACzC,MAAiB,EACjB,WAAmB,EACnB,QAAgB,EAChB,MAAe,EACf,OAAgB;IAEhB,MAAM,eAAe,GAAQ;QAC3B,QAAQ,EAAE,QAAQ,IAAI,eAAe;QACrC,SAAS,EAAE,MAAM;KAClB,CAAC;IAGF,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrC,eAAe,CAAC,QAAQ,GAAG,OAAO,CAAC;IACrC,CAAC;IAED,OAAO;QACL,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE;YACN,UAAU,EAAE;gBACV,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,eAAe;aACxB;YACD,WAAW;YACX,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,MAAM;aACd;YACD,eAAe,EAAE,QAAQ;YACzB,cAAc,EAAE,KAAK;YACrB,mBAAmB,EAAE,IAAI;SAC1B;KACF,CAAC;AACJ,CAAC;AAKD,SAAgB,kBAAkB,CAAC,IAAiB;IAClD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,MAAM,WAAW,GAAgB,EAAE,CAAC;IAEpC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;QAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,GAAG,CAAC,GAAG,IAAA,iBAAS,EAAC,KAAK,CAAC,CAAC;QACtC,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAClC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,OAAO,IAAA,iBAAS,EAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,OAAO,kBAAkB,CAAC,IAAmB,CAAC,CAAC;gBACjD,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,C
AAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACvD,WAAW,CAAC,GAAG,CAAC,GAAG,kBAAkB,CAAC,KAAoB,CAAC,CAAC;QAC9D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC;AACrB,CAAC"}