n8n-nodes-crawl4ai-onuro 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/LICENSE.md +19 -0
  3. package/README.md +129 -0
  4. package/dist/credentials/Crawl4aiApi.credentials.d.ts +7 -0
  5. package/dist/credentials/Crawl4aiApi.credentials.js +228 -0
  6. package/dist/credentials/Crawl4aiApi.credentials.js.map +1 -0
  7. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.d.ts +5 -0
  8. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js +37 -0
  9. package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js.map +1 -0
  10. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.d.ts +4 -0
  11. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js +421 -0
  12. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js.map +1 -0
  13. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.d.ts +4 -0
  14. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js +422 -0
  15. package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js.map +1 -0
  16. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.d.ts +8 -0
  17. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js +67 -0
  18. package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js.map +1 -0
  19. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.d.ts +4 -0
  20. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js +148 -0
  21. package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js.map +1 -0
  22. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.d.ts +2 -0
  23. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js +37 -0
  24. package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js.map +1 -0
  25. package/dist/nodes/Crawl4aiBasicCrawler/crawl4ai.svg +6 -0
  26. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.d.ts +15 -0
  27. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js +271 -0
  28. package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js.map +1 -0
  29. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.d.ts +5 -0
  30. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js +96 -0
  31. package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js.map +1 -0
  32. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.d.ts +119 -0
  33. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js +3 -0
  34. package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js.map +1 -0
  35. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.d.ts +8 -0
  36. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js +80 -0
  37. package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js.map +1 -0
  38. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.d.ts +5 -0
  39. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js +38 -0
  40. package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js.map +1 -0
  41. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.d.ts +4 -0
  42. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js +295 -0
  43. package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js.map +1 -0
  44. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.d.ts +4 -0
  45. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js +328 -0
  46. package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js.map +1 -0
  47. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.d.ts +4 -0
  48. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js +417 -0
  49. package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js.map +1 -0
  50. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.d.ts +8 -0
  51. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js +67 -0
  52. package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js.map +1 -0
  53. package/dist/nodes/Crawl4aiContentExtractor/actions/router.d.ts +2 -0
  54. package/dist/nodes/Crawl4aiContentExtractor/actions/router.js +37 -0
  55. package/dist/nodes/Crawl4aiContentExtractor/actions/router.js.map +1 -0
  56. package/dist/nodes/Crawl4aiContentExtractor/crawl4ai.svg +6 -0
  57. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.d.ts +1 -0
  58. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js +7 -0
  59. package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js.map +1 -0
  60. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.d.ts +1 -0
  61. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js +8 -0
  62. package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js.map +1 -0
  63. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.d.ts +1 -0
  64. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js +3 -0
  65. package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js.map +1 -0
  66. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.d.ts +9 -0
  67. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js +93 -0
  68. package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js.map +1 -0
  69. package/dist/tsconfig.tsbuildinfo +1 -0
  70. package/index.js +14 -0
  71. package/package.json +68 -0
@@ -0,0 +1,93 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isValidUrl = exports.cleanText = exports.safeJsonParse = exports.createCrawlerRunConfig = exports.createBrowserConfig = exports.getCrawl4aiClient = void 0;
4
+ exports.createCssSelectorExtractionStrategy = createCssSelectorExtractionStrategy;
5
+ exports.createLlmExtractionStrategy = createLlmExtractionStrategy;
6
+ exports.cleanExtractedData = cleanExtractedData;
7
+ var utils_1 = require("../../Crawl4aiBasicCrawler/helpers/utils");
8
+ Object.defineProperty(exports, "getCrawl4aiClient", { enumerable: true, get: function () { return utils_1.getCrawl4aiClient; } });
9
+ Object.defineProperty(exports, "createBrowserConfig", { enumerable: true, get: function () { return utils_1.createBrowserConfig; } });
10
+ Object.defineProperty(exports, "createCrawlerRunConfig", { enumerable: true, get: function () { return utils_1.createCrawlerRunConfig; } });
11
+ Object.defineProperty(exports, "safeJsonParse", { enumerable: true, get: function () { return utils_1.safeJsonParse; } });
12
+ Object.defineProperty(exports, "cleanText", { enumerable: true, get: function () { return utils_1.cleanText; } });
13
+ Object.defineProperty(exports, "isValidUrl", { enumerable: true, get: function () { return utils_1.isValidUrl; } });
14
+ const utils_2 = require("../../Crawl4aiBasicCrawler/helpers/utils");
15
/**
 * Builds the payload object describing a CSS-selector based extraction strategy.
 *
 * Only `name`, `baseSelector` and `fields` are read from `schema`, and only
 * `name`, `selector`, `type` and `attribute` are copied from each field; any
 * other properties on the inputs are dropped. A field without `attribute`
 * still gets an `attribute: undefined` entry.
 *
 * @param {object} schema - Extraction schema with `name`, `baseSelector` and a `fields` array.
 * @returns {object} `{ type: 'JsonCssExtractionStrategy', params: { schema: { type: 'dict', value } } }`.
 * @throws {TypeError} If `schema` is nullish or `schema.fields` has no `map` method.
 */
function createCssSelectorExtractionStrategy(schema) {
    const { name, baseSelector, fields } = schema;
    // Project every field down to the four properties the payload carries.
    const fieldDefinitions = fields.map(({ name: fieldName, selector, type, attribute }) => ({
        name: fieldName,
        selector,
        type,
        attribute,
    }));
    const schemaPayload = {
        type: 'dict',
        value: { name, baseSelector, fields: fieldDefinitions },
    };
    return {
        type: 'JsonCssExtractionStrategy',
        params: { schema: schemaPayload },
    };
}
35
/**
 * Builds the payload object describing an LLM-driven extraction strategy.
 *
 * `provider` may be one of the short aliases in `providerMap`, which is
 * expanded to a "<vendor>/<model>" identifier. Any other truthy value is
 * passed through unchanged, and a missing or empty value falls back to
 * 'openai/gpt-4o'.
 *
 * @param {object} schema - Wrapped as-is into `{ type: 'dict', value: schema }`.
 * @param {string} instruction - Copied verbatim into `params.instruction`.
 * @param {string} [provider] - Short alias or full provider identifier.
 * @param {string} [apiKey] - Forwarded as `params.llm_config.params.api_token`.
 * @param {object} [extraArgs] - Used as `params.extra_args`; when falsy,
 *   `{ temperature: 0, max_tokens: 4000 }` is used instead.
 * @returns {object} `{ type: 'LLMExtractionStrategy', params: { ... } }`.
 */
function createLlmExtractionStrategy(schema, instruction, provider, apiKey, extraArgs) {
    const providerMap = {
        'openai': 'openai/gpt-4o',
        'anthropic': 'anthropic/claude-3-sonnet',
        'groq': 'groq/llama3-70b-8192',
        'ollama': 'ollama/llama2',
    };
    // Consult own keys only: a plain `providerMap[provider]` would also find
    // members inherited from Object.prototype (e.g. "constructor", "toString",
    // "__proto__") and emit a function/object as the provider.
    const alias = Object.prototype.hasOwnProperty.call(providerMap, provider)
        ? providerMap[provider]
        : undefined;
    const fullProvider = alias || provider || 'openai/gpt-4o';
    return {
        type: 'LLMExtractionStrategy',
        params: {
            llm_config: {
                type: 'LLMConfig',
                params: {
                    provider: fullProvider,
                    api_token: apiKey,
                },
            },
            instruction,
            schema: {
                type: 'dict',
                value: schema,
            },
            extraction_type: 'schema',
            apply_chunking: false,
            force_json_response: true,
            extra_args: extraArgs || { temperature: 0, max_tokens: 4000 },
        },
    };
}
65
/**
 * Returns a cleaned copy of an extracted-data object.
 *
 * Every own enumerable entry of `data` is copied into a new plain object:
 * strings are passed through `cleanText` (from the basic crawler utils),
 * arrays and nested objects are cleaned recursively, and all other values
 * (numbers, booleans, null, undefined) are copied unchanged. The input is
 * not mutated.
 *
 * @param {object} data - Extracted data; a falsy value yields `{}`.
 * @returns {object} New object with the same keys and cleaned values.
 */
function cleanExtractedData(data) {
    if (!data)
        return {};
    const cleanedData = {};
    for (const [key, value] of Object.entries(data)) {
        cleanedData[key] = cleanExtractedValue(value);
    }
    return cleanedData;
}
/**
 * Cleans a single value of any supported shape.
 *
 * Arrays are mapped element by element through this same function, so an
 * array nested directly inside another array stays an array instead of being
 * rebuilt as an index-keyed object.
 *
 * @param {*} value - Value to clean.
 * @returns {*} Cleaned string, array or object; other values are returned as-is.
 */
function cleanExtractedValue(value) {
    if (typeof value === 'string') {
        return (0, utils_2.cleanText)(value);
    }
    if (Array.isArray(value)) {
        // Wrap in an arrow so map's index/array arguments are not forwarded.
        return value.map((item) => cleanExtractedValue(item));
    }
    if (typeof value === 'object' && value !== null) {
        return cleanExtractedData(value);
    }
    return value;
}
93
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/helpers/utils.ts"],"names":[],"mappings":";;;AAqBA,kFAmBC;AAWD,kEAqCC;AAKD,gDAyBC;AArHD,kEAOkD;AANhD,0GAAA,iBAAiB,OAAA;AACjB,4GAAA,mBAAmB,OAAA;AACnB,+GAAA,sBAAsB,OAAA;AACtB,sGAAA,aAAa,OAAA;AACb,kGAAA,SAAS,OAAA;AACT,mGAAA,UAAU,OAAA;AAIZ,oEAAqE;AAUrE,SAAgB,mCAAmC,CAAC,MAAyB;IAC3E,OAAO;QACL,IAAI,EAAE,2BAA2B;QACjC,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,YAAY,EAAE,MAAM,CAAC,YAAY;oBACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAClC,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,SAAS,EAAE,KAAK,CAAC,SAAS;qBAC3B,CAAC,CAAC;iBACJ;aACF;SACF;KACF,CAAC;AACJ,CAAC;AAWD,SAAgB,2BAA2B,CACzC,MAAiB,EACjB,WAAmB,EACnB,QAAgB,EAChB,MAAe,EACf,SAAyD;IAGzD,MAAM,WAAW,GAA8B;QAC7C,QAAQ,EAAE,eAAe;QACzB,WAAW,EAAE,2BAA2B;QACxC,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,eAAe;KAC1B,CAAC;IACF,MAAM,YAAY,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,QAAQ,IAAI,eAAe,CAAC;IAE1E,OAAO;QACL,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE;YACN,UAAU,EAAE;gBACV,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE;oBACN,QAAQ,EAAE,YAAY;oBACtB,SAAS,EAAE,MAAM;iBAClB;aACF;YACD,WAAW;YACX,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,MAAM;aACd;YACD,eAAe,EAAE,QAAQ;YACzB,cAAc,EAAE,KAAK;YACrB,mBAAmB,EAAE,IAAI;YACzB,UAAU,EAAE,SAAS,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE;SAC9D;KACF,CAAC;AACJ,CAAC;AAKD,SAAgB,kBAAkB,CAAC,IAAiB;IAClD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,MAAM,WAAW,GAAgB,EAAE,CAAC;IAEpC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;QAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,GAAG,CAAC,GAAG,IAAA,iBAAS,EAAC,KAAK,CAAC,CAAC;QACtC,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAClC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,OAAO,IAAA,iBAAS,EAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAA
I,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,OAAO,kBAAkB,CAAC,IAAmB,CAAC,CAAC;gBACjD,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACvD,WAAW,CAAC,GAAG,CAAC,GAAG,kBAAkB,CAAC,KAAoB,CAAC,CAAC;QAC9D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC;AACrB,CAAC"}