n8n-nodes-crawl4ai-onuro 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/LICENSE.md +19 -0
- package/README.md +129 -0
- package/dist/credentials/Crawl4aiApi.credentials.d.ts +7 -0
- package/dist/credentials/Crawl4aiApi.credentials.js +228 -0
- package/dist/credentials/Crawl4aiApi.credentials.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.d.ts +5 -0
- package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js +37 -0
- package/dist/nodes/Crawl4aiBasicCrawler/Crawl4aiBasicCrawler.node.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js +421 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlMultipleUrls.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js +422 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/crawlSingleUrl.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.d.ts +8 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js +67 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/operations.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js +148 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/processRawHtml.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/router.d.ts +2 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js +37 -0
- package/dist/nodes/Crawl4aiBasicCrawler/actions/router.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/crawl4ai.svg +6 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.d.ts +15 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js +271 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/apiClient.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.d.ts +5 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js +96 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/formatters.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.d.ts +119 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js +3 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/interfaces.js.map +1 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.d.ts +8 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js +80 -0
- package/dist/nodes/Crawl4aiBasicCrawler/helpers/utils.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.d.ts +5 -0
- package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js +38 -0
- package/dist/nodes/Crawl4aiContentExtractor/Crawl4aiContentExtractor.node.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js +295 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/cssExtractor.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js +328 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/jsonExtractor.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.d.ts +4 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js +417 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/llmExtractor.operation.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/operations.d.ts +8 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js +67 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/operations.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/router.d.ts +2 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/router.js +37 -0
- package/dist/nodes/Crawl4aiContentExtractor/actions/router.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/crawl4ai.svg +6 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.d.ts +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js +7 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/apiClient.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.d.ts +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js +8 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/formatters.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.d.ts +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js +3 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/interfaces.js.map +1 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.d.ts +9 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js +93 -0
- package/dist/nodes/Crawl4aiContentExtractor/helpers/utils.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/index.js +14 -0
- package/package.json +68 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isValidUrl = exports.cleanText = exports.safeJsonParse = exports.createCrawlerRunConfig = exports.createBrowserConfig = exports.getCrawl4aiClient = void 0;
|
|
4
|
+
exports.createCssSelectorExtractionStrategy = createCssSelectorExtractionStrategy;
|
|
5
|
+
exports.createLlmExtractionStrategy = createLlmExtractionStrategy;
|
|
6
|
+
exports.cleanExtractedData = cleanExtractedData;
|
|
7
|
+
var utils_1 = require("../../Crawl4aiBasicCrawler/helpers/utils");
|
|
8
|
+
Object.defineProperty(exports, "getCrawl4aiClient", { enumerable: true, get: function () { return utils_1.getCrawl4aiClient; } });
|
|
9
|
+
Object.defineProperty(exports, "createBrowserConfig", { enumerable: true, get: function () { return utils_1.createBrowserConfig; } });
|
|
10
|
+
Object.defineProperty(exports, "createCrawlerRunConfig", { enumerable: true, get: function () { return utils_1.createCrawlerRunConfig; } });
|
|
11
|
+
Object.defineProperty(exports, "safeJsonParse", { enumerable: true, get: function () { return utils_1.safeJsonParse; } });
|
|
12
|
+
Object.defineProperty(exports, "cleanText", { enumerable: true, get: function () { return utils_1.cleanText; } });
|
|
13
|
+
Object.defineProperty(exports, "isValidUrl", { enumerable: true, get: function () { return utils_1.isValidUrl; } });
|
|
14
|
+
const utils_2 = require("../../Crawl4aiBasicCrawler/helpers/utils");
|
|
15
|
+
/**
 * Build the `JsonCssExtractionStrategy` descriptor for a CSS-selector schema.
 *
 * The schema is wrapped in a `{ type: 'dict', value: ... }` envelope and each
 * field is reduced to the four properties name/selector/type/attribute; any
 * other properties on a field are not copied.
 *
 * @param {object} schema - Object with `name`, `baseSelector` and a `fields` array.
 * @returns {object} Descriptor of the form `{ type, params: { schema } }`.
 * @throws {TypeError} If `schema.fields` has no `map` method (e.g. it is missing).
 */
function createCssSelectorExtractionStrategy(schema) {
    // Project a field onto the four properties that are forwarded.
    const pickFieldProps = (field) => {
        const { name, selector, type, attribute } = field;
        return { name, selector, type, attribute };
    };
    const schemaValue = {
        name: schema.name,
        baseSelector: schema.baseSelector,
        fields: schema.fields.map((field) => pickFieldProps(field)),
    };
    const wrappedSchema = { type: 'dict', value: schemaValue };
    return {
        type: 'JsonCssExtractionStrategy',
        params: { schema: wrappedSchema },
    };
}
|
|
35
|
+
/**
 * Build the `LLMExtractionStrategy` descriptor for schema-based LLM extraction.
 *
 * @param {object} schema - Schema object; forwarded as-is inside a
 *   `{ type: 'dict', value: schema }` envelope.
 * @param {string} instruction - Instruction text forwarded unchanged.
 * @param {string} [provider] - Either a shorthand (`openai`, `anthropic`,
 *   `groq`, `ollama`) that is expanded to a default `provider/model` string,
 *   or any other string, which is passed through unchanged. Falsy values fall
 *   back to `'openai/gpt-4o'`.
 * @param {string} [apiKey] - Forwarded as `api_token`.
 * @param {object} [extraArgs] - Forwarded as `extra_args`; when falsy,
 *   `{ temperature: 0, max_tokens: 4000 }` is used.
 * @returns {object} Descriptor of the form `{ type, params }`.
 */
function createLlmExtractionStrategy(schema, instruction, provider, apiKey, extraArgs) {
    const providerMap = {
        'openai': 'openai/gpt-4o',
        'anthropic': 'anthropic/claude-3-sonnet',
        'groq': 'groq/llama3-70b-8192',
        'ollama': 'ollama/llama2',
    };
    // Only own keys count as shorthands. A bare `providerMap[provider]` lookup
    // would also resolve inherited names such as "constructor" or "toString"
    // to Object.prototype members and leak a function into the config.
    const isShorthand = Object.prototype.hasOwnProperty.call(providerMap, provider);
    const fullProvider = isShorthand ? providerMap[provider] : (provider || 'openai/gpt-4o');
    return {
        type: 'LLMExtractionStrategy',
        params: {
            llm_config: {
                type: 'LLMConfig',
                params: {
                    provider: fullProvider,
                    api_token: apiKey,
                },
            },
            instruction,
            schema: {
                type: 'dict',
                value: schema,
            },
            extraction_type: 'schema',
            apply_chunking: false,
            force_json_response: true,
            extra_args: extraArgs || { temperature: 0, max_tokens: 4000 },
        },
    };
}
|
|
65
|
+
/**
 * Clean a single extracted value, recursing into containers.
 *
 * Strings go through `cleanText` (from the Crawl4aiBasicCrawler helpers),
 * arrays are mapped element by element and stay arrays at every depth,
 * non-null objects are cleaned via `cleanExtractedData`, and every other
 * value (numbers, booleans, null, undefined) is returned unchanged.
 */
function cleanExtractedValue(value) {
    if (typeof value === 'string') {
        return (0, utils_2.cleanText)(value);
    }
    if (Array.isArray(value)) {
        // Arrays must be checked before the generic object branch: treating a
        // nested array as an object would rebuild it as `{ 0: ..., 1: ... }`.
        return value.map((item) => cleanExtractedValue(item));
    }
    if (typeof value === 'object' && value !== null) {
        return cleanExtractedData(value);
    }
    return value;
}
/**
 * Return a cleaned copy of an extracted-data object.
 *
 * Every own enumerable property is copied into a new object, with string
 * values (including those nested in arrays and objects) passed through
 * `cleanText`. The input is not modified.
 *
 * @param {object} [data] - Extracted data; falsy input yields `{}`.
 * @returns {object} A new object with the same keys and cleaned values.
 */
function cleanExtractedData(data) {
    if (!data) {
        return {};
    }
    const cleanedData = {};
    for (const [key, value] of Object.entries(data)) {
        cleanedData[key] = cleanExtractedValue(value);
    }
    return cleanedData;
}
|
|
93
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiContentExtractor/helpers/utils.ts"],"names":[],"mappings":";;;AAqBA,kFAmBC;AAWD,kEAqCC;AAKD,gDAyBC;AArHD,kEAOkD;AANhD,0GAAA,iBAAiB,OAAA;AACjB,4GAAA,mBAAmB,OAAA;AACnB,+GAAA,sBAAsB,OAAA;AACtB,sGAAA,aAAa,OAAA;AACb,kGAAA,SAAS,OAAA;AACT,mGAAA,UAAU,OAAA;AAIZ,oEAAqE;AAUrE,SAAgB,mCAAmC,CAAC,MAAyB;IAC3E,OAAO;QACL,IAAI,EAAE,2BAA2B;QACjC,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,YAAY,EAAE,MAAM,CAAC,YAAY;oBACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAClC,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,SAAS,EAAE,KAAK,CAAC,SAAS;qBAC3B,CAAC,CAAC;iBACJ;aACF;SACF;KACF,CAAC;AACJ,CAAC;AAWD,SAAgB,2BAA2B,CACzC,MAAiB,EACjB,WAAmB,EACnB,QAAgB,EAChB,MAAe,EACf,SAAyD;IAGzD,MAAM,WAAW,GAA8B;QAC7C,QAAQ,EAAE,eAAe;QACzB,WAAW,EAAE,2BAA2B;QACxC,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,eAAe;KAC1B,CAAC;IACF,MAAM,YAAY,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,QAAQ,IAAI,eAAe,CAAC;IAE1E,OAAO;QACL,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE;YACN,UAAU,EAAE;gBACV,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE;oBACN,QAAQ,EAAE,YAAY;oBACtB,SAAS,EAAE,MAAM;iBAClB;aACF;YACD,WAAW;YACX,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,MAAM;aACd;YACD,eAAe,EAAE,QAAQ;YACzB,cAAc,EAAE,KAAK;YACrB,mBAAmB,EAAE,IAAI;YACzB,UAAU,EAAE,SAAS,IAAI,EAAE,WAAW,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE;SAC9D;KACF,CAAC;AACJ,CAAC;AAKD,SAAgB,kBAAkB,CAAC,IAAiB;IAClD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,MAAM,WAAW,GAAgB,EAAE,CAAC;IAEpC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;QAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,GAAG,CAAC,GAAG,IAAA,iBAAS,EAAC,KAAK,CAAC,CAAC;QACtC,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAClC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,OAAO,IAAA,iBAAS,EAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,
IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,OAAO,kBAAkB,CAAC,IAAmB,CAAC,CAAC;gBACjD,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACvD,WAAW,CAAC,GAAG,CAAC,GAAG,kBAAkB,CAAC,KAAoB,CAAC,CAAC;QAC9D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC;AACrB,CAAC"}
|