n8n-nodes-crawl4ai-plus 2.0.9 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/LICENSE +23 -23
  2. package/README.md +129 -41
  3. package/dist/credentials/Crawl4aiApi.credentials.js +2 -34
  4. package/dist/credentials/Crawl4aiApi.credentials.js.map +1 -1
  5. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/crawlMultipleUrls.operation.js +1230 -30
  6. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/crawlMultipleUrls.operation.js.map +1 -1
  7. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/crawlSingleUrl.operation.js +715 -9
  8. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/crawlSingleUrl.operation.js.map +1 -1
  9. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/discoverLinks.operation.d.ts +4 -0
  10. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/discoverLinks.operation.js +495 -0
  11. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/discoverLinks.operation.js.map +1 -0
  12. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/operations.js +9 -0
  13. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/operations.js.map +1 -1
  14. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/processRawHtml.operation.js +1 -1
  15. package/dist/nodes/Crawl4aiPlusBasicCrawler/actions/processRawHtml.operation.js.map +1 -1
  16. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/apiClient.d.ts +4 -1
  17. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/apiClient.js +94 -60
  18. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/apiClient.js.map +1 -1
  19. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/formatters.d.ts +8 -1
  20. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/formatters.js +49 -12
  21. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/formatters.js.map +1 -1
  22. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/interfaces.d.ts +38 -5
  23. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/utils.d.ts +13 -0
  24. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/utils.js +270 -0
  25. package/dist/nodes/Crawl4aiPlusBasicCrawler/helpers/utils.js.map +1 -1
  26. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/cosineExtractor.operation.d.ts +4 -0
  27. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/cosineExtractor.operation.js +445 -0
  28. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/cosineExtractor.operation.js.map +1 -0
  29. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/cssExtractor.operation.js +108 -8
  30. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/cssExtractor.operation.js.map +1 -1
  31. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/jsonExtractor.operation.js +49 -9
  32. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/jsonExtractor.operation.js.map +1 -1
  33. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/llmExtractor.operation.js +134 -17
  34. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/llmExtractor.operation.js.map +1 -1
  35. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/operations.js +27 -9
  36. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/operations.js.map +1 -1
  37. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/regexExtractor.operation.js +206 -9
  38. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/regexExtractor.operation.js.map +1 -1
  39. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/seoExtractor.operation.d.ts +4 -0
  40. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/seoExtractor.operation.js +376 -0
  41. package/dist/nodes/Crawl4aiPlusContentExtractor/actions/seoExtractor.operation.js.map +1 -0
  42. package/dist/nodes/Crawl4aiPlusContentExtractor/helpers/utils.d.ts +4 -2
  43. package/dist/nodes/Crawl4aiPlusContentExtractor/helpers/utils.js +53 -16
  44. package/dist/nodes/Crawl4aiPlusContentExtractor/helpers/utils.js.map +1 -1
  45. package/dist/tsconfig.tsbuildinfo +1 -1
  46. package/index.js +11 -11
  47. package/package.json +1 -1
@@ -0,0 +1,376 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.description = void 0;
4
+ exports.execute = execute;
5
+ const n8n_workflow_1 = require("n8n-workflow");
6
+ const utils_1 = require("../helpers/utils");
7
+ exports.description = [
8
+ {
9
+ displayName: 'URL',
10
+ name: 'url',
11
+ type: 'string',
12
+ required: true,
13
+ default: '',
14
+ placeholder: 'https://example.com',
15
+ description: 'The URL to extract SEO metadata from',
16
+ displayOptions: {
17
+ show: {
18
+ operation: ['seoExtractor'],
19
+ },
20
+ },
21
+ },
22
+ {
23
+ displayName: 'Metadata Types',
24
+ name: 'metadataTypes',
25
+ type: 'multiOptions',
26
+ options: [
27
+ {
28
+ name: 'Basic Meta Tags',
29
+ value: 'basic',
30
+ description: 'Title, description, keywords, canonical URL',
31
+ },
32
+ {
33
+ name: 'JSON-LD Structured Data',
34
+ value: 'jsonLd',
35
+ description: 'Schema.org structured data in JSON-LD format',
36
+ },
37
+ {
38
+ name: 'Language & Locale',
39
+ value: 'language',
40
+ description: 'HTML lang, hreflang tags, locale settings',
41
+ },
42
+ {
43
+ name: 'Open Graph (OG) Tags',
44
+ value: 'openGraph',
45
+ description: 'OG title, description, image, type, URL',
46
+ },
47
+ {
48
+ name: 'Robots & Indexing',
49
+ value: 'robots',
50
+ description: 'Robots meta, noindex, nofollow directives',
51
+ },
52
+ {
53
+ name: 'Twitter Cards',
54
+ value: 'twitter',
55
+ description: 'Twitter card metadata',
56
+ },
57
+ ],
58
+ default: ['basic', 'openGraph', 'jsonLd'],
59
+ description: 'Select which types of SEO metadata to extract',
60
+ displayOptions: {
61
+ show: {
62
+ operation: ['seoExtractor'],
63
+ },
64
+ },
65
+ },
66
+ {
67
+ displayName: 'Browser Options',
68
+ name: 'browserOptions',
69
+ type: 'collection',
70
+ placeholder: 'Add Option',
71
+ default: {},
72
+ displayOptions: {
73
+ show: {
74
+ operation: ['seoExtractor'],
75
+ },
76
+ },
77
+ options: [
78
+ {
79
+ displayName: 'Browser Type',
80
+ name: 'browserType',
81
+ type: 'options',
82
+ options: [
83
+ {
84
+ name: 'Chromium',
85
+ value: 'chromium',
86
+ description: 'Use Chromium browser (default, most compatible)',
87
+ },
88
+ {
89
+ name: 'Firefox',
90
+ value: 'firefox',
91
+ description: 'Use Firefox browser',
92
+ },
93
+ {
94
+ name: 'Webkit',
95
+ value: 'webkit',
96
+ description: 'Use Webkit browser (Safari engine)',
97
+ },
98
+ ],
99
+ default: 'chromium',
100
+ description: 'Which browser engine to use for crawling',
101
+ },
102
+ {
103
+ displayName: 'Enable JavaScript',
104
+ name: 'java_script_enabled',
105
+ type: 'boolean',
106
+ default: true,
107
+ description: 'Whether to enable JavaScript execution (recommended for dynamic SEO tags)',
108
+ },
109
+ {
110
+ displayName: 'Headless Mode',
111
+ name: 'headless',
112
+ type: 'boolean',
113
+ default: true,
114
+ description: 'Whether to run browser in headless mode',
115
+ },
116
+ {
117
+ displayName: 'Timeout (MS)',
118
+ name: 'timeout',
119
+ type: 'number',
120
+ default: 30000,
121
+ description: 'Maximum time to wait for the browser to load the page',
122
+ },
123
+ {
124
+ displayName: 'Wait For',
125
+ name: 'waitFor',
126
+ type: 'string',
127
+ default: '',
128
+ placeholder: 'head',
129
+ description: 'CSS selector to wait for before extracting (useful for dynamically injected meta tags)',
130
+ },
131
+ ],
132
+ },
133
+ {
134
+ displayName: 'Options',
135
+ name: 'options',
136
+ type: 'collection',
137
+ placeholder: 'Add Option',
138
+ default: {},
139
+ displayOptions: {
140
+ show: {
141
+ operation: ['seoExtractor'],
142
+ },
143
+ },
144
+ options: [
145
+ {
146
+ displayName: 'Cache Mode',
147
+ name: 'cacheMode',
148
+ type: 'options',
149
+ options: [
150
+ {
151
+ name: 'Bypass (Skip Cache)',
152
+ value: 'BYPASS',
153
+ description: 'Skip cache for this operation, fetch fresh content',
154
+ },
155
+ {
156
+ name: 'Disabled (No Cache)',
157
+ value: 'DISABLED',
158
+ description: 'No caching at all',
159
+ },
160
+ {
161
+ name: 'Enabled (Read/Write)',
162
+ value: 'ENABLED',
163
+ description: 'Use cache if available, save new results to cache',
164
+ },
165
+ {
166
+ name: 'Read Only',
167
+ value: 'READ_ONLY',
168
+ description: 'Only read from cache, do not write new results',
169
+ },
170
+ {
171
+ name: 'Write Only',
172
+ value: 'WRITE_ONLY',
173
+ description: 'Only write to cache, do not read existing cache',
174
+ },
175
+ ],
176
+ default: 'ENABLED',
177
+ description: 'How to use the cache when crawling',
178
+ },
179
+ {
180
+ displayName: 'Include Raw HTML',
181
+ name: 'includeRawHtml',
182
+ type: 'boolean',
183
+ default: false,
184
+ description: 'Whether to include the raw HTML head section in output',
185
+ },
186
+ ],
187
+ },
188
+ ];
189
/** Field read from the `content` attribute of `<meta name="...">`. */
function metaNameField(name, metaName) {
    return {
        name,
        selector: `meta[name="${metaName}"]`,
        type: 'attribute',
        attribute: 'content',
    };
}
/** Field read from the `content` attribute of `<meta property="...">`. */
function metaPropertyField(name, property) {
    return {
        name,
        selector: `meta[property="${property}"]`,
        type: 'attribute',
        attribute: 'content',
    };
}
/**
 * CSS-extraction field definitions, keyed by the metadata type values offered
 * in the `metadataTypes` parameter. `jsonLd` has no entry here because it is
 * not extracted through CSS selectors.
 */
const SEO_FIELDS = {
    basic: [
        { name: 'title', selector: 'title', type: 'text' },
        metaNameField('metaDescription', 'description'),
        metaNameField('metaKeywords', 'keywords'),
        { name: 'canonicalUrl', selector: 'link[rel="canonical"]', type: 'attribute', attribute: 'href' },
        metaNameField('author', 'author'),
        metaNameField('viewport', 'viewport'),
    ],
    openGraph: [
        metaPropertyField('ogTitle', 'og:title'),
        metaPropertyField('ogDescription', 'og:description'),
        metaPropertyField('ogImage', 'og:image'),
        metaPropertyField('ogType', 'og:type'),
        metaPropertyField('ogUrl', 'og:url'),
        metaPropertyField('ogSiteName', 'og:site_name'),
        metaPropertyField('ogLocale', 'og:locale'),
    ],
    twitter: [
        metaNameField('twitterCard', 'twitter:card'),
        metaNameField('twitterTitle', 'twitter:title'),
        metaNameField('twitterDescription', 'twitter:description'),
        metaNameField('twitterImage', 'twitter:image'),
        metaNameField('twitterSite', 'twitter:site'),
        metaNameField('twitterCreator', 'twitter:creator'),
    ],
    robots: [
        metaNameField('robots', 'robots'),
        metaNameField('googlebot', 'googlebot'),
        metaNameField('bingbot', 'bingbot'),
    ],
    language: [
        { name: 'htmlLang', selector: 'html', type: 'attribute', attribute: 'lang' },
        { name: 'contentLanguage', selector: 'meta[http-equiv="content-language"]', type: 'attribute', attribute: 'content' },
    ],
};
225
/**
 * Runs the SEO extractor operation once per input item.
 *
 * For each item this reads the `url`, `metadataTypes`, `browserOptions` and
 * `options` node parameters, validates them, crawls the URL with the client
 * returned by `getCrawl4aiClient`, and pushes `{ url, success: true, seo }`
 * (plus `rawHtml` when `options.includeRawHtml` is set), paired with the
 * input item.
 *
 * Must be invoked with `this` bound to the n8n execution context: it calls
 * `this.getNodeParameter`, `this.getNode` and `this.continueOnFail`.
 *
 * @param {Array} items Input items; one crawl is performed per entry.
 * @param {*} nodeOptions Accepted for signature compatibility; not read here.
 * @returns {Promise<Array>} Output entries, each paired with its input index.
 * @throws {NodeOperationError} When the URL is empty or invalid, no metadata
 *   type is selected, or the crawl reports failure - unless
 *   `this.continueOnFail()` is true, in which case the failure is emitted as
 *   an error entry and processing continues with the next item.
 */
async function execute(items, nodeOptions) {
    const allResults = [];
    for (let i = 0; i < items.length; i++) {
        try {
            const url = this.getNodeParameter('url', i, '');
            const metadataTypes = this.getNodeParameter('metadataTypes', i, ['basic', 'openGraph', 'jsonLd']);
            const browserOptions = this.getNodeParameter('browserOptions', i, {});
            const options = this.getNodeParameter('options', i, {});
            if (!url) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'URL cannot be empty.', { itemIndex: i });
            }
            if (!(0, utils_1.isValidUrl)(url)) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), `Invalid URL: ${url}`, { itemIndex: i });
            }
            if (!metadataTypes || metadataTypes.length === 0) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), 'At least one metadata type must be selected.', { itemIndex: i });
            }
            // Collect the CSS-extractable fields. JSON-LD is parsed from the
            // HTML further down, so it contributes no selector fields.
            const fields = [];
            for (const metaType of metadataTypes) {
                // Own-property check: a value such as 'constructor' (possible when
                // the parameter comes from an expression) would otherwise resolve
                // to an inherited member and make the spread below throw.
                if (metaType !== 'jsonLd' && Object.prototype.hasOwnProperty.call(SEO_FIELDS, metaType)) {
                    fields.push(...SEO_FIELDS[metaType]);
                }
            }
            const extractionStrategy = fields.length > 0 ? {
                type: 'JsonCssExtractionStrategy',
                params: {
                    schema: {
                        type: 'dict',
                        value: {
                            name: 'SEO_Metadata',
                            baseSelector: 'html',
                            fields: fields.map(field => ({
                                name: field.name,
                                selector: field.selector,
                                type: field.type,
                                ...(field.attribute ? { attribute: field.attribute } : {}),
                            })),
                        },
                    },
                },
            } : null;
            const crawlerOptions = {
                ...browserOptions,
                cacheMode: options.cacheMode || 'ENABLED',
                waitFor: browserOptions.waitFor,
            };
            const crawlerConfig = (0, utils_1.createCrawlerRunConfig)(crawlerOptions);
            if (extractionStrategy) {
                crawlerConfig.extractionStrategy = extractionStrategy;
            }
            const crawler = await (0, utils_1.getCrawl4aiClient)(this);
            const result = await crawler.arun(url, crawlerConfig);
            if (!result.success) {
                throw new n8n_workflow_1.NodeOperationError(this.getNode(), `Failed to crawl URL: ${result.error_message || 'Unknown error'}`, { itemIndex: i });
            }
            let seoData = {};
            if (result.extracted_content) {
                try {
                    const parsed = JSON.parse(result.extracted_content);
                    // The extractor may return a list of records or a single record;
                    // only the first record of a list is used.
                    const record = Array.isArray(parsed) ? parsed[0] : parsed;
                    // Merge plain records only: spreading a primitive (e.g. a string)
                    // would add meaningless index keys to the output.
                    if (record !== null && typeof record === 'object') {
                        seoData = { ...seoData, ...record };
                    }
                }
                catch (e) {
                    // Best effort: unparseable extractor output leaves the CSS-based
                    // fields empty; JSON-LD and hreflang are still read from the HTML.
                }
            }
            // JSON-LD and hreflang are both parsed from the same markup.
            const pageHtml = result.html || result.cleaned_html || '';
            if (metadataTypes.includes('jsonLd')) {
                const jsonLdData = extractJsonLd(pageHtml);
                if (jsonLdData.length > 0) {
                    seoData.jsonLd = jsonLdData;
                }
            }
            if (metadataTypes.includes('language')) {
                const hreflangTags = extractHreflang(pageHtml);
                if (hreflangTags.length > 0) {
                    seoData.hreflang = hreflangTags;
                }
            }
            const formattedResult = {
                url,
                success: true,
                seo: seoData,
                ...(options.includeRawHtml ? { rawHtml: extractHead(result.html || '') } : {}),
            };
            allResults.push({
                json: formattedResult,
                pairedItem: { item: i },
            });
        }
        catch (error) {
            if (this.continueOnFail()) {
                const node = this.getNode();
                // Prefer the item index recorded on the error, if any.
                const recordedIndex = error.itemIndex;
                const errorItemIndex = recordedIndex !== null && recordedIndex !== undefined ? recordedIndex : i;
                allResults.push({
                    json: items[i].json,
                    error: new n8n_workflow_1.NodeOperationError(node, error.message, { itemIndex: errorItemIndex }),
                    pairedItem: { item: i },
                });
                continue;
            }
            throw error;
        }
    }
    return allResults;
}
334
/**
 * Collects every JSON-LD payload embedded in an HTML document.
 *
 * Matches `<script>` elements whose `type` attribute is `application/ld+json`
 * (double-quoted, single-quoted or unquoted, with optional whitespace around
 * `=`) and parses their text content as JSON.
 *
 * @param {string} html Markup to scan.
 * @returns {Array} Parsed payloads in document order. Scripts whose content
 *   is not valid JSON are skipped; non-string or empty input yields `[]`.
 */
function extractJsonLd(html) {
    if (typeof html !== 'string' || html === '') {
        return [];
    }
    // `\stype` requires whitespace directly before the attribute name so that
    // look-alikes such as `data-type="application/ld+json"` are not matched.
    // The unquoted form needs a lookahead so `application/ld+jsonx` is rejected.
    const scriptPattern = /<script\b[^>]*\stype\s*=\s*(?:"application\/ld\+json"|'application\/ld\+json'|application\/ld\+json(?=[\s>\/]))[^>]*>([\s\S]*?)<\/script\s*>/gi;
    const jsonLdData = [];
    let match;
    while ((match = scriptPattern.exec(html)) !== null) {
        try {
            jsonLdData.push(JSON.parse(match[1].trim()));
        }
        catch (e) {
            // Best effort: one malformed block must not discard the valid ones.
        }
    }
    return jsonLdData;
}
348
/**
 * Extracts alternate-language links (`<link rel="alternate" hreflang href>`)
 * from an HTML document.
 *
 * Each `<link>` tag is located first and its attributes are then parsed
 * individually, so the result does not depend on the order in which `rel`,
 * `hreflang` and `href` are written, and attributes such as `data-hreflang`
 * are not mistaken for `hreflang`.
 *
 * @param {string} html Markup to scan.
 * @returns {Array<{lang: string, href: string}>} Unique `lang`/`href` pairs
 *   in document order; non-string or empty input yields `[]`.
 */
function extractHreflang(html) {
    if (typeof html !== 'string' || html === '') {
        return [];
    }
    const linkTagPattern = /<link\b[^>]*>/gi;
    // name = "value" | 'value' | unquoted-value
    const attributePattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>`]+))/g;
    const hreflangTags = [];
    const seen = new Set();
    let tagMatch;
    while ((tagMatch = linkTagPattern.exec(html)) !== null) {
        const attributes = new Map();
        // The pattern is reused for every tag, so rewind its cursor each time.
        attributePattern.lastIndex = 0;
        let attrMatch;
        while ((attrMatch = attributePattern.exec(tagMatch[0])) !== null) {
            const attrName = attrMatch[1].toLowerCase();
            // When an attribute is repeated, the first occurrence wins.
            if (!attributes.has(attrName)) {
                const quoted = attrMatch[2] !== undefined ? attrMatch[2] : attrMatch[3];
                attributes.set(attrName, quoted !== undefined ? quoted : attrMatch[4]);
            }
        }
        // `rel` is a space-separated token list; match `alternate` as a token.
        const relTokens = (attributes.get('rel') || '').toLowerCase().split(/\s+/);
        const lang = attributes.get('hreflang');
        const href = attributes.get('href');
        if (!relTokens.includes('alternate') || !lang || !href) {
            continue;
        }
        const key = `${lang}:${href}`;
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        hreflangTags.push({ lang, href });
    }
    return hreflangTags;
}
372
/**
 * Returns the inner markup of the document's first `<head>` element.
 *
 * @param {string} html Markup to scan.
 * @returns {string} Everything between `<head ...>` and `</head>`, or `''`
 *   when there is no head element or the input is not a string.
 */
function extractHead(html) {
    if (typeof html !== 'string') {
        return '';
    }
    // `(?:\s[^>]*)?` allows attributes on the tag but requires whitespace
    // before them, so `<header>` is not mistaken for `<head>`.
    const match = html.match(/<head(?:\s[^>]*)?>([\s\S]*?)<\/head\s*>/i);
    return match ? match[1] : '';
}
376
+ //# sourceMappingURL=seoExtractor.operation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"seoExtractor.operation.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiPlusContentExtractor/actions/seoExtractor.operation.ts"],"names":[],"mappings":";;;AAuPA,0BAoJC;AArYD,+CAAkD;AAIlD,4CAI0B;AAGb,QAAA,WAAW,GAAsB;IAC5C;QACE,WAAW,EAAE,KAAK;QAClB,IAAI,EAAE,KAAK;QACX,IAAI,EAAE,QAAQ;QACd,QAAQ,EAAE,IAAI;QACd,OAAO,EAAE,EAAE;QACX,WAAW,EAAE,qBAAqB;QAClC,WAAW,EAAE,sCAAsC;QACnD,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;KACF;IACD;QACE,WAAW,EAAE,gBAAgB;QAC7B,IAAI,EAAE,eAAe;QACrB,IAAI,EAAE,cAAc;QACpB,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,iBAAiB;gBACvB,KAAK,EAAE,OAAO;gBACd,WAAW,EAAE,6CAA6C;aAC3D;YACD;gBACE,IAAI,EAAE,yBAAyB;gBAC/B,KAAK,EAAE,QAAQ;gBACf,WAAW,EAAE,8CAA8C;aAC5D;YACD;gBACE,IAAI,EAAE,mBAAmB;gBACzB,KAAK,EAAE,UAAU;gBACjB,WAAW,EAAE,2CAA2C;aACzD;YACJ;gBACK,IAAI,EAAE,sBAAsB;gBAC5B,KAAK,EAAE,WAAW;gBAClB,WAAW,EAAE,yCAAyC;aACvD;YACD;gBACE,IAAI,EAAE,mBAAmB;gBACzB,KAAK,EAAE,QAAQ;gBACf,WAAW,EAAE,2CAA2C;aACzD;YACJ;gBACK,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,SAAS;gBAChB,WAAW,EAAE,uBAAuB;aACrC;SACF;QACD,OAAO,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC;QACzC,WAAW,EAAE,+CAA+C;QAC5D,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;KACF;IACD;QACE,WAAW,EAAE,iBAAiB;QAC9B,IAAI,EAAE,gBAAgB;QACtB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,cAAc;gBAC3B,IAAI,EAAE,aAAa;gBACnB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,UAAU;wBAChB,KAAK,EAAE,UAAU;wBACjB,WAAW,EAAE,iDAAiD;qBAC/D;oBACD;wBACE,IAAI,EAAE,SAAS;wBACf,KAAK,EAAE,SAAS;wBAChB,WAAW,EAAE,qBAAqB;qBACnC;oBACD;wBACE,IAAI,EAAE,QAAQ;wBACd,KAAK,EAAE,QAAQ;wBACf,WAAW,EAAE,oCAAoC;qBAClD;iBACF;gBACD,OAAO,EAAE,UAAU;gBACnB,WAAW,EAAE,0CAA0C;aACxD;YACD;gBACE,WAAW,EAAE,mBAAmB;gBAChC,IAAI,EAAE,qBAAqB;gBAC3B,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,EAAE,2EAA2E;aACzF;YACD;gBACE,WAAW,EAAE,eAAe;gBAC5B,IAAI,EAAE,UAAU;gBAChB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,IAAI;gBACb,WAAW,E
AAE,yCAAyC;aACvD;YACD;gBACE,WAAW,EAAE,cAAc;gBAC3B,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,uDAAuD;aACrE;YACD;gBACE,WAAW,EAAE,UAAU;gBACvB,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,EAAE;gBACX,WAAW,EAAE,MAAM;gBACnB,WAAW,EAAE,wFAAwF;aACtG;SACF;KACF;IACD;QACE,WAAW,EAAE,SAAS;QACtB,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,YAAY;QACzB,OAAO,EAAE,EAAE;QACX,cAAc,EAAE;YACd,IAAI,EAAE;gBACJ,SAAS,EAAE,CAAC,cAAc,CAAC;aAC5B;SACF;QACD,OAAO,EAAE;YACP;gBACE,WAAW,EAAE,YAAY;gBACzB,IAAI,EAAE,WAAW;gBACjB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,qBAAqB;wBAC3B,KAAK,EAAE,QAAQ;wBACf,WAAW,EAAE,oDAAoD;qBAClE;oBACD;wBACE,IAAI,EAAE,qBAAqB;wBAC3B,KAAK,EAAE,UAAU;wBACjB,WAAW,EAAE,mBAAmB;qBACjC;oBACD;wBACE,IAAI,EAAE,sBAAsB;wBAC5B,KAAK,EAAE,SAAS;wBAChB,WAAW,EAAE,mDAAmD;qBACjE;oBACD;wBACE,IAAI,EAAE,WAAW;wBACjB,KAAK,EAAE,WAAW;wBAClB,WAAW,EAAE,gDAAgD;qBAC9D;oBACD;wBACE,IAAI,EAAE,YAAY;wBAClB,KAAK,EAAE,YAAY;wBACnB,WAAW,EAAE,iDAAiD;qBAC/D;iBACF;gBACD,OAAO,EAAE,SAAS;gBAClB,WAAW,EAAE,oCAAoC;aAClD;YACD;gBACE,WAAW,EAAE,kBAAkB;gBAC/B,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE,SAAS;gBACf,OAAO,EAAE,KAAK;gBACd,WAAW,EAAE,wDAAwD;aACtE;SACF;KACF;CACF,CAAC;AAWF,MAAM,UAAU,GAA+B;IAC7C,KAAK,EAAE;QACL,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE;QAClD,EAAE,IAAI,EAAE,iBAAiB,EAAE,QAAQ,EAAE,0BAA0B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAC1G,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACpG,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,EAAE;QACjG,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,qBAAqB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAC5F,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,uBAAuB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;KACjG;IACD,SAAS,EAAE;QACT,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,2BAA2B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACnG,EAAE,IAAI,EAAE,eAAe,EAAE,QAAQ,EAAE,iCAAiC,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAC/G,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,2BAA2
B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACnG,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,0BAA0B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACjG,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,yBAAyB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAC/F,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,+BAA+B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAC1G,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,4BAA4B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;KACtG;IACD,OAAO,EAAE;QACP,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,2BAA2B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACvG,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,EAAE,4BAA4B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACzG,EAAE,IAAI,EAAE,oBAAoB,EAAE,QAAQ,EAAE,kCAAkC,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACrH,EAAE,IAAI,EAAE,cAAc,EAAE,QAAQ,EAAE,4BAA4B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACzG,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,2BAA2B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QACvG,EAAE,IAAI,EAAE,gBAAgB,EAAE,QAAQ,EAAE,8BAA8B,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;KAC9G;IACD,MAAM,EAAE;QACN,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,qBAAqB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAC5F,EAAE,IAAI,EAAE,WAAW,EAAE,QAAQ,EAAE,wBAAwB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;QAClG,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,sBAAsB,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;KAC/F;IACD,QAAQ,EAAE;QACR,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,EAAE;QAC5E,EAAE,IAAI,EAAE,iBAAiB,EAAE,QAAQ,EAAE,qCAAqC,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE;KACtH;CACF,CAAC;AAGK,KAAK,UAAU,OAAO,CAE3B,KAA2B,EAC3B,WAAgC;;IAEhC,MAAM,UAAU,GAAyB,EAAE,CAAC;IAE5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,CAAC;YAEH,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAW,CAAC;YAC1D,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,CAAC,eAAe,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAa,CAAC;YAC9G,MAAM,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YACrF,MAAM,OAAO,GAAG,IAAI,CAAC,gB
AAgB,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAgB,CAAC;YAEvE,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,sBAAsB,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACzF,CAAC;YAED,IAAI,CAAC,IAAA,kBAAU,EAAC,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,gBAAgB,GAAG,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACxF,CAAC;YAED,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjD,MAAM,IAAI,iCAAkB,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,8CAA8C,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;YACjH,CAAC;YAGD,MAAM,MAAM,GAAe,EAAE,CAAC;YAC9B,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,QAAQ,KAAK,QAAQ,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAClD,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACvC,CAAC;YACH,CAAC;YAGD,MAAM,kBAAkB,GAAQ,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClD,IAAI,EAAE,2BAA2B;gBACjC,MAAM,EAAE;oBACN,MAAM,EAAE;wBACN,IAAI,EAAE,MAAM;wBACZ,KAAK,EAAE;4BACL,IAAI,EAAE,cAAc;4BACpB,YAAY,EAAE,MAAM;4BACpB,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;gCAC3B,IAAI,EAAE,KAAK,CAAC,IAAI;gCAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;gCACxB,IAAI,EAAE,KAAK,CAAC,IAAI;gCAChB,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;6BAC3D,CAAC,CAAC;yBACJ;qBACF;iBACF;aACF,CAAC,CAAC,CAAC,IAAI,CAAC;YAGT,MAAM,cAAc,GAAQ;gBAC1B,GAAG,cAAc;gBACjB,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,SAAS;gBACzC,OAAO,EAAE,cAAc,CAAC,OAAO;aAChC,CAAC;YAEF,MAAM,aAAa,GAAG,IAAA,8BAAsB,EAAC,cAAc,CAAC,CAAC;YAG7D,IAAI,kBAAkB,EAAE,CAAC;gBACvB,aAAa,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;YACxD,CAAC;YAGD,MAAM,OAAO,GAAG,MAAM,IAAA,yBAAiB,EAAC,IAAI,CAAC,CAAC;YAG9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;YAEtD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,iCAAkB,CAC1B,IAAI,CAAC,OAAO,EAAE,EACd,wBAAwB,MAAM,CAAC,aAAa,IAAI,eAAe,EAAE,EACjE,EAAE,SAAS,EAAE,CAAC,EAAE,CACjB,CAAC;YACJ,CAAC;YAGD,IAAI,OAAO,GAAgB,EAAE,CAAC;YAE9B,IAAI,MAAM,CAAC,iBAAiB,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC;
oBAEpD,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC/C,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;oBACzC,CAAC;yBAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;wBACtC,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,GAAG,MAAM,EAAE,CAAC;oBACtC,CAAC;gBACH,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;gBAEb,CAAC;YACH,CAAC;YAGD,IAAI,aAAa,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACrC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;gBAC3E,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,MAAM,GAAG,UAAU,CAAC;gBAC9B,CAAC;YACH,CAAC;YAGD,IAAI,aAAa,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;gBACvC,MAAM,YAAY,GAAG,eAAe,CAAC,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;gBAC/E,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC5B,OAAO,CAAC,QAAQ,GAAG,YAAY,CAAC;gBAClC,CAAC;YACH,CAAC;YAGD,MAAM,eAAe,GAAgB;gBACnC,GAAG;gBACH,OAAO,EAAE,IAAI;gBACb,GAAG,EAAE,OAAO;gBACZ,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,WAAW,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC/E,CAAC;YAGF,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,eAAe;gBACrB,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;aACxB,CAAC,CAAC;QAEL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAEf,IAAI,IAAI,CAAC,cAAc,EAAE,EAAE,CAAC;gBAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC5B,MAAM,cAAc,GAAG,MAAC,KAAa,CAAC,SAAS,mCAAI,CAAC,CAAC;gBACrD,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI;oBACnB,KAAK,EAAE,IAAI,iCAAkB,CAAC,IAAI,EAAG,KAAe,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC;oBAC5F,UAAU,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;iBACxB,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAOD,SAAS,aAAa,CAAC,IAAY;IACjC,MAAM,UAAU,GAAU,EAAE,CAAC;IAC7B,MAAM,KAAK,GAAG,4EAA4E,CAAC;IAC3F,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACzC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;QAAC,OAAO,CAAC,EAA
E,CAAC;QAEb,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAKD,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,YAAY,GAA0C,EAAE,CAAC;IAC/D,MAAM,KAAK,GAAG,oGAAoG,CAAC;IACnH,MAAM,MAAM,GAAG,oGAAoG,CAAC;IACpH,MAAM,MAAM,GAAG,oGAAoG,CAAC;IAEpH,IAAI,KAAK,CAAC;IAEV,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC5C,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,OAAO,CAAC,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC5C,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACxD,CAAC;IAGD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,OAAO,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;QAC/B,MAAM,GAAG,GAAG,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAKD,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC3D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC/B,CAAC"}
@@ -1,6 +1,8 @@
1
- export { getCrawl4aiClient, createBrowserConfig, createCrawlerRunConfig, safeJsonParse, cleanText, isValidUrl } from '../../Crawl4aiPlusBasicCrawler/helpers/utils';
1
+ export { getCrawl4aiClient, createBrowserConfig, createCrawlerRunConfig, safeJsonParse, cleanText, isValidUrl, buildLlmConfig, validateLlmCredentials, } from '../../Crawl4aiPlusBasicCrawler/helpers/utils';
2
+ export type { LlmConfigResult } from '../../Crawl4aiPlusBasicCrawler/helpers/utils';
2
3
  import { IDataObject } from 'n8n-workflow';
3
4
  import { CssSelectorSchema, LlmSchema } from './interfaces';
4
5
  export declare function createCssSelectorExtractionStrategy(schema: CssSelectorSchema): any;
5
- export declare function createLlmExtractionStrategy(schema: LlmSchema, instruction: string, provider: string, apiKey?: string, baseUrl?: string): any;
6
+ export declare function createLlmExtractionStrategy(schema: LlmSchema, instruction: string, provider: string, apiKey?: string, baseUrl?: string, inputFormat?: 'markdown' | 'html' | 'fit_markdown'): any;
7
+ export declare function createCosineExtractionStrategy(semanticFilter: string, options?: IDataObject): any;
6
8
  export declare function cleanExtractedData(data: IDataObject): IDataObject;
@@ -1,8 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.isValidUrl = exports.cleanText = exports.safeJsonParse = exports.createCrawlerRunConfig = exports.createBrowserConfig = exports.getCrawl4aiClient = void 0;
3
+ exports.validateLlmCredentials = exports.buildLlmConfig = exports.isValidUrl = exports.cleanText = exports.safeJsonParse = exports.createCrawlerRunConfig = exports.createBrowserConfig = exports.getCrawl4aiClient = void 0;
4
4
  exports.createCssSelectorExtractionStrategy = createCssSelectorExtractionStrategy;
5
5
  exports.createLlmExtractionStrategy = createLlmExtractionStrategy;
6
+ exports.createCosineExtractionStrategy = createCosineExtractionStrategy;
6
7
  exports.cleanExtractedData = cleanExtractedData;
7
8
  var utils_1 = require("../../Crawl4aiPlusBasicCrawler/helpers/utils");
8
9
  Object.defineProperty(exports, "getCrawl4aiClient", { enumerable: true, get: function () { return utils_1.getCrawl4aiClient; } });
@@ -11,6 +12,8 @@ Object.defineProperty(exports, "createCrawlerRunConfig", { enumerable: true, get
11
12
  Object.defineProperty(exports, "safeJsonParse", { enumerable: true, get: function () { return utils_1.safeJsonParse; } });
12
13
  Object.defineProperty(exports, "cleanText", { enumerable: true, get: function () { return utils_1.cleanText; } });
13
14
  Object.defineProperty(exports, "isValidUrl", { enumerable: true, get: function () { return utils_1.isValidUrl; } });
15
+ Object.defineProperty(exports, "buildLlmConfig", { enumerable: true, get: function () { return utils_1.buildLlmConfig; } });
16
+ Object.defineProperty(exports, "validateLlmCredentials", { enumerable: true, get: function () { return utils_1.validateLlmCredentials; } });
14
17
  const utils_2 = require("../../Crawl4aiPlusBasicCrawler/helpers/utils");
15
18
  function createCssSelectorExtractionStrategy(schema) {
16
19
  return {
@@ -32,7 +35,7 @@ function createCssSelectorExtractionStrategy(schema) {
32
35
  },
33
36
  };
34
37
  }
35
- function createLlmExtractionStrategy(schema, instruction, provider, apiKey, baseUrl) {
38
+ function createLlmExtractionStrategy(schema, instruction, provider, apiKey, baseUrl, inputFormat) {
36
39
  const llmConfigParams = {
37
40
  provider: provider || 'openai/gpt-4o',
38
41
  api_token: apiKey,
@@ -40,22 +43,56 @@ function createLlmExtractionStrategy(schema, instruction, provider, apiKey, base
40
43
  if (baseUrl && baseUrl.trim() !== '') {
41
44
  llmConfigParams.api_base = baseUrl;
42
45
  }
46
+ const strategyParams = {
47
+ llm_config: {
48
+ type: 'LLMConfig',
49
+ params: llmConfigParams,
50
+ },
51
+ instruction,
52
+ schema: {
53
+ type: 'dict',
54
+ value: schema,
55
+ },
56
+ extraction_type: 'schema',
57
+ apply_chunking: false,
58
+ force_json_response: true,
59
+ };
60
+ if (inputFormat && inputFormat !== 'markdown') {
61
+ strategyParams.input_format = inputFormat;
62
+ }
43
63
  return {
44
64
  type: 'LLMExtractionStrategy',
45
- params: {
46
- llm_config: {
47
- type: 'LLMConfig',
48
- params: llmConfigParams,
49
- },
50
- instruction,
51
- schema: {
52
- type: 'dict',
53
- value: schema,
54
- },
55
- extraction_type: 'schema',
56
- apply_chunking: false,
57
- force_json_response: true,
58
- },
65
+ params: strategyParams,
66
+ };
67
+ }
68
+ function createCosineExtractionStrategy(semanticFilter, options = {}) {
69
+ const strategyParams = {
70
+ semantic_filter: semanticFilter,
71
+ };
72
+ if (options.wordCountThreshold !== undefined) {
73
+ strategyParams.word_count_threshold = Number(options.wordCountThreshold);
74
+ }
75
+ if (options.simThreshold !== undefined) {
76
+ strategyParams.sim_threshold = Number(options.simThreshold);
77
+ }
78
+ if (options.maxDist !== undefined) {
79
+ strategyParams.max_dist = Number(options.maxDist);
80
+ }
81
+ if (options.linkageMethod !== undefined && options.linkageMethod !== '') {
82
+ strategyParams.linkage_method = String(options.linkageMethod);
83
+ }
84
+ if (options.topK !== undefined) {
85
+ strategyParams.top_k = Number(options.topK);
86
+ }
87
+ if (options.modelName !== undefined && options.modelName !== '') {
88
+ strategyParams.model_name = String(options.modelName);
89
+ }
90
+ if (options.verbose === true) {
91
+ strategyParams.verbose = true;
92
+ }
93
+ return {
94
+ type: 'CosineStrategy',
95
+ params: strategyParams,
59
96
  };
60
97
  }
61
98
  function cleanExtractedData(data) {
@@ -1 +1 @@
1
- {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiPlusContentExtractor/helpers/utils.ts"],"names":[],"mappings":";;;AAmBA,kFAmBC;AAWD,kEAkCC;AAKD,gDAyBC;AAhHD,sEAOsD;AANpD,0GAAA,iBAAiB,OAAA;AACjB,4GAAA,mBAAmB,OAAA;AACnB,+GAAA,sBAAsB,OAAA;AACtB,sGAAA,aAAa,OAAA;AACb,kGAAA,SAAS,OAAA;AACT,mGAAA,UAAU,OAAA;AAIZ,wEAAyE;AAQzE,SAAgB,mCAAmC,CAAC,MAAyB;IAC3E,OAAO;QACL,IAAI,EAAE,2BAA2B;QACjC,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,YAAY,EAAE,MAAM,CAAC,YAAY;oBACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAClC,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,SAAS,EAAE,KAAK,CAAC,SAAS;qBAC3B,CAAC,CAAC;iBACJ;aACF;SACF;KACF,CAAC;AACJ,CAAC;AAWD,SAAgB,2BAA2B,CACzC,MAAiB,EACjB,WAAmB,EACnB,QAAgB,EAChB,MAAe,EACf,OAAgB;IAEhB,MAAM,eAAe,GAAQ;QAC3B,QAAQ,EAAE,QAAQ,IAAI,eAAe;QACrC,SAAS,EAAE,MAAM;KAClB,CAAC;IAGF,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrC,eAAe,CAAC,QAAQ,GAAG,OAAO,CAAC;IACrC,CAAC;IAED,OAAO;QACL,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE;YACN,UAAU,EAAE;gBACV,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,eAAe;aACxB;YACD,WAAW;YACX,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,MAAM;aACd;YACD,eAAe,EAAE,QAAQ;YACzB,cAAc,EAAE,KAAK;YACrB,mBAAmB,EAAE,IAAI;SAC1B;KACF,CAAC;AACJ,CAAC;AAKD,SAAgB,kBAAkB,CAAC,IAAiB;IAClD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,MAAM,WAAW,GAAgB,EAAE,CAAC;IAEpC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;QAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,GAAG,CAAC,GAAG,IAAA,iBAAS,EAAC,KAAK,CAAC,CAAC;QACtC,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAClC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,OAAO,IAAA,iBAAS,EAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,OAAO,kBAAkB,CAAC,IAAmB,CAAC,CAAC;gBACjD,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CA
AC,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACvD,WAAW,CAAC,GAAG,CAAC,GAAG,kBAAkB,CAAC,KAAoB,CAAC,CAAC;QAC9D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC;AACrB,CAAC"}
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../nodes/Crawl4aiPlusContentExtractor/helpers/utils.ts"],"names":[],"mappings":";;;AAwBA,kFAmBC;AAYD,kEA0CC;AAQD,wEAyCC;AAKD,gDAyBC;AA/KD,sEASsD;AARpD,0GAAA,iBAAiB,OAAA;AACjB,4GAAA,mBAAmB,OAAA;AACnB,+GAAA,sBAAsB,OAAA;AACtB,sGAAA,aAAa,OAAA;AACb,kGAAA,SAAS,OAAA;AACT,mGAAA,UAAU,OAAA;AACV,uGAAA,cAAc,OAAA;AACd,+GAAA,sBAAsB,OAAA;AAOxB,wEAAyE;AAQzE,SAAgB,mCAAmC,CAAC,MAAyB;IAC3E,OAAO;QACL,IAAI,EAAE,2BAA2B;QACjC,MAAM,EAAE;YACN,MAAM,EAAE;gBACN,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACL,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,YAAY,EAAE,MAAM,CAAC,YAAY;oBACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;wBAClC,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,IAAI,EAAE,KAAK,CAAC,IAAI;wBAChB,SAAS,EAAE,KAAK,CAAC,SAAS;qBAC3B,CAAC,CAAC;iBACJ;aACF;SACF;KACF,CAAC;AACJ,CAAC;AAYD,SAAgB,2BAA2B,CACzC,MAAiB,EACjB,WAAmB,EACnB,QAAgB,EAChB,MAAe,EACf,OAAgB,EAChB,WAAkD;IAElD,MAAM,eAAe,GAAQ;QAC3B,QAAQ,EAAE,QAAQ,IAAI,eAAe;QACrC,SAAS,EAAE,MAAM;KAClB,CAAC;IAGF,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACrC,eAAe,CAAC,QAAQ,GAAG,OAAO,CAAC;IACrC,CAAC;IAED,MAAM,cAAc,GAAQ;QAC1B,UAAU,EAAE;YACV,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,eAAe;SACxB;QACD,WAAW;QACX,MAAM,EAAE;YACN,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,MAAM;SACd;QACD,eAAe,EAAE,QAAQ;QACzB,cAAc,EAAE,KAAK;QACrB,mBAAmB,EAAE,IAAI;KAC1B,CAAC;IAGF,IAAI,WAAW,IAAI,WAAW,KAAK,UAAU,EAAE,CAAC;QAC9C,cAAc,CAAC,YAAY,GAAG,WAAW,CAAC;IAC5C,CAAC;IAED,OAAO;QACL,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE,cAAc;KACvB,CAAC;AACJ,CAAC;AAQD,SAAgB,8BAA8B,CAC5C,cAAsB,EACtB,UAAuB,EAAE;IAEzB,MAAM,cAAc,GAAQ;QAC1B,eAAe,EAAE,cAAc;KAChC,CAAC;IAGF,IAAI,OAAO,CAAC,kBAAkB,KAAK,SAAS,EAAE,CAAC;QAC7C,cAAc,CAAC,oBAAoB,GAAG,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;IAC3E,CAAC;IAED,IAAI,OAAO,CAAC,YAAY,KAAK,SAAS,EAAE,CAAC;QACvC,cAAc,CAAC,aAAa,GAAG,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;IAC9D,CAAC;IAED,IAAI,OAAO,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QAClC,cAAc,CAAC,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,OAAO,
CAAC,aAAa,KAAK,SAAS,IAAI,OAAO,CAAC,aAAa,KAAK,EAAE,EAAE,CAAC;QACxE,cAAc,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAChE,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QAC/B,cAAc,CAAC,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,KAAK,SAAS,IAAI,OAAO,CAAC,SAAS,KAAK,EAAE,EAAE,CAAC;QAChE,cAAc,CAAC,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,OAAO,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;QAC7B,cAAc,CAAC,OAAO,GAAG,IAAI,CAAC;IAChC,CAAC;IAED,OAAO;QACL,IAAI,EAAE,gBAAgB;QACtB,MAAM,EAAE,cAAc;KACvB,CAAC;AACJ,CAAC;AAKD,SAAgB,kBAAkB,CAAC,IAAiB;IAClD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IAErB,MAAM,WAAW,GAAgB,EAAE,CAAC;IAEpC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;QAC5C,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,CAAC,GAAG,CAAC,GAAG,IAAA,iBAAS,EAAC,KAAK,CAAC,CAAC;QACtC,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAClC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC7B,OAAO,IAAA,iBAAS,EAAC,IAAI,CAAC,CAAC;gBACzB,CAAC;qBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;oBACrD,OAAO,kBAAkB,CAAC,IAAmB,CAAC,CAAC;gBACjD,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACvD,WAAW,CAAC,GAAG,CAAC,GAAG,kBAAkB,CAAC,KAAoB,CAAC,CAAC;QAC9D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QAC3B,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,WAAW,CAAC;AACrB,CAAC"}