n8n-nodes-seo-scanner 1.2.32 → 1.2.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,97 @@ function readNullableNumber(value) {
85
85
  const numberValue = Number(value);
86
86
  return Number.isFinite(numberValue) ? numberValue : undefined;
87
87
  }
88
/**
 * Coerces a loosely-typed input into a flat list of trimmed, non-empty strings.
 *
 * - `undefined` / `null` yield `undefined`, so callers can tell "absent" from "empty".
 * - Arrays are flattened recursively; entries that resolve to nothing are dropped.
 * - A string starting with `[` is first tried as a JSON array.
 * - Any other string is split on line breaks.
 * - Every remaining type yields an empty list.
 *
 * @param {unknown} value Raw input value.
 * @returns {string[] | undefined} Flat list of strings, or `undefined` when the input is nullish.
 */
function readStringArray(value) {
  if (value === undefined || value === null) {
    return undefined;
  }

  if (Array.isArray(value)) {
    const collected = [];
    for (const entry of value) {
      const nested = readStringArray(entry);
      if (!nested) {
        continue;
      }
      for (const text of nested) {
        collected.push(text);
      }
    }
    return collected;
  }

  if (typeof value !== 'string') {
    return [];
  }

  const text = value.trim();
  if (text === '') {
    return [];
  }

  if (text.startsWith('[')) {
    try {
      const decoded = JSON.parse(text);
      if (Array.isArray(decoded)) {
        return readStringArray(decoded) || [];
      }
    } catch {
      // Not valid JSON: fall through and treat the text as newline-separated.
    }
  }

  const lines = [];
  for (const rawLine of text.split(/\r?\n/)) {
    const cleaned = rawLine.trim();
    if (cleaned) {
      lines.push(cleaned);
    }
  }
  return lines;
}
110
/**
 * Removes case-insensitive duplicates from a sequence of strings.
 * The first spelling encountered for each lowercase key is the one kept,
 * and the original relative order is preserved.
 *
 * @param {Iterable<string>} values Strings to de-duplicate.
 * @returns {string[]} New array with one entry per distinct lowercase key.
 */
function uniqueStrings(values) {
  // A Map keeps insertion order, so it records both membership and output order.
  const firstSpellingByKey = new Map();
  for (const candidate of values) {
    const lowered = candidate.toLowerCase();
    if (!firstSpellingByKey.has(lowered)) {
      firstSpellingByKey.set(lowered, candidate);
    }
  }
  return [...firstSpellingByKey.values()];
}
122
/**
 * Lowercases a hostname and drops a single leading `www.` label, if present.
 *
 * @param {string} hostname Hostname to normalize.
 * @returns {string} Lowercased hostname without one leading `www.` prefix.
 */
function stripWww(hostname) {
  const lowered = hostname.toLowerCase();
  return lowered.startsWith('www.') ? lowered.slice('www.'.length) : lowered;
}
125
/**
 * Tells whether two absolute URLs share the same hostname once case and a
 * leading `www.` are ignored. Scheme, port and path are not compared.
 *
 * @param {string} url1 First absolute URL.
 * @param {string} url2 Second absolute URL.
 * @returns {boolean} `true` when both hostnames match; `false` when they differ
 *   or when either value cannot be parsed as a URL.
 */
function sameCrawlDomain(url1, url2) {
  try {
    const [firstHost, secondHost] = [url1, url2].map((rawUrl) => stripWww(new URL(rawUrl).hostname));
    return firstHost === secondHost;
  } catch {
    // An unparsable URL can never be on the crawl domain.
    return false;
  }
}
133
/**
 * Normalizes an absolute http(s) URL into the form used for crawl comparisons:
 * the fragment is removed, query parameters are sorted, and one trailing `/`
 * at the very end of the serialized URL is dropped.
 *
 * @param {string} value Candidate URL.
 * @returns {string} Normalized URL, or `''` when the value is not an absolute
 *   http/https URL or cannot be parsed.
 */
function normalizeCrawlAbsoluteUrl(value) {
  const hasHttpScheme = /^https?:\/\//i.test(value);
  if (!hasHttpScheme) {
    return '';
  }

  try {
    const parsed = new URL(value);
    if (!['http:', 'https:'].includes(parsed.protocol)) {
      return '';
    }
    parsed.hash = '';
    parsed.searchParams.sort();
    const { href } = parsed;
    return href.endsWith('/') ? href.slice(0, -1) : href;
  } catch {
    return '';
  }
}
148
/**
 * Case-insensitive substring match of `pattern` against a URL.
 * The raw URL text is checked first; if that misses and the URL parses, the
 * parsed `pathname + search` is checked as well (the parsed form can differ
 * from the raw text, e.g. through percent-encoding).
 *
 * @param {string} url URL to inspect.
 * @param {string} pattern Fragment to look for.
 * @returns {boolean} `true` when the fragment occurs in either form of the URL.
 */
function urlContainsPattern(url, pattern) {
  const haystack = url.toLowerCase();
  const needle = pattern.toLowerCase();
  if (haystack.includes(needle)) {
    return true;
  }

  let parsedUrl;
  try {
    parsedUrl = new URL(url);
  } catch {
    // Unparsable URL: only the raw text comparison applies, and it already missed.
    return false;
  }
  const pathWithQuery = `${parsedUrl.pathname}${parsedUrl.search}`.toLowerCase();
  return pathWithQuery.includes(needle);
}
160
/**
 * Normalizes a plan's internal-page limit.
 *
 * @param {unknown} value Raw limit value.
 * @returns {number | null | undefined} `null` is passed through unchanged;
 *   a finite number is rounded to the nearest integer and raised to at least 1;
 *   anything else yields `undefined`.
 */
function normalizePlanInternalLimit(value) {
  if (value === null) {
    return null;
  }

  const isUsableNumber = typeof value === 'number' && Number.isFinite(value);
  if (!isUsableNumber) {
    return undefined;
  }

  const rounded = Math.round(value);
  return rounded < 1 ? 1 : rounded;
}
167
/**
 * Builds the user-facing (Spanish) message shown when the requested number of
 * internal pages exceeds the plan limit. A dedicated message is used for the
 * "max" plan (case-insensitive) when the limit is exactly 100.
 *
 * @param {number} limit Plan limit to mention in the message.
 * @param {string | null | undefined} planName Plan name, if known.
 * @returns {string} Error message text.
 */
function planInternalLimitError(limit, planName) {
  const isMaxPlanAtCap = planName?.toLowerCase() === 'max' && limit === 100;
  return isMaxPlanAtCap
    ? 'Tu plan Max permite escanear como máximo 100 páginas internas por escaneo. Para escanear más páginas, puedes usar el escaneo por API o contactar con soporte.'
    : `Tu plan permite escanear como máximo ${limit} páginas internas por escaneo.`;
}
173
/**
 * Builds the user-facing (Spanish) message shown when the required pages do
 * not fit in what is left of the plan's internal-page limit.
 *
 * @param {number} availableRequiredSlots Number of required pages that still fit.
 * @returns {string} Error message text.
 */
function requiredSlotsLimitError(availableRequiredSlots) {
  if (availableRequiredSlots <= 0) {
    return 'Ya has usado todo el límite de páginas internas de tu plan. Baja el número de páginas internas a escanear para poder añadir páginas obligatorias.';
  }
  // Use the singular noun phrase for exactly one slot; the previous text
  // always used the plural and produced "1 páginas obligatorias".
  // NOTE(review): callers pass this message through `localize` — confirm it
  // also recognizes the singular wording.
  const slotsPhrase = availableRequiredSlots === 1
    ? '1 página obligatoria'
    : `${availableRequiredSlots} páginas obligatorias`;
  return `Con la configuración actual solo puedes añadir ${slotsPhrase}. Baja el número de páginas internas a escanear o mejora tu plan.`;
}
88
179
  function readHeader(headers, name) {
89
180
  const direct = readString(headers[name]);
90
181
  if (direct)
@@ -228,8 +319,17 @@ function getIncomingWebhookPayload(input) {
228
319
  readBearerToken(authorization),
229
320
  apiToken: readString(source.apiToken) ||
230
321
  readHeader(headers, 'x-api-token'),
231
- scanInternalLinks: readBoolean(source.allsite) ?? readBoolean(source.scanInternalLinks),
232
- maxInternalUrls: readNumber(source.maxInternalUrls),
322
+ scanFullSite: readBoolean(source.scanFullSite),
323
+ scanInternalLinks: readBoolean(source.scanFullSite) ?? readBoolean(source.allsite) ?? readBoolean(source.scanInternalLinks),
324
+ maxUrls: readNumber(source.maxUrls),
325
+ maxInternalUrls: readNumber(source.maxInternalUrls) ?? readNumber(source.maxUrls),
326
+ requiredUrls: readStringArray(source.requiredUrls),
327
+ excludedPatterns: readStringArray(source.excludedPatterns),
328
+ ignoredUrls: readStringArray(source.ignoredUrls),
329
+ planName: readString(source.planName) || readString(source.webPlan) || readString(source.apiPlan),
330
+ planInternalPageLimit: readNullableNumber(source.planInternalPageLimit) ??
331
+ readNullableNumber(source.webInternalPageLimit) ??
332
+ readNullableNumber(source.maxWebScanUrls),
233
333
  scanLimitSource: readString(source.scanLimitSource) === 'api' ? 'api' : undefined,
234
334
  apiPlan: readString(source.apiPlan),
235
335
  apiInternalPageLimit: readNullableNumber(source.apiInternalPageLimit),
@@ -287,6 +387,24 @@ class SeoScanner {
287
387
  description: 'Número máximo de páginas internas a analizar. El rastreo sigue enlaces descubiertos en cada página y usa el sitemap como semilla si existe.',
288
388
  displayOptions: { show: { scanInternalLinks: [true] } },
289
389
  },
390
+ {
391
+ displayName: 'Páginas Obligatorias a Escanear',
392
+ name: 'requiredUrls',
393
+ type: 'string',
394
+ typeOptions: { alwaysOpenEditWindow: true },
395
+ default: '',
396
+ description: 'URLs completas del mismo dominio que se analizarán aunque no aparezcan en enlaces internos o sitemap. Una por línea.',
397
+ displayOptions: { show: { scanInternalLinks: [true] } },
398
+ },
399
+ {
400
+ displayName: 'Excluir URLs Que Contengan',
401
+ name: 'excludedPatterns',
402
+ type: 'string',
403
+ typeOptions: { alwaysOpenEditWindow: true },
404
+ default: '',
405
+ description: 'Fragmentos de URL que no se analizarán ni aparecerán como errores. Uno por línea.',
406
+ displayOptions: { show: { scanInternalLinks: [true] } },
407
+ },
290
408
  {
291
409
  displayName: 'Ignorar Fallos (Uno por línea)',
292
410
  name: 'ignoredIssues',
@@ -578,6 +696,11 @@ class SeoScanner {
578
696
  apiToken,
579
697
  });
580
698
  maxInternalUrls = apiScanLimit.effectiveInternalPageLimit;
699
+ const incomingPlanInternalLimit = normalizePlanInternalLimit(incoming.planInternalPageLimit);
700
+ const effectivePlanInternalLimit = apiScanLimit.apiInternalPageLimit !== undefined
701
+ ? apiScanLimit.apiInternalPageLimit
702
+ : incomingPlanInternalLimit;
703
+ const effectivePlanName = incoming.planName || apiScanLimit.apiPlan || '';
581
704
  const timeoutSeconds = Math.max(5, Math.min(60, this.getNodeParameter('timeoutSeconds', 0) ?? 15));
582
705
  const timeoutMs = timeoutSeconds * 1000;
583
706
  const followRedirects = this.getNodeParameter('followRedirects', 0) !== false;
@@ -604,7 +727,9 @@ class SeoScanner {
604
727
  const generateHtmlReport = configuredHtmlReport || hasIncomingCallback;
605
728
  const detailOpts = this.getNodeParameter('detailOptions', 0) || {};
606
729
  const ignoredIssues = (this.getNodeParameter('ignoredIssues', 0, '') || '').split('\n').map(s => s.trim()).filter(Boolean);
607
- const ignoredPages = (this.getNodeParameter('ignoredPages', 0, '') || '').split('\n').map(s => s.trim()).filter(Boolean);
730
+ const configuredRequiredUrls = (this.getNodeParameter('requiredUrls', 0, '') || '').split('\n').map(s => s.trim()).filter(Boolean);
731
+ const configuredExcludedPatterns = (this.getNodeParameter('excludedPatterns', 0, '') || '').split('\n').map(s => s.trim()).filter(Boolean);
732
+ const configuredIgnoredPages = (this.getNodeParameter('ignoredPages', 0, '') || '').split('\n').map(s => s.trim()).filter(Boolean);
608
733
  const analyzeOpts = {
609
734
  includeImageDetails: detailOpts.includeImageDetails !== false,
610
735
  includeLinkDetails: detailOpts.includeLinkDetails !== false,
@@ -630,6 +755,91 @@ class SeoScanner {
630
755
  await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
631
756
  return [[buildErrorOutput({ message: localize(errorMessage), url, incoming, apiToken })]];
632
757
  }
758
+ let requiredUrls = [];
759
+ let excludedPatterns = [];
760
+ let ignoredUrls = [];
761
+ if (scanInternalLinks) {
762
+ const normalizeUrlList = (values, label) => {
763
+ const urls = [];
764
+ const seen = new Set();
765
+ for (const value of values) {
766
+ const normalized = normalizeCrawlAbsoluteUrl(value);
767
+ if (!normalized)
768
+ return { ok: false, error: `${label} debe incluir URLs completas que empiecen por http:// o https://.` };
769
+ if (!sameCrawlDomain(normalized, baseUrl))
770
+ return { ok: false, error: `${label} solo puede incluir URLs del mismo dominio que se está escaneando.` };
771
+ const key = normalized.toLowerCase();
772
+ if (seen.has(key))
773
+ return { ok: false, error: `${label} no puede incluir URLs duplicadas.` };
774
+ seen.add(key);
775
+ urls.push(normalized);
776
+ }
777
+ return { ok: true, urls };
778
+ };
779
+ const requiredResult = normalizeUrlList(uniqueStrings([...configuredRequiredUrls, ...(incoming.requiredUrls || [])]), 'Páginas obligatorias a escanear');
780
+ if (!requiredResult.ok) {
781
+ await notifyIncomingCallbackError(incoming, apiToken, localize(requiredResult.error));
782
+ return [[buildErrorOutput({ message: localize(requiredResult.error), url: baseUrl, incoming, apiToken })]];
783
+ }
784
+ const ignoredResult = normalizeUrlList(uniqueStrings(incoming.ignoredUrls || []), 'URLs ignoradas');
785
+ if (!ignoredResult.ok) {
786
+ await notifyIncomingCallbackError(incoming, apiToken, localize(ignoredResult.error));
787
+ return [[buildErrorOutput({ message: localize(ignoredResult.error), url: baseUrl, incoming, apiToken })]];
788
+ }
789
+ requiredUrls = requiredResult.urls;
790
+ ignoredUrls = ignoredResult.urls;
791
+ if (requiredUrls.some((requiredUrl) => (0, urlUtils_1.normalizeUrlForDedupe)(requiredUrl).toLowerCase() === (0, urlUtils_1.normalizeUrlForDedupe)(baseUrl).toLowerCase())) {
792
+ const errorMessage = 'La URL principal ya se escanea aparte. No la añadas como página obligatoria.';
793
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
794
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
795
+ }
796
+ const rawPatterns = uniqueStrings([
797
+ ...configuredIgnoredPages,
798
+ ...configuredExcludedPatterns,
799
+ ...(incoming.excludedPatterns || []),
800
+ ]);
801
+ for (const pattern of rawPatterns) {
802
+ const trimmed = pattern.trim();
803
+ if (!trimmed) {
804
+ const errorMessage = 'Las reglas de exclusión no pueden estar vacías.';
805
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
806
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
807
+ }
808
+ if (trimmed === '/') {
809
+ const errorMessage = 'No puedes excluir solo "/", porque bloquearía todo el sitio.';
810
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
811
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
812
+ }
813
+ if (urlContainsPattern(baseUrl, trimmed)) {
814
+ const errorMessage = 'Una regla de exclusión no puede bloquear directamente la URL principal.';
815
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
816
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
817
+ }
818
+ excludedPatterns.push(trimmed);
819
+ }
820
+ if (typeof effectivePlanInternalLimit === 'number' && maxInternalUrls > effectivePlanInternalLimit) {
821
+ const errorMessage = planInternalLimitError(effectivePlanInternalLimit, effectivePlanName);
822
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
823
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
824
+ }
825
+ if (typeof effectivePlanInternalLimit === 'number' && maxInternalUrls + requiredUrls.length > effectivePlanInternalLimit) {
826
+ const availableRequiredSlots = effectivePlanInternalLimit - maxInternalUrls;
827
+ const errorMessage = requiredSlotsLimitError(availableRequiredSlots);
828
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
829
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
830
+ }
831
+ const ignoredUrlSet = new Set(ignoredUrls.map((ignoredUrl) => ignoredUrl.toLowerCase()));
832
+ if (requiredUrls.some((requiredUrl) => ignoredUrlSet.has(requiredUrl.toLowerCase()))) {
833
+ const errorMessage = 'Una página obligatoria también está marcada como ignorada.';
834
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
835
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
836
+ }
837
+ if (requiredUrls.some((requiredUrl) => excludedPatterns.some((pattern) => urlContainsPattern(requiredUrl, pattern)))) {
838
+ const errorMessage = 'Una página obligatoria coincide con una regla de exclusión.';
839
+ await notifyIncomingCallbackError(incoming, apiToken, localize(errorMessage));
840
+ return [[buildErrorOutput({ message: localize(errorMessage), url: baseUrl, incoming, apiToken })]];
841
+ }
842
+ }
633
843
  if (!apiToken) {
634
844
  try {
635
845
  validationInfo = await validateApiKeyWithApp({
@@ -881,42 +1091,96 @@ class SeoScanner {
881
1091
  let internalResults = [];
882
1092
  const internalHtmlByFinalUrl = new Map();
883
1093
  let crawlLimitReached = false;
1094
+ const exactIgnoredUrls = new Set(ignoredUrls.map((ignoredUrl) => (0, urlUtils_1.normalizeUrlForDedupe)(ignoredUrl).toLowerCase()));
1095
+ const ignoredByRuleUrls = new Map();
1096
+ const trackIgnoredByRule = (candidate) => {
1097
+ const norm = (0, urlUtils_1.normalizeUrlForDedupe)(candidate).toLowerCase();
1098
+ if (!ignoredByRuleUrls.has(norm))
1099
+ ignoredByRuleUrls.set(norm, candidate);
1100
+ };
1101
+ const isSkippedByCrawlRules = (candidate) => {
1102
+ const norm = (0, urlUtils_1.normalizeUrlForDedupe)(candidate).toLowerCase();
1103
+ if (exactIgnoredUrls.has(norm))
1104
+ return true;
1105
+ return excludedPatterns.length > 0 && isIgnoredPageUrl(candidate, excludedPatterns);
1106
+ };
884
1107
  if (scanInternalLinks) {
885
1108
  const crawlQueue = [];
886
1109
  const queuedUrls = new Set();
1110
+ const requiredUrlSet = new Set(requiredUrls.map((requiredUrl) => (0, urlUtils_1.normalizeUrlForDedupe)(requiredUrl).toLowerCase()));
887
1111
  const scannedUrls = new Set([(0, urlUtils_1.normalizeUrlForDedupe)(mainResult.finalUrl), (0, urlUtils_1.normalizeUrlForDedupe)(baseUrl)]);
888
- const addToCrawlQueue = (candidates) => {
1112
+ let discoveredCrawlAttempts = 0;
1113
+ let queuedDiscoveredUrls = 0;
1114
+ const addToCrawlQueue = (candidates, source) => {
889
1115
  for (const candidate of candidates) {
890
- if (internalResults.length + crawlQueue.length >= maxInternalUrls) {
1116
+ if (source === 'discovered' && discoveredCrawlAttempts + queuedDiscoveredUrls >= maxInternalUrls) {
891
1117
  crawlLimitReached = true;
892
1118
  break;
893
1119
  }
894
1120
  if (!candidate || !isLikelyHtmlPageUrl(candidate))
895
1121
  continue;
896
- if (!(0, urlUtils_1.sameOrigin)(candidate, mainResult.finalUrl))
1122
+ if (!sameCrawlDomain(candidate, mainResult.finalUrl))
897
1123
  continue;
898
- if (ignoredPages.length > 0 && isIgnoredPageUrl(candidate, ignoredPages))
1124
+ if (isSkippedByCrawlRules(candidate)) {
1125
+ trackIgnoredByRule(candidate);
899
1126
  continue;
1127
+ }
900
1128
  const norm = (0, urlUtils_1.normalizeUrlForDedupe)(candidate);
901
1129
  if (scannedUrls.has(norm) || queuedUrls.has(norm))
902
1130
  continue;
903
1131
  queuedUrls.add(norm);
904
- crawlQueue.push(candidate);
1132
+ if (source === 'discovered')
1133
+ queuedDiscoveredUrls++;
1134
+ crawlQueue.push({ url: candidate, source });
905
1135
  }
906
1136
  };
907
- addToCrawlQueue(mainResult.linksInternalUrls);
908
- addToCrawlQueue(sitemapPageUrls);
909
- while (crawlQueue.length > 0 && internalResults.length < maxInternalUrls) {
910
- const link = crawlQueue.shift();
1137
+ addToCrawlQueue(requiredUrls, 'required');
1138
+ addToCrawlQueue(mainResult.linksInternalUrls, 'discovered');
1139
+ addToCrawlQueue(sitemapPageUrls, 'discovered');
1140
+ while (crawlQueue.length > 0) {
1141
+ const queueItem = crawlQueue.shift();
1142
+ const link = queueItem.url;
1143
+ const isRequiredCrawlUrl = queueItem.source === 'required' || requiredUrlSet.has((0, urlUtils_1.normalizeUrlForDedupe)(link).toLowerCase());
1144
+ if (queueItem.source === 'discovered') {
1145
+ queuedDiscoveredUrls = Math.max(0, queuedDiscoveredUrls - 1);
1146
+ if (discoveredCrawlAttempts >= maxInternalUrls) {
1147
+ crawlLimitReached = true;
1148
+ break;
1149
+ }
1150
+ }
911
1151
  const requestedNorm = (0, urlUtils_1.normalizeUrlForDedupe)(link);
912
1152
  queuedUrls.delete(requestedNorm);
913
1153
  if (scannedUrls.has(requestedNorm))
914
1154
  continue;
1155
+ if (isSkippedByCrawlRules(link)) {
1156
+ trackIgnoredByRule(link);
1157
+ continue;
1158
+ }
915
1159
  scannedUrls.add(requestedNorm);
916
1160
  try {
917
1161
  const { html: innerHtml, finalUrl: innerFinal, statusCode: innerStatus, responseTimeMs: innerTime, timeToFirstByteMs: innerTtfb } = await (0, networkUtils_1.fetchPage)(link, timeoutMs, fetchOpts);
918
- if (!(0, urlUtils_1.sameOrigin)(innerFinal, mainResult.finalUrl)) {
919
- internalResults.push((0, analyzeUtils_1.createEmptySeoResult)(link, 'La URL redirige fuera del dominio escaneado'));
1162
+ if (!sameCrawlDomain(innerFinal, mainResult.finalUrl)) {
1163
+ const result = (0, analyzeUtils_1.createEmptySeoResult)(link, isRequiredCrawlUrl
1164
+ ? 'Página obligatoria configurada por el usuario: la URL redirige fuera del dominio escaneado'
1165
+ : 'La URL redirige fuera del dominio escaneado');
1166
+ if (queueItem.source === 'discovered')
1167
+ discoveredCrawlAttempts++;
1168
+ result.requiredUrl = isRequiredCrawlUrl;
1169
+ internalResults.push(result);
1170
+ continue;
1171
+ }
1172
+ if (innerStatus >= 400) {
1173
+ const result = (0, analyzeUtils_1.createEmptySeoResult)(link, isRequiredCrawlUrl
1174
+ ? `Página obligatoria configurada por el usuario devolvió código ${innerStatus}`
1175
+ : `La página devolvió código ${innerStatus}`);
1176
+ result.finalUrl = innerFinal;
1177
+ result.statusCode = innerStatus;
1178
+ result.responseTimeMs = innerTime;
1179
+ result.timeToFirstByteMs = innerTtfb;
1180
+ if (queueItem.source === 'discovered')
1181
+ discoveredCrawlAttempts++;
1182
+ result.requiredUrl = isRequiredCrawlUrl;
1183
+ internalResults.push(result);
920
1184
  continue;
921
1185
  }
922
1186
  scannedUrls.add((0, urlUtils_1.normalizeUrlForDedupe)(innerFinal));
@@ -926,14 +1190,23 @@ class SeoScanner {
926
1190
  result.statusCode = innerStatus;
927
1191
  result.responseTimeMs = innerTime;
928
1192
  internalHtmlByFinalUrl.set((0, urlUtils_1.normalizeUrlForDedupe)(innerFinal), innerHtml);
1193
+ result.requiredUrl = isRequiredCrawlUrl;
929
1194
  internalResults.push(result);
930
- addToCrawlQueue(result.linksInternalUrls || []);
1195
+ if (queueItem.source === 'discovered')
1196
+ discoveredCrawlAttempts++;
1197
+ addToCrawlQueue(result.linksInternalUrls || [], 'discovered');
931
1198
  }
932
1199
  catch {
933
- internalResults.push((0, analyzeUtils_1.createEmptySeoResult)(link, 'Error al cargar la página (timeout o red)'));
1200
+ const result = (0, analyzeUtils_1.createEmptySeoResult)(link, isRequiredCrawlUrl
1201
+ ? 'Página obligatoria configurada por el usuario: error al cargar la página (timeout o red)'
1202
+ : 'Error al cargar la página (timeout o red)');
1203
+ if (queueItem.source === 'discovered')
1204
+ discoveredCrawlAttempts++;
1205
+ result.requiredUrl = isRequiredCrawlUrl;
1206
+ internalResults.push(result);
934
1207
  }
935
1208
  }
936
- if (crawlQueue.length > 0 && internalResults.length >= maxInternalUrls) {
1209
+ if (crawlQueue.some((item) => item.source === 'discovered') && discoveredCrawlAttempts >= maxInternalUrls) {
937
1210
  crawlLimitReached = true;
938
1211
  }
939
1212
  output.internalPages = internalResults;
@@ -941,9 +1214,16 @@ class SeoScanner {
941
1214
  output.message = `Escaneadas ${output.scannedUrls} páginas (1 principal + ${internalResults.length} URLs internas descubiertas).`;
942
1215
  output.siteCrawlDiscovery = {
943
1216
  limit: maxInternalUrls,
1217
+ requiredUrls: requiredUrls.length,
1218
+ totalInternalLimit: maxInternalUrls + requiredUrls.length,
1219
+ planInternalLimit: effectivePlanInternalLimit === undefined ? null : effectivePlanInternalLimit,
1220
+ discoveredCrawlLimit: maxInternalUrls,
1221
+ discoveredCrawlScanned: discoveredCrawlAttempts,
1222
+ excludedPatterns: excludedPatterns.length,
944
1223
  sitemapSeedUrls: sitemapPageUrls.length,
945
1224
  totalDiscoveredInternalUrls: scannedUrls.size + queuedUrls.size,
946
1225
  pendingUrlsNotScanned: crawlQueue.length,
1226
+ ignoredUrlsCount: ignoredByRuleUrls.size,
947
1227
  };
948
1228
  if (analyzeOpts.checkBrokenLinks) {
949
1229
  const maxSiteCheck = Math.min(100, Math.max(analyzeOpts.maxBrokenLinksToCheck ?? 15, 30));
@@ -952,6 +1232,8 @@ class SeoScanner {
952
1232
  const toCheckSite = [];
953
1233
  const addInternal = (url, anchor) => {
954
1234
  const norm = (0, urlUtils_1.normalizeUrlForDedupe)(url);
1235
+ if (isSkippedByCrawlRules(url))
1236
+ return;
955
1237
  if (!seenInternal.has(norm) && isHttp(url)) {
956
1238
  seenInternal.add(norm);
957
1239
  if (toCheckSite.length < maxSiteCheck)
@@ -960,6 +1242,8 @@ class SeoScanner {
960
1242
  };
961
1243
  const addExternal = (url, anchor) => {
962
1244
  const norm = url.toLowerCase();
1245
+ if (isSkippedByCrawlRules(url))
1246
+ return;
963
1247
  if (!seenExternal.has(norm) && isHttp(url)) {
964
1248
  seenExternal.add(norm);
965
1249
  if (toCheckSite.length < maxSiteCheck)
@@ -1041,14 +1325,24 @@ class SeoScanner {
1041
1325
  }
1042
1326
  if (sitemapPageUrls.length > 0) {
1043
1327
  const allDiscoveredLinks = new Set();
1044
- mainResult.linksInternalUrls.forEach(u => allDiscoveredLinks.add((0, urlUtils_1.normalizeUrlForDedupe)(u)));
1328
+ mainResult.linksInternalUrls.forEach(u => {
1329
+ if (!isSkippedByCrawlRules(u))
1330
+ allDiscoveredLinks.add((0, urlUtils_1.normalizeUrlForDedupe)(u));
1331
+ });
1045
1332
  if (internalResults.length > 0) {
1046
1333
  internalResults.forEach(r => {
1047
- (r.linksInternalUrls || []).forEach(u => allDiscoveredLinks.add((0, urlUtils_1.normalizeUrlForDedupe)(u)));
1334
+ (r.linksInternalUrls || []).forEach(u => {
1335
+ if (!isSkippedByCrawlRules(u))
1336
+ allDiscoveredLinks.add((0, urlUtils_1.normalizeUrlForDedupe)(u));
1337
+ });
1048
1338
  });
1049
1339
  }
1050
1340
  const orphanPages = [];
1051
1341
  for (const sitemapUrl of sitemapPageUrls) {
1342
+ if (isSkippedByCrawlRules(sitemapUrl)) {
1343
+ trackIgnoredByRule(sitemapUrl);
1344
+ continue;
1345
+ }
1052
1346
  const norm = (0, urlUtils_1.normalizeUrlForDedupe)(sitemapUrl);
1053
1347
  if (norm !== (0, urlUtils_1.normalizeUrlForDedupe)(mainResult.finalUrl) &&
1054
1348
  !internalResults.some(r => (0, urlUtils_1.normalizeUrlForDedupe)(r.finalUrl) === norm) &&
@@ -1076,6 +1370,14 @@ class SeoScanner {
1076
1370
  output.summary.orphanPagesCount = 0;
1077
1371
  }
1078
1372
  }
1373
+ if (ignoredByRuleUrls.size > 0) {
1374
+ output.ignoredUrls = [...ignoredByRuleUrls.values()];
1375
+ output.ignoredUrlsCount = ignoredByRuleUrls.size;
1376
+ output.ignoredUrlsSummary = `${ignoredByRuleUrls.size} URLs ignoradas por reglas de exclusión.`;
1377
+ if (output.siteCrawlDiscovery && typeof output.siteCrawlDiscovery === 'object') {
1378
+ output.siteCrawlDiscovery.ignoredUrlsCount = ignoredByRuleUrls.size;
1379
+ }
1380
+ }
1079
1381
  if (apiScanLimit.apiLimitSource) {
1080
1382
  const apiCreditsRemaining = (0, apiLimitUtils_1.resolveApiCreditsRemaining)({ apiToken, incoming, validationInfo });
1081
1383
  output.apiUsage = (0, apiLimitUtils_1.buildApiUsagePayload)(apiScanLimit, apiCreditsRemaining);