aiseo-audit 1.4.6 → 1.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +316 -278
- package/dist/cli.mjs +316 -278
- package/dist/index.js +316 -278
- package/dist/index.mjs +316 -278
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
[](https://nodejs.org)
|
|
8
8
|
[](https://www.typescriptlang.org/)
|
|
9
|
-
[](https://github.com/agencyenterprise/aiseo-audit)
|
|
10
10
|
[](https://codecov.io/gh/agencyenterprise/aiseo-audit)
|
|
11
11
|
[](https://github.com/agencyenterprise/aiseo-audit/stargazers)
|
|
12
12
|
|
package/dist/cli.js
CHANGED
|
@@ -28,7 +28,7 @@ var import_zod3 = require("zod");
|
|
|
28
28
|
|
|
29
29
|
// src/modules/analyzer/constants.ts
|
|
30
30
|
var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
|
|
31
|
-
var VERSION = true ? "1.4.6" : "0.0.0";
|
|
31
|
+
var VERSION = true ? "1.4.7" : "0.0.0";
|
|
32
32
|
|
|
33
33
|
// src/modules/fetcher/constants.ts
|
|
34
34
|
var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
|
|
@@ -191,223 +191,10 @@ function getDomain(url) {
|
|
|
191
191
|
}
|
|
192
192
|
}
|
|
193
193
|
|
|
194
|
-
// src/modules/audits/constants.ts
|
|
195
|
-
var CATEGORY_DISPLAY_NAMES = {
|
|
196
|
-
contentExtractability: "Content Extractability",
|
|
197
|
-
contentStructure: "Content Structure for Reuse",
|
|
198
|
-
answerability: "Answerability",
|
|
199
|
-
entityClarity: "Entity Clarity",
|
|
200
|
-
groundingSignals: "Grounding Signals",
|
|
201
|
-
authorityContext: "Authority Context",
|
|
202
|
-
readabilityForCompression: "Readability for Compression"
|
|
203
|
-
};
|
|
204
|
-
|
|
205
|
-
// src/modules/audits/support/patterns.ts
|
|
206
|
-
var DEFINITION_PATTERNS = [
|
|
207
|
-
/\bis\s+defined\s+as\b/gi,
|
|
208
|
-
/\brefers?\s+to\b/gi,
|
|
209
|
-
/\bmeans?\s+that\b/gi,
|
|
210
|
-
/\bis\s+a\s+type\s+of\b/gi,
|
|
211
|
-
/\bcan\s+be\s+described\s+as\b/gi,
|
|
212
|
-
/\balso\s+known\s+as\b/gi
|
|
213
|
-
];
|
|
214
|
-
var CITATION_PATTERNS = [
|
|
215
|
-
/\[\d+\]/g,
|
|
216
|
-
/\([\w\s]+,?\s*\d{4}\)/g,
|
|
217
|
-
/according\s+to/gi,
|
|
218
|
-
/research\s+(?:shows|indicates|suggests)/gi,
|
|
219
|
-
/studies?\s+(?:show|indicate|suggest|found)/gi,
|
|
220
|
-
/data\s+from/gi,
|
|
221
|
-
/as\s+reported\s+by/gi
|
|
222
|
-
];
|
|
223
|
-
var ATTRIBUTION_PATTERNS = [
|
|
224
|
-
/according\s+to/gi,
|
|
225
|
-
/\bsaid\b/gi,
|
|
226
|
-
/\bstated\b/gi,
|
|
227
|
-
/\breported\b/gi,
|
|
228
|
-
/\bcited\s+by\b/gi
|
|
229
|
-
];
|
|
230
|
-
var NUMERIC_CLAIM_PATTERNS = [
|
|
231
|
-
/\d+(?:\.\d+)?\s*%/g,
|
|
232
|
-
/\d+(?:\.\d+)?\s*(?:million|billion|thousand|trillion)/gi,
|
|
233
|
-
/\$[\d,.]+/g,
|
|
234
|
-
/increased\s+by/gi,
|
|
235
|
-
/decreased\s+by/gi,
|
|
236
|
-
/grew\s+by/gi
|
|
237
|
-
];
|
|
238
|
-
var STEP_PATTERNS = [
|
|
239
|
-
/step\s+\d+/gi,
|
|
240
|
-
/^\s*\d+\.\s+\w/gm,
|
|
241
|
-
/\bfirst(?:ly)?,?\s/gi,
|
|
242
|
-
/\bsecond(?:ly)?,?\s/gi,
|
|
243
|
-
/\bfinally,?\s/gi,
|
|
244
|
-
/\bhow\s+to\b/gi
|
|
245
|
-
];
|
|
246
|
-
var SUMMARY_MARKERS = [
|
|
247
|
-
/\bin\s+summary\b/gi,
|
|
248
|
-
/\bin\s+conclusion\b/gi,
|
|
249
|
-
/\bto\s+summarize\b/gi,
|
|
250
|
-
/\bkey\s+takeaways?\b/gi,
|
|
251
|
-
/\bbottom\s+line\b/gi,
|
|
252
|
-
/\btl;?dr\b/gi
|
|
253
|
-
];
|
|
254
|
-
var QUESTION_PATTERNS = [
|
|
255
|
-
/what\s+is/gi,
|
|
256
|
-
/what\s+are/gi,
|
|
257
|
-
/how\s+to/gi,
|
|
258
|
-
/how\s+do/gi,
|
|
259
|
-
/why\s+is/gi,
|
|
260
|
-
/why\s+do/gi,
|
|
261
|
-
/when\s+to/gi,
|
|
262
|
-
/where\s+to/gi,
|
|
263
|
-
/which\s+is/gi,
|
|
264
|
-
/who\s+is/gi
|
|
265
|
-
];
|
|
266
|
-
var DIRECT_ANSWER_PATTERNS = [
|
|
267
|
-
/^The\s+\w+\s+is\b/gm,
|
|
268
|
-
/^It\s+is\b/gm,
|
|
269
|
-
/^This\s+is\b/gm,
|
|
270
|
-
/^They\s+are\b/gm,
|
|
271
|
-
/\bsimply\s+put\b/gi,
|
|
272
|
-
/\bin\s+short\b/gi
|
|
273
|
-
];
|
|
274
|
-
var TRANSITION_WORDS = [
|
|
275
|
-
"however",
|
|
276
|
-
"therefore",
|
|
277
|
-
"moreover",
|
|
278
|
-
"furthermore",
|
|
279
|
-
"consequently",
|
|
280
|
-
"additionally",
|
|
281
|
-
"in contrast",
|
|
282
|
-
"similarly",
|
|
283
|
-
"as a result",
|
|
284
|
-
"for example",
|
|
285
|
-
"for instance",
|
|
286
|
-
"on the other hand",
|
|
287
|
-
"nevertheless",
|
|
288
|
-
"meanwhile",
|
|
289
|
-
"likewise",
|
|
290
|
-
"in addition",
|
|
291
|
-
"specifically",
|
|
292
|
-
"in particular",
|
|
293
|
-
"notably",
|
|
294
|
-
"importantly"
|
|
295
|
-
];
|
|
296
|
-
var AUTHOR_SELECTORS = [
|
|
297
|
-
'[rel="author"]',
|
|
298
|
-
".author",
|
|
299
|
-
".byline",
|
|
300
|
-
'[itemprop="author"]',
|
|
301
|
-
".post-author",
|
|
302
|
-
".entry-author",
|
|
303
|
-
'meta[name="author"]'
|
|
304
|
-
];
|
|
305
|
-
var DATE_SELECTORS = [
|
|
306
|
-
"time[datetime]",
|
|
307
|
-
'[itemprop="datePublished"]',
|
|
308
|
-
'[itemprop="dateModified"]',
|
|
309
|
-
".published",
|
|
310
|
-
".post-date",
|
|
311
|
-
".entry-date",
|
|
312
|
-
'meta[property="article:published_time"]',
|
|
313
|
-
'meta[property="article:modified_time"]'
|
|
314
|
-
];
|
|
315
|
-
var QUESTION_HEADING_PATTERN = /^(?:what|how|why|when|where|which|who|can|do|does|is|are|should|will)\b/i;
|
|
316
|
-
var QUOTED_ATTRIBUTION_PATTERNS = [
|
|
317
|
-
/"[^"]{10,}"\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
|
|
318
|
-
/"[^"]{10,}",?\s+said\s+[A-Z]/g,
|
|
319
|
-
/"[^"]{10,}",?\s+according\s+to\s+[A-Z]/g,
|
|
320
|
-
/according\s+to\s+[A-Z][a-z]+[^,]*,\s*"[^"]{10,}"/g,
|
|
321
|
-
/\u201c[^\u201d]{10,}\u201d\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
|
|
322
|
-
/\u201c[^\u201d]{10,}\u201d,?\s+said\s+[A-Z]/g
|
|
323
|
-
];
|
|
324
|
-
var AI_CRAWLERS = [
|
|
325
|
-
"GPTBot",
|
|
326
|
-
"ChatGPT-User",
|
|
327
|
-
"ClaudeBot",
|
|
328
|
-
"PerplexityBot",
|
|
329
|
-
"Google-Extended"
|
|
330
|
-
];
|
|
331
|
-
var MODIFIED_DATE_SELECTORS = [
|
|
332
|
-
'[itemprop="dateModified"]',
|
|
333
|
-
'meta[property="article:modified_time"]'
|
|
334
|
-
];
|
|
335
|
-
var PUBLISH_DATE_SELECTORS = [
|
|
336
|
-
"time[datetime]",
|
|
337
|
-
'[itemprop="datePublished"]',
|
|
338
|
-
'meta[property="article:published_time"]'
|
|
339
|
-
];
|
|
340
|
-
|
|
341
|
-
// src/modules/audits/support/dom.ts
|
|
342
|
-
function detectAnswerCapsules($) {
|
|
343
|
-
let total = 0;
|
|
344
|
-
let withCapsule = 0;
|
|
345
|
-
$("h2").each((_, el) => {
|
|
346
|
-
const headingText = $(el).text().trim();
|
|
347
|
-
const isQuestion = headingText.includes("?") || QUESTION_HEADING_PATTERN.test(headingText);
|
|
348
|
-
if (!isQuestion) return;
|
|
349
|
-
total++;
|
|
350
|
-
const nextP = $(el).nextAll("p").first();
|
|
351
|
-
if (!nextP.length) return;
|
|
352
|
-
const pText = nextP.text().trim();
|
|
353
|
-
const firstSentence = pText.split(/[.!?]/)[0] || "";
|
|
354
|
-
if (firstSentence.length > 0 && firstSentence.length <= 200) {
|
|
355
|
-
withCapsule++;
|
|
356
|
-
}
|
|
357
|
-
});
|
|
358
|
-
return { total, withCapsule };
|
|
359
|
-
}
|
|
360
|
-
function measureSectionLengths($) {
|
|
361
|
-
const headings = $("h1, h2, h3, h4, h5, h6");
|
|
362
|
-
if (headings.length === 0)
|
|
363
|
-
return { sectionCount: 0, avgWordsPerSection: 0, sections: [] };
|
|
364
|
-
const sections = [];
|
|
365
|
-
headings.each((_, el) => {
|
|
366
|
-
let words = 0;
|
|
367
|
-
let sibling = $(el).next();
|
|
368
|
-
while (sibling.length && !sibling.is("h1, h2, h3, h4, h5, h6")) {
|
|
369
|
-
const text = sibling.text().trim();
|
|
370
|
-
words += text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
371
|
-
sibling = sibling.next();
|
|
372
|
-
}
|
|
373
|
-
if (words > 0) sections.push(words);
|
|
374
|
-
});
|
|
375
|
-
const avg = sections.length > 0 ? Math.round(sections.reduce((a, b) => a + b, 0) / sections.length) : 0;
|
|
376
|
-
return { sectionCount: sections.length, avgWordsPerSection: avg, sections };
|
|
377
|
-
}
|
|
378
|
-
function parseJsonLdObjects($) {
|
|
379
|
-
const objects = [];
|
|
380
|
-
$('script[type="application/ld+json"]').each((_, el) => {
|
|
381
|
-
try {
|
|
382
|
-
const data = JSON.parse($(el).html() || "{}");
|
|
383
|
-
if (Array.isArray(data)) objects.push(...data);
|
|
384
|
-
else objects.push(data);
|
|
385
|
-
} catch {
|
|
386
|
-
}
|
|
387
|
-
});
|
|
388
|
-
return objects;
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
// src/modules/audits/support/nlp.ts
|
|
194
|
+
// src/modules/nlp/service.ts
|
|
392
195
|
var import_compromise = __toESM(require("compromise"));
|
|
393
196
|
|
|
394
|
-
// src/utils/strings.ts
|
|
395
|
-
function countWords(text) {
|
|
396
|
-
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
397
|
-
}
|
|
398
|
-
function countSentences(text) {
|
|
399
|
-
return text.split(/[.!?]+/).filter((s) => s.trim().length > 5).length;
|
|
400
|
-
}
|
|
401
|
-
function countSyllables(word) {
|
|
402
|
-
word = word.toLowerCase().replace(/[^a-z]/g, "");
|
|
403
|
-
if (word.length <= 3) return 1;
|
|
404
|
-
word = word.replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, "");
|
|
405
|
-
word = word.replace(/^y/, "");
|
|
406
|
-
const matches = word.match(/[aeiouy]{1,2}/g);
|
|
407
|
-
return matches ? Math.max(matches.length, 1) : 1;
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
// src/modules/audits/support/nlp.ts
|
|
197
|
+
// src/modules/nlp/constants.ts
|
|
411
198
|
var STOPWORDS = /* @__PURE__ */ new Set([
|
|
412
199
|
"a",
|
|
413
200
|
"an",
|
|
@@ -680,6 +467,8 @@ var ACRONYM_STOPLIST = /* @__PURE__ */ new Set([
|
|
|
680
467
|
]);
|
|
681
468
|
var ORG_SUFFIXES = /\b(?:Inc|Corp|Corporation|LLC|Ltd|Limited|Co|Company|Group|Foundation|Institute|University|Association|Society|Agency|Authority|Bureau|Commission|Council|Department|Board|Trust|Fund|Partners|Ventures|Labs|Technologies|Solutions|Systems|Services|Consulting|Media|Network|Studios|Entertainment|Healthcare|Pharmaceuticals|Dynamics|Holdings|Capital|Enterprises|International)\b/i;
|
|
682
469
|
var PERSON_HONORIFICS = /\b(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Rev|Reverend|Sen|Senator|Rep|Representative|Gov|Governor|Pres|President|Gen|General|Col|Colonel|Sgt|Sergeant|Cpl|Corporal|Pvt|Private|Adm|Admiral|Capt|Captain|Lt|Lieutenant|Maj|Major|Sir|Dame|Lord|Lady|Hon|Honorable|Judge|Justice|Chancellor|Dean|Provost)\.\s*/;
|
|
470
|
+
|
|
471
|
+
// src/modules/nlp/support/entities.ts
|
|
683
472
|
function extractAcronymEntities(text) {
|
|
684
473
|
const matches = text.match(/\b[A-Z]{2,6}\b/g);
|
|
685
474
|
if (!matches) return [];
|
|
@@ -731,6 +520,33 @@ function isPersonByHonorific(text, entity) {
|
|
|
731
520
|
);
|
|
732
521
|
return pattern.test(text);
|
|
733
522
|
}
|
|
523
|
+
function smartDedup(entities) {
|
|
524
|
+
if (entities.length === 0) return [];
|
|
525
|
+
const sorted = [...entities].sort((a, b) => b.length - a.length);
|
|
526
|
+
const result = [];
|
|
527
|
+
const lowerSeen = /* @__PURE__ */ new Set();
|
|
528
|
+
for (const entity of sorted) {
|
|
529
|
+
const lower = entity.toLowerCase();
|
|
530
|
+
if (lowerSeen.has(lower)) continue;
|
|
531
|
+
let isSubstring = false;
|
|
532
|
+
for (const accepted of lowerSeen) {
|
|
533
|
+
if (accepted.includes(lower)) {
|
|
534
|
+
isSubstring = true;
|
|
535
|
+
break;
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
if (isSubstring) continue;
|
|
539
|
+
result.push(entity);
|
|
540
|
+
lowerSeen.add(lower);
|
|
541
|
+
}
|
|
542
|
+
return result;
|
|
543
|
+
}
|
|
544
|
+
function mergeEntityLists(compromiseList, supplementalList, limit) {
|
|
545
|
+
const combined = [...compromiseList, ...supplementalList];
|
|
546
|
+
return smartDedup(combined).slice(0, limit);
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
// src/modules/nlp/support/topics.ts
|
|
734
550
|
function extractTopicsByTfIdf(text, limit) {
|
|
735
551
|
const lower = text.toLowerCase();
|
|
736
552
|
const words = lower.replace(/[^a-z0-9\s'-]/g, " ").split(/\s+/).filter((w) => w.length > 2 && !STOPWORDS.has(w));
|
|
@@ -754,31 +570,60 @@ function extractTopicsByTfIdf(text, limit) {
|
|
|
754
570
|
candidates.sort((a, b) => b[1] - a[1]);
|
|
755
571
|
return candidates.slice(0, limit).map(([term]) => term);
|
|
756
572
|
}
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
const lowerSeen = /* @__PURE__ */ new Set();
|
|
762
|
-
|
|
763
|
-
const lower = entity.toLowerCase();
|
|
764
|
-
if (lowerSeen.has(lower)) continue;
|
|
765
|
-
let isSubstring = false;
|
|
766
|
-
for (const accepted of lowerSeen) {
|
|
767
|
-
if (accepted.includes(lower)) {
|
|
768
|
-
isSubstring = true;
|
|
769
|
-
break;
|
|
770
|
-
}
|
|
771
|
-
}
|
|
772
|
-
if (isSubstring) continue;
|
|
773
|
-
result.push(entity);
|
|
774
|
-
lowerSeen.add(lower);
|
|
573
|
+
|
|
574
|
+
// src/modules/nlp/support/patterns.ts
|
|
575
|
+
function countPatternMatches(text, patterns) {
|
|
576
|
+
let count = 0;
|
|
577
|
+
for (const pattern of patterns) {
|
|
578
|
+
const re = new RegExp(pattern.source, pattern.flags);
|
|
579
|
+
const matches = text.match(re);
|
|
580
|
+
if (matches) count += matches.length;
|
|
775
581
|
}
|
|
776
|
-
return result;
|
|
582
|
+
return count;
|
|
583
|
+
}
|
|
584
|
+
function countTransitionWords(text, words) {
|
|
585
|
+
const lower = text.toLowerCase();
|
|
586
|
+
return words.filter((w) => lower.includes(w)).length;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
// src/utils/strings.ts
|
|
590
|
+
function countWords(text) {
|
|
591
|
+
return text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
592
|
+
}
|
|
593
|
+
function countSentences(text) {
|
|
594
|
+
return text.split(/[.!?]+/).filter((s) => s.trim().length > 5).length;
|
|
595
|
+
}
|
|
596
|
+
function countSyllables(word) {
|
|
597
|
+
word = word.toLowerCase().replace(/[^a-z]/g, "");
|
|
598
|
+
if (word.length <= 3) return 1;
|
|
599
|
+
word = word.replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, "");
|
|
600
|
+
word = word.replace(/^y/, "");
|
|
601
|
+
const matches = word.match(/[aeiouy]{1,2}/g);
|
|
602
|
+
return matches ? Math.max(matches.length, 1) : 1;
|
|
777
603
|
}
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
604
|
+
|
|
605
|
+
// src/modules/nlp/support/readability.ts
|
|
606
|
+
function computeFleschReadingEase(text) {
|
|
607
|
+
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
608
|
+
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
|
|
609
|
+
const totalSyllables = words.reduce((sum, w) => sum + countSyllables(w), 0);
|
|
610
|
+
if (words.length === 0 || sentences.length === 0) return 0;
|
|
611
|
+
const avgSentenceLength2 = words.length / sentences.length;
|
|
612
|
+
const avgSyllablesPerWord = totalSyllables / words.length;
|
|
613
|
+
return 206.835 - 1.015 * avgSentenceLength2 - 84.6 * avgSyllablesPerWord;
|
|
614
|
+
}
|
|
615
|
+
function countComplexWords(text) {
|
|
616
|
+
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
617
|
+
return words.filter((w) => countSyllables(w) >= 4).length;
|
|
781
618
|
}
|
|
619
|
+
function avgSentenceLength(text) {
|
|
620
|
+
const words = text.split(/\s+/).filter((w) => w.length > 0);
|
|
621
|
+
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
|
|
622
|
+
if (sentences.length === 0) return 0;
|
|
623
|
+
return Math.round(words.length / sentences.length);
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
// src/modules/nlp/service.ts
|
|
782
627
|
function extractEntities(text) {
|
|
783
628
|
const doc = (0, import_compromise.default)(text);
|
|
784
629
|
const compromisePeople = [...new Set(doc.people().out("array"))];
|
|
@@ -820,37 +665,202 @@ function extractEntities(text) {
|
|
|
820
665
|
numberCount
|
|
821
666
|
};
|
|
822
667
|
}
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
668
|
+
|
|
669
|
+
// src/modules/audits/constants.ts
|
|
670
|
+
var CATEGORY_DISPLAY_NAMES = {
|
|
671
|
+
contentExtractability: "Content Extractability",
|
|
672
|
+
contentStructure: "Content Structure for Reuse",
|
|
673
|
+
answerability: "Answerability",
|
|
674
|
+
entityClarity: "Entity Clarity",
|
|
675
|
+
groundingSignals: "Grounding Signals",
|
|
676
|
+
authorityContext: "Authority Context",
|
|
677
|
+
readabilityForCompression: "Readability for Compression"
|
|
678
|
+
};
|
|
679
|
+
|
|
680
|
+
// src/modules/audits/support/patterns.ts
|
|
681
|
+
var DEFINITION_PATTERNS = [
|
|
682
|
+
/\bis\s+defined\s+as\b/gi,
|
|
683
|
+
/\brefers?\s+to\b/gi,
|
|
684
|
+
/\bmeans?\s+that\b/gi,
|
|
685
|
+
/\bis\s+a\s+type\s+of\b/gi,
|
|
686
|
+
/\bcan\s+be\s+described\s+as\b/gi,
|
|
687
|
+
/\balso\s+known\s+as\b/gi
|
|
688
|
+
];
|
|
689
|
+
var CITATION_PATTERNS = [
|
|
690
|
+
/\[\d+\]/g,
|
|
691
|
+
/\([\w\s]+,?\s*\d{4}\)/g,
|
|
692
|
+
/according\s+to/gi,
|
|
693
|
+
/research\s+(?:shows|indicates|suggests)/gi,
|
|
694
|
+
/studies?\s+(?:show|indicate|suggest|found)/gi,
|
|
695
|
+
/data\s+from/gi,
|
|
696
|
+
/as\s+reported\s+by/gi
|
|
697
|
+
];
|
|
698
|
+
var ATTRIBUTION_PATTERNS = [
|
|
699
|
+
/according\s+to/gi,
|
|
700
|
+
/\bsaid\b/gi,
|
|
701
|
+
/\bstated\b/gi,
|
|
702
|
+
/\breported\b/gi,
|
|
703
|
+
/\bcited\s+by\b/gi
|
|
704
|
+
];
|
|
705
|
+
var NUMERIC_CLAIM_PATTERNS = [
|
|
706
|
+
/\d+(?:\.\d+)?\s*%/g,
|
|
707
|
+
/\d+(?:\.\d+)?\s*(?:million|billion|thousand|trillion)/gi,
|
|
708
|
+
/\$[\d,.]+/g,
|
|
709
|
+
/increased\s+by/gi,
|
|
710
|
+
/decreased\s+by/gi,
|
|
711
|
+
/grew\s+by/gi
|
|
712
|
+
];
|
|
713
|
+
var STEP_PATTERNS = [
|
|
714
|
+
/step\s+\d+/gi,
|
|
715
|
+
/^\s*\d+\.\s+\w/gm,
|
|
716
|
+
/\bfirst(?:ly)?,?\s/gi,
|
|
717
|
+
/\bsecond(?:ly)?,?\s/gi,
|
|
718
|
+
/\bfinally,?\s/gi,
|
|
719
|
+
/\bhow\s+to\b/gi
|
|
720
|
+
];
|
|
721
|
+
var SUMMARY_MARKERS = [
|
|
722
|
+
/\bin\s+summary\b/gi,
|
|
723
|
+
/\bin\s+conclusion\b/gi,
|
|
724
|
+
/\bto\s+summarize\b/gi,
|
|
725
|
+
/\bkey\s+takeaways?\b/gi,
|
|
726
|
+
/\bbottom\s+line\b/gi,
|
|
727
|
+
/\btl;?dr\b/gi
|
|
728
|
+
];
|
|
729
|
+
var QUESTION_PATTERNS = [
|
|
730
|
+
/what\s+is/gi,
|
|
731
|
+
/what\s+are/gi,
|
|
732
|
+
/how\s+to/gi,
|
|
733
|
+
/how\s+do/gi,
|
|
734
|
+
/why\s+is/gi,
|
|
735
|
+
/why\s+do/gi,
|
|
736
|
+
/when\s+to/gi,
|
|
737
|
+
/where\s+to/gi,
|
|
738
|
+
/which\s+is/gi,
|
|
739
|
+
/who\s+is/gi
|
|
740
|
+
];
|
|
741
|
+
var DIRECT_ANSWER_PATTERNS = [
|
|
742
|
+
/^The\s+\w+\s+is\b/gm,
|
|
743
|
+
/^It\s+is\b/gm,
|
|
744
|
+
/^This\s+is\b/gm,
|
|
745
|
+
/^They\s+are\b/gm,
|
|
746
|
+
/\bsimply\s+put\b/gi,
|
|
747
|
+
/\bin\s+short\b/gi
|
|
748
|
+
];
|
|
749
|
+
var TRANSITION_WORDS = [
|
|
750
|
+
"however",
|
|
751
|
+
"therefore",
|
|
752
|
+
"moreover",
|
|
753
|
+
"furthermore",
|
|
754
|
+
"consequently",
|
|
755
|
+
"additionally",
|
|
756
|
+
"in contrast",
|
|
757
|
+
"similarly",
|
|
758
|
+
"as a result",
|
|
759
|
+
"for example",
|
|
760
|
+
"for instance",
|
|
761
|
+
"on the other hand",
|
|
762
|
+
"nevertheless",
|
|
763
|
+
"meanwhile",
|
|
764
|
+
"likewise",
|
|
765
|
+
"in addition",
|
|
766
|
+
"specifically",
|
|
767
|
+
"in particular",
|
|
768
|
+
"notably",
|
|
769
|
+
"importantly"
|
|
770
|
+
];
|
|
771
|
+
var AUTHOR_SELECTORS = [
|
|
772
|
+
'[rel="author"]',
|
|
773
|
+
".author",
|
|
774
|
+
".byline",
|
|
775
|
+
'[itemprop="author"]',
|
|
776
|
+
".post-author",
|
|
777
|
+
".entry-author",
|
|
778
|
+
'meta[name="author"]'
|
|
779
|
+
];
|
|
780
|
+
var DATE_SELECTORS = [
|
|
781
|
+
"time[datetime]",
|
|
782
|
+
'[itemprop="datePublished"]',
|
|
783
|
+
'[itemprop="dateModified"]',
|
|
784
|
+
".published",
|
|
785
|
+
".post-date",
|
|
786
|
+
".entry-date",
|
|
787
|
+
'meta[property="article:published_time"]',
|
|
788
|
+
'meta[property="article:modified_time"]'
|
|
789
|
+
];
|
|
790
|
+
var QUESTION_HEADING_PATTERN = /^(?:what|how|why|when|where|which|who|can|do|does|is|are|should|will)\b/i;
|
|
791
|
+
var QUOTED_ATTRIBUTION_PATTERNS = [
|
|
792
|
+
/"[^"]{10,}"\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
|
|
793
|
+
/"[^"]{10,}",?\s+said\s+[A-Z]/g,
|
|
794
|
+
/"[^"]{10,}",?\s+according\s+to\s+[A-Z]/g,
|
|
795
|
+
/according\s+to\s+[A-Z][a-z]+[^,]*,\s*"[^"]{10,}"/g,
|
|
796
|
+
/\u201c[^\u201d]{10,}\u201d\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
|
|
797
|
+
/\u201c[^\u201d]{10,}\u201d,?\s+said\s+[A-Z]/g
|
|
798
|
+
];
|
|
799
|
+
var AI_CRAWLERS = [
|
|
800
|
+
"GPTBot",
|
|
801
|
+
"ChatGPT-User",
|
|
802
|
+
"ClaudeBot",
|
|
803
|
+
"PerplexityBot",
|
|
804
|
+
"Google-Extended"
|
|
805
|
+
];
|
|
806
|
+
var MODIFIED_DATE_SELECTORS = [
|
|
807
|
+
'[itemprop="dateModified"]',
|
|
808
|
+
'meta[property="article:modified_time"]'
|
|
809
|
+
];
|
|
810
|
+
var PUBLISH_DATE_SELECTORS = [
|
|
811
|
+
"time[datetime]",
|
|
812
|
+
'[itemprop="datePublished"]',
|
|
813
|
+
'meta[property="article:published_time"]'
|
|
814
|
+
];
|
|
815
|
+
|
|
816
|
+
// src/modules/audits/support/dom.ts
|
|
817
|
+
function detectAnswerCapsules($) {
|
|
818
|
+
let total = 0;
|
|
819
|
+
let withCapsule = 0;
|
|
820
|
+
$("h2").each((_, el) => {
|
|
821
|
+
const headingText = $(el).text().trim();
|
|
822
|
+
const isQuestion = headingText.includes("?") || QUESTION_HEADING_PATTERN.test(headingText);
|
|
823
|
+
if (!isQuestion) return;
|
|
824
|
+
total++;
|
|
825
|
+
const nextP = $(el).nextAll("p").first();
|
|
826
|
+
if (!nextP.length) return;
|
|
827
|
+
const pText = nextP.text().trim();
|
|
828
|
+
const firstSentence = pText.split(/[.!?]/)[0] || "";
|
|
829
|
+
if (firstSentence.length > 0 && firstSentence.length <= 200) {
|
|
830
|
+
withCapsule++;
|
|
831
|
+
}
|
|
832
|
+
});
|
|
833
|
+
return { total, withCapsule };
|
|
844
834
|
}
|
|
845
|
-
function
|
|
846
|
-
const
|
|
847
|
-
|
|
835
|
+
function measureSectionLengths($) {
|
|
836
|
+
const headings = $("h1, h2, h3, h4, h5, h6");
|
|
837
|
+
if (headings.length === 0)
|
|
838
|
+
return { sectionCount: 0, avgWordsPerSection: 0, sections: [] };
|
|
839
|
+
const sections = [];
|
|
840
|
+
headings.each((_, el) => {
|
|
841
|
+
let words = 0;
|
|
842
|
+
let sibling = $(el).next();
|
|
843
|
+
while (sibling.length && !sibling.is("h1, h2, h3, h4, h5, h6")) {
|
|
844
|
+
const text = sibling.text().trim();
|
|
845
|
+
words += text.split(/\s+/).filter((w) => w.length > 0).length;
|
|
846
|
+
sibling = sibling.next();
|
|
847
|
+
}
|
|
848
|
+
if (words > 0) sections.push(words);
|
|
849
|
+
});
|
|
850
|
+
const avg = sections.length > 0 ? Math.round(sections.reduce((a, b) => a + b, 0) / sections.length) : 0;
|
|
851
|
+
return { sectionCount: sections.length, avgWordsPerSection: avg, sections };
|
|
848
852
|
}
|
|
849
|
-
function
|
|
850
|
-
const
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
853
|
+
function parseJsonLdObjects($) {
|
|
854
|
+
const objects = [];
|
|
855
|
+
$('script[type="application/ld+json"]').each((_, el) => {
|
|
856
|
+
try {
|
|
857
|
+
const data = JSON.parse($(el).html() || "{}");
|
|
858
|
+
if (Array.isArray(data)) objects.push(...data);
|
|
859
|
+
else objects.push(data);
|
|
860
|
+
} catch {
|
|
861
|
+
}
|
|
862
|
+
});
|
|
863
|
+
return objects;
|
|
854
864
|
}
|
|
855
865
|
|
|
856
866
|
// src/modules/audits/support/scoring.ts
|
|
@@ -4104,13 +4114,13 @@ function renderSitemapMarkdown(result) {
|
|
|
4104
4114
|
var import_chalk = __toESM(require("chalk"));
|
|
4105
4115
|
function scoreColor(score, max) {
|
|
4106
4116
|
const pct = max > 0 ? score / max * 100 : 0;
|
|
4107
|
-
if (pct >=
|
|
4108
|
-
if (pct >=
|
|
4117
|
+
if (pct >= 90) return import_chalk.default.green;
|
|
4118
|
+
if (pct >= 50) return import_chalk.default.yellow;
|
|
4109
4119
|
return import_chalk.default.red;
|
|
4110
4120
|
}
|
|
4111
4121
|
function gradeColor(grade) {
|
|
4112
4122
|
if (grade.startsWith("A")) return import_chalk.default.green;
|
|
4113
|
-
if (grade.startsWith("B")) return import_chalk.default.yellow;
|
|
4123
|
+
if (grade.startsWith("B") || grade.startsWith("C")) return import_chalk.default.yellow;
|
|
4114
4124
|
return import_chalk.default.red;
|
|
4115
4125
|
}
|
|
4116
4126
|
function pad(str, len) {
|
|
@@ -4257,7 +4267,7 @@ function renderSitemapPretty(result) {
|
|
|
4257
4267
|
lines.push(import_chalk.default.bold(" Site-Wide Category Averages:"));
|
|
4258
4268
|
lines.push("");
|
|
4259
4269
|
for (const avg of Object.values(result.categoryAverages)) {
|
|
4260
|
-
const color = avg.averagePct
|
|
4270
|
+
const color = scoreColor(avg.averagePct, 100);
|
|
4261
4271
|
const name = pad(avg.name, 38);
|
|
4262
4272
|
const dts = dots(Math.max(2, 40 - avg.name.length));
|
|
4263
4273
|
lines.push(` ${import_chalk.default.bold(name)} ${dts} ${color(`${avg.averagePct}%`)}`);
|
|
@@ -4337,6 +4347,36 @@ function renderSitemapReport(result, options) {
|
|
|
4337
4347
|
}
|
|
4338
4348
|
|
|
4339
4349
|
// src/modules/sitemap/service.ts
|
|
4350
|
+
var import_xml_to_html_converter = require("xml-to-html-converter");
|
|
4351
|
+
function stripCdata(raw) {
|
|
4352
|
+
const trimmed = raw.trim();
|
|
4353
|
+
if (trimmed.startsWith("<![CDATA[") && trimmed.endsWith("]]>")) {
|
|
4354
|
+
return trimmed.slice(9, -3);
|
|
4355
|
+
}
|
|
4356
|
+
return trimmed;
|
|
4357
|
+
}
|
|
4358
|
+
function collectLocText(nodes, urls) {
|
|
4359
|
+
for (const node of nodes) {
|
|
4360
|
+
if (node.xmlTag === "loc" && node.children) {
|
|
4361
|
+
const text = node.children.filter((c) => c.role === "textLeaf").map((c) => stripCdata(c.raw)).join("").trim();
|
|
4362
|
+
if (text) urls.push(text);
|
|
4363
|
+
}
|
|
4364
|
+
if (node.children) collectLocText(node.children, urls);
|
|
4365
|
+
}
|
|
4366
|
+
}
|
|
4367
|
+
function extractLocUrls(xml) {
|
|
4368
|
+
const nodes = (0, import_xml_to_html_converter.scaffold)(xml);
|
|
4369
|
+
const urls = [];
|
|
4370
|
+
collectLocText(nodes, urls);
|
|
4371
|
+
return urls;
|
|
4372
|
+
}
|
|
4373
|
+
function hasSitemapIndexNode(nodes) {
|
|
4374
|
+
for (const node of nodes) {
|
|
4375
|
+
if (node.xmlTag === "sitemapindex") return true;
|
|
4376
|
+
if (node.children && hasSitemapIndexNode(node.children)) return true;
|
|
4377
|
+
}
|
|
4378
|
+
return false;
|
|
4379
|
+
}
|
|
4340
4380
|
async function fetchSitemapUrls(sitemapUrl, timeout, userAgent) {
|
|
4341
4381
|
const response = await httpGet({
|
|
4342
4382
|
url: sitemapUrl,
|
|
@@ -4346,11 +4386,13 @@ async function fetchSitemapUrls(sitemapUrl, timeout, userAgent) {
|
|
|
4346
4386
|
if (response.status !== 200) {
|
|
4347
4387
|
throw new Error(`Failed to fetch sitemap: HTTP ${response.status}`);
|
|
4348
4388
|
}
|
|
4349
|
-
const
|
|
4350
|
-
if (
|
|
4389
|
+
const nodes = (0, import_xml_to_html_converter.scaffold)(response.data);
|
|
4390
|
+
if (hasSitemapIndexNode(nodes)) {
|
|
4351
4391
|
return fetchSitemapIndexUrls(response.data, timeout, userAgent);
|
|
4352
4392
|
}
|
|
4353
|
-
|
|
4393
|
+
const urls = [];
|
|
4394
|
+
collectLocText(nodes, urls);
|
|
4395
|
+
return urls;
|
|
4354
4396
|
}
|
|
4355
4397
|
async function fetchSitemapIndexUrls(xml, timeout, userAgent) {
|
|
4356
4398
|
const childSitemapUrls = extractLocUrls(xml);
|
|
@@ -4367,10 +4409,6 @@ async function fetchSitemapIndexUrls(xml, timeout, userAgent) {
|
|
|
4367
4409
|
}
|
|
4368
4410
|
return allUrls;
|
|
4369
4411
|
}
|
|
4370
|
-
function extractLocUrls(xml) {
|
|
4371
|
-
const matches = xml.matchAll(/<loc>\s*(.*?)\s*<\/loc>/g);
|
|
4372
|
-
return Array.from(matches, (m) => m[1]).filter(Boolean);
|
|
4373
|
-
}
|
|
4374
4412
|
function computeCategoryAverages(urlResults) {
|
|
4375
4413
|
const successResults = urlResults.filter((r) => r.status === "success").map(
|
|
4376
4414
|
(r) => r.result
|