aiseo-audit 1.4.6 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-7EB6D7.svg)](https://opensource.org/licenses/MIT)
7
7
  [![Node.js](https://img.shields.io/badge/node-%3E%3D20-7EB6D7.svg)](https://nodejs.org)
8
8
  [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-7EB6D7?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
9
- [![Tests](https://img.shields.io/badge/tests-454%20passed-8FBC8F)](https://github.com/agencyenterprise/aiseo-audit)
9
+ [![Tests](https://img.shields.io/badge/tests-455%20passed-8FBC8F)](https://github.com/agencyenterprise/aiseo-audit)
10
10
  [![Coverage](https://img.shields.io/codecov/c/github/agencyenterprise/aiseo-audit?color=8FBC8F&label=coverage)](https://codecov.io/gh/agencyenterprise/aiseo-audit)
11
11
  [![GitHub Stars](https://img.shields.io/github/stars/agencyenterprise/aiseo-audit?style=flat&color=8FBC8F)](https://github.com/agencyenterprise/aiseo-audit/stargazers)
12
12
 
package/dist/cli.js CHANGED
@@ -28,7 +28,7 @@ var import_zod3 = require("zod");
28
28
 
29
29
  // src/modules/analyzer/constants.ts
30
30
  var DOMAIN_SIGNAL_TIMEOUT_CAP = 5e3;
31
- var VERSION = true ? "1.4.6" : "0.0.0";
31
+ var VERSION = true ? "1.4.7" : "0.0.0";
32
32
 
33
33
  // src/modules/fetcher/constants.ts
34
34
  var MAX_RESPONSE_SIZE = 10 * 1024 * 1024;
@@ -191,223 +191,10 @@ function getDomain(url) {
191
191
  }
192
192
  }
193
193
 
194
- // src/modules/audits/constants.ts
195
- var CATEGORY_DISPLAY_NAMES = {
196
- contentExtractability: "Content Extractability",
197
- contentStructure: "Content Structure for Reuse",
198
- answerability: "Answerability",
199
- entityClarity: "Entity Clarity",
200
- groundingSignals: "Grounding Signals",
201
- authorityContext: "Authority Context",
202
- readabilityForCompression: "Readability for Compression"
203
- };
204
-
205
- // src/modules/audits/support/patterns.ts
206
- var DEFINITION_PATTERNS = [
207
- /\bis\s+defined\s+as\b/gi,
208
- /\brefers?\s+to\b/gi,
209
- /\bmeans?\s+that\b/gi,
210
- /\bis\s+a\s+type\s+of\b/gi,
211
- /\bcan\s+be\s+described\s+as\b/gi,
212
- /\balso\s+known\s+as\b/gi
213
- ];
214
- var CITATION_PATTERNS = [
215
- /\[\d+\]/g,
216
- /\([\w\s]+,?\s*\d{4}\)/g,
217
- /according\s+to/gi,
218
- /research\s+(?:shows|indicates|suggests)/gi,
219
- /studies?\s+(?:show|indicate|suggest|found)/gi,
220
- /data\s+from/gi,
221
- /as\s+reported\s+by/gi
222
- ];
223
- var ATTRIBUTION_PATTERNS = [
224
- /according\s+to/gi,
225
- /\bsaid\b/gi,
226
- /\bstated\b/gi,
227
- /\breported\b/gi,
228
- /\bcited\s+by\b/gi
229
- ];
230
- var NUMERIC_CLAIM_PATTERNS = [
231
- /\d+(?:\.\d+)?\s*%/g,
232
- /\d+(?:\.\d+)?\s*(?:million|billion|thousand|trillion)/gi,
233
- /\$[\d,.]+/g,
234
- /increased\s+by/gi,
235
- /decreased\s+by/gi,
236
- /grew\s+by/gi
237
- ];
238
- var STEP_PATTERNS = [
239
- /step\s+\d+/gi,
240
- /^\s*\d+\.\s+\w/gm,
241
- /\bfirst(?:ly)?,?\s/gi,
242
- /\bsecond(?:ly)?,?\s/gi,
243
- /\bfinally,?\s/gi,
244
- /\bhow\s+to\b/gi
245
- ];
246
- var SUMMARY_MARKERS = [
247
- /\bin\s+summary\b/gi,
248
- /\bin\s+conclusion\b/gi,
249
- /\bto\s+summarize\b/gi,
250
- /\bkey\s+takeaways?\b/gi,
251
- /\bbottom\s+line\b/gi,
252
- /\btl;?dr\b/gi
253
- ];
254
- var QUESTION_PATTERNS = [
255
- /what\s+is/gi,
256
- /what\s+are/gi,
257
- /how\s+to/gi,
258
- /how\s+do/gi,
259
- /why\s+is/gi,
260
- /why\s+do/gi,
261
- /when\s+to/gi,
262
- /where\s+to/gi,
263
- /which\s+is/gi,
264
- /who\s+is/gi
265
- ];
266
- var DIRECT_ANSWER_PATTERNS = [
267
- /^The\s+\w+\s+is\b/gm,
268
- /^It\s+is\b/gm,
269
- /^This\s+is\b/gm,
270
- /^They\s+are\b/gm,
271
- /\bsimply\s+put\b/gi,
272
- /\bin\s+short\b/gi
273
- ];
274
- var TRANSITION_WORDS = [
275
- "however",
276
- "therefore",
277
- "moreover",
278
- "furthermore",
279
- "consequently",
280
- "additionally",
281
- "in contrast",
282
- "similarly",
283
- "as a result",
284
- "for example",
285
- "for instance",
286
- "on the other hand",
287
- "nevertheless",
288
- "meanwhile",
289
- "likewise",
290
- "in addition",
291
- "specifically",
292
- "in particular",
293
- "notably",
294
- "importantly"
295
- ];
296
- var AUTHOR_SELECTORS = [
297
- '[rel="author"]',
298
- ".author",
299
- ".byline",
300
- '[itemprop="author"]',
301
- ".post-author",
302
- ".entry-author",
303
- 'meta[name="author"]'
304
- ];
305
- var DATE_SELECTORS = [
306
- "time[datetime]",
307
- '[itemprop="datePublished"]',
308
- '[itemprop="dateModified"]',
309
- ".published",
310
- ".post-date",
311
- ".entry-date",
312
- 'meta[property="article:published_time"]',
313
- 'meta[property="article:modified_time"]'
314
- ];
315
- var QUESTION_HEADING_PATTERN = /^(?:what|how|why|when|where|which|who|can|do|does|is|are|should|will)\b/i;
316
- var QUOTED_ATTRIBUTION_PATTERNS = [
317
- /"[^"]{10,}"\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
318
- /"[^"]{10,}",?\s+said\s+[A-Z]/g,
319
- /"[^"]{10,}",?\s+according\s+to\s+[A-Z]/g,
320
- /according\s+to\s+[A-Z][a-z]+[^,]*,\s*"[^"]{10,}"/g,
321
- /\u201c[^\u201d]{10,}\u201d\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
322
- /\u201c[^\u201d]{10,}\u201d,?\s+said\s+[A-Z]/g
323
- ];
324
- var AI_CRAWLERS = [
325
- "GPTBot",
326
- "ChatGPT-User",
327
- "ClaudeBot",
328
- "PerplexityBot",
329
- "Google-Extended"
330
- ];
331
- var MODIFIED_DATE_SELECTORS = [
332
- '[itemprop="dateModified"]',
333
- 'meta[property="article:modified_time"]'
334
- ];
335
- var PUBLISH_DATE_SELECTORS = [
336
- "time[datetime]",
337
- '[itemprop="datePublished"]',
338
- 'meta[property="article:published_time"]'
339
- ];
340
-
341
- // src/modules/audits/support/dom.ts
342
- function detectAnswerCapsules($) {
343
- let total = 0;
344
- let withCapsule = 0;
345
- $("h2").each((_, el) => {
346
- const headingText = $(el).text().trim();
347
- const isQuestion = headingText.includes("?") || QUESTION_HEADING_PATTERN.test(headingText);
348
- if (!isQuestion) return;
349
- total++;
350
- const nextP = $(el).nextAll("p").first();
351
- if (!nextP.length) return;
352
- const pText = nextP.text().trim();
353
- const firstSentence = pText.split(/[.!?]/)[0] || "";
354
- if (firstSentence.length > 0 && firstSentence.length <= 200) {
355
- withCapsule++;
356
- }
357
- });
358
- return { total, withCapsule };
359
- }
360
- function measureSectionLengths($) {
361
- const headings = $("h1, h2, h3, h4, h5, h6");
362
- if (headings.length === 0)
363
- return { sectionCount: 0, avgWordsPerSection: 0, sections: [] };
364
- const sections = [];
365
- headings.each((_, el) => {
366
- let words = 0;
367
- let sibling = $(el).next();
368
- while (sibling.length && !sibling.is("h1, h2, h3, h4, h5, h6")) {
369
- const text = sibling.text().trim();
370
- words += text.split(/\s+/).filter((w) => w.length > 0).length;
371
- sibling = sibling.next();
372
- }
373
- if (words > 0) sections.push(words);
374
- });
375
- const avg = sections.length > 0 ? Math.round(sections.reduce((a, b) => a + b, 0) / sections.length) : 0;
376
- return { sectionCount: sections.length, avgWordsPerSection: avg, sections };
377
- }
378
- function parseJsonLdObjects($) {
379
- const objects = [];
380
- $('script[type="application/ld+json"]').each((_, el) => {
381
- try {
382
- const data = JSON.parse($(el).html() || "{}");
383
- if (Array.isArray(data)) objects.push(...data);
384
- else objects.push(data);
385
- } catch {
386
- }
387
- });
388
- return objects;
389
- }
390
-
391
- // src/modules/audits/support/nlp.ts
194
+ // src/modules/nlp/service.ts
392
195
  var import_compromise = __toESM(require("compromise"));
393
196
 
394
- // src/utils/strings.ts
395
- function countWords(text) {
396
- return text.split(/\s+/).filter((w) => w.length > 0).length;
397
- }
398
- function countSentences(text) {
399
- return text.split(/[.!?]+/).filter((s) => s.trim().length > 5).length;
400
- }
401
- function countSyllables(word) {
402
- word = word.toLowerCase().replace(/[^a-z]/g, "");
403
- if (word.length <= 3) return 1;
404
- word = word.replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, "");
405
- word = word.replace(/^y/, "");
406
- const matches = word.match(/[aeiouy]{1,2}/g);
407
- return matches ? Math.max(matches.length, 1) : 1;
408
- }
409
-
410
- // src/modules/audits/support/nlp.ts
197
+ // src/modules/nlp/constants.ts
411
198
  var STOPWORDS = /* @__PURE__ */ new Set([
412
199
  "a",
413
200
  "an",
@@ -680,6 +467,8 @@ var ACRONYM_STOPLIST = /* @__PURE__ */ new Set([
680
467
  ]);
681
468
  var ORG_SUFFIXES = /\b(?:Inc|Corp|Corporation|LLC|Ltd|Limited|Co|Company|Group|Foundation|Institute|University|Association|Society|Agency|Authority|Bureau|Commission|Council|Department|Board|Trust|Fund|Partners|Ventures|Labs|Technologies|Solutions|Systems|Services|Consulting|Media|Network|Studios|Entertainment|Healthcare|Pharmaceuticals|Dynamics|Holdings|Capital|Enterprises|International)\b/i;
682
469
  var PERSON_HONORIFICS = /\b(?:Mr|Mrs|Ms|Miss|Dr|Prof|Professor|Rev|Reverend|Sen|Senator|Rep|Representative|Gov|Governor|Pres|President|Gen|General|Col|Colonel|Sgt|Sergeant|Cpl|Corporal|Pvt|Private|Adm|Admiral|Capt|Captain|Lt|Lieutenant|Maj|Major|Sir|Dame|Lord|Lady|Hon|Honorable|Judge|Justice|Chancellor|Dean|Provost)\.\s*/;
470
+
471
+ // src/modules/nlp/support/entities.ts
683
472
  function extractAcronymEntities(text) {
684
473
  const matches = text.match(/\b[A-Z]{2,6}\b/g);
685
474
  if (!matches) return [];
@@ -731,6 +520,33 @@ function isPersonByHonorific(text, entity) {
731
520
  );
732
521
  return pattern.test(text);
733
522
  }
523
/**
 * Removes duplicate and redundant entity names.
 *
 * Entities are processed longest-first. An entity is dropped when it is a
 * case-insensitive duplicate of an already accepted entity, or when it occurs
 * inside an accepted entity as a whole word/phrase (e.g. "New York" inside
 * "New York Times").
 *
 * Containment is checked on word boundaries rather than as a raw substring, so
 * an unrelated short name is no longer discarded just because its letters
 * happen to appear inside a longer one (e.g. "AI" inside "Maine").
 *
 * @param {string[]} entities - Candidate entity names; not mutated.
 * @returns {string[]} Surviving entities, ordered longest first (ties keep
 *   their input order).
 */
function smartDedup(entities) {
  if (entities.length === 0) return [];

  const isWordChar = (ch) => ch !== undefined && /[\p{L}\p{N}]/u.test(ch);

  // True when `needle` occurs in `haystack` with no letter/digit directly
  // before or after the occurrence.
  const containsWholePhrase = (haystack, needle) => {
    // An empty needle is contained in everything, as with String#includes.
    if (needle === "") return true;
    let idx = haystack.indexOf(needle);
    while (idx !== -1) {
      const before = haystack[idx - 1];
      const after = haystack[idx + needle.length];
      if (!isWordChar(before) && !isWordChar(after)) return true;
      idx = haystack.indexOf(needle, idx + 1);
    }
    return false;
  };

  // Longest first, so a full name is accepted before any of its fragments.
  const sorted = [...entities].sort((a, b) => b.length - a.length);
  const result = [];
  const lowerSeen = new Set();

  for (const entity of sorted) {
    const lower = entity.toLowerCase();
    if (lowerSeen.has(lower)) continue;

    let isContained = false;
    for (const accepted of lowerSeen) {
      if (containsWholePhrase(accepted, lower)) {
        isContained = true;
        break;
      }
    }
    if (isContained) continue;

    result.push(entity);
    lowerSeen.add(lower);
  }
  return result;
}
544
+ function mergeEntityLists(compromiseList, supplementalList, limit) {
545
+ const combined = [...compromiseList, ...supplementalList];
546
+ return smartDedup(combined).slice(0, limit);
547
+ }
548
+
549
+ // src/modules/nlp/support/topics.ts
734
550
  function extractTopicsByTfIdf(text, limit) {
735
551
  const lower = text.toLowerCase();
736
552
  const words = lower.replace(/[^a-z0-9\s'-]/g, " ").split(/\s+/).filter((w) => w.length > 2 && !STOPWORDS.has(w));
@@ -754,31 +570,60 @@ function extractTopicsByTfIdf(text, limit) {
754
570
  candidates.sort((a, b) => b[1] - a[1]);
755
571
  return candidates.slice(0, limit).map(([term]) => term);
756
572
  }
757
- function smartDedup(entities) {
758
- if (entities.length === 0) return [];
759
- const sorted = [...entities].sort((a, b) => b.length - a.length);
760
- const result = [];
761
- const lowerSeen = /* @__PURE__ */ new Set();
762
- for (const entity of sorted) {
763
- const lower = entity.toLowerCase();
764
- if (lowerSeen.has(lower)) continue;
765
- let isSubstring = false;
766
- for (const accepted of lowerSeen) {
767
- if (accepted.includes(lower)) {
768
- isSubstring = true;
769
- break;
770
- }
771
- }
772
- if (isSubstring) continue;
773
- result.push(entity);
774
- lowerSeen.add(lower);
573
+
574
+ // src/modules/nlp/support/patterns.ts
575
+ function countPatternMatches(text, patterns) {
576
+ let count = 0;
577
+ for (const pattern of patterns) {
578
+ const re = new RegExp(pattern.source, pattern.flags);
579
+ const matches = text.match(re);
580
+ if (matches) count += matches.length;
775
581
  }
776
- return result;
582
+ return count;
583
+ }
584
/**
 * Counts how many of the given transition words/phrases appear in the text.
 *
 * Each entry of `words` contributes at most 1 to the result, no matter how
 * often it occurs. The text is lower-cased before matching; entries are used
 * as given, so they are expected to be lower-case.
 *
 * Matching is done on whole words: "similarly" does not match inside
 * "dissimilarly". Whitespace inside a phrase matches any run of whitespace,
 * so a phrase broken across a line break is still found. Empty entries are
 * ignored.
 *
 * @param {string} text - Text to search.
 * @param {string[]} words - Transition words or multi-word phrases.
 * @returns {number} Number of distinct entries found in the text.
 */
function countTransitionWords(text, words) {
  const lower = text.toLowerCase();
  // Only regex syntax characters are escaped; other identity escapes are
  // rejected in unicode ("u") mode.
  const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  return words.filter((w) => {
    const parts = w.trim().split(/\s+/).filter((p) => p.length > 0);
    if (parts.length === 0) return false;
    const body = parts.map(escapeRegExp).join("\\s+");
    const re = new RegExp(
      `(?<![\\p{L}\\p{N}])${body}(?![\\p{L}\\p{N}])`,
      "u"
    );
    return re.test(lower);
  }).length;
}
588
+
589
+ // src/utils/strings.ts
590
+ function countWords(text) {
591
+ return text.split(/\s+/).filter((w) => w.length > 0).length;
592
+ }
593
+ function countSentences(text) {
594
+ return text.split(/[.!?]+/).filter((s) => s.trim().length > 5).length;
595
+ }
596
+ function countSyllables(word) {
597
+ word = word.toLowerCase().replace(/[^a-z]/g, "");
598
+ if (word.length <= 3) return 1;
599
+ word = word.replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, "");
600
+ word = word.replace(/^y/, "");
601
+ const matches = word.match(/[aeiouy]{1,2}/g);
602
+ return matches ? Math.max(matches.length, 1) : 1;
777
603
  }
778
- function mergeEntityLists(compromiseList, supplementalList, limit) {
779
- const combined = [...compromiseList, ...supplementalList];
780
- return smartDedup(combined).slice(0, limit);
604
+
605
+ // src/modules/nlp/support/readability.ts
606
+ function computeFleschReadingEase(text) {
607
+ const words = text.split(/\s+/).filter((w) => w.length > 0);
608
+ const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
609
+ const totalSyllables = words.reduce((sum, w) => sum + countSyllables(w), 0);
610
+ if (words.length === 0 || sentences.length === 0) return 0;
611
+ const avgSentenceLength2 = words.length / sentences.length;
612
+ const avgSyllablesPerWord = totalSyllables / words.length;
613
+ return 206.835 - 1.015 * avgSentenceLength2 - 84.6 * avgSyllablesPerWord;
614
+ }
615
+ function countComplexWords(text) {
616
+ const words = text.split(/\s+/).filter((w) => w.length > 0);
617
+ return words.filter((w) => countSyllables(w) >= 4).length;
781
618
  }
619
+ function avgSentenceLength(text) {
620
+ const words = text.split(/\s+/).filter((w) => w.length > 0);
621
+ const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
622
+ if (sentences.length === 0) return 0;
623
+ return Math.round(words.length / sentences.length);
624
+ }
625
+
626
+ // src/modules/nlp/service.ts
782
627
  function extractEntities(text) {
783
628
  const doc = (0, import_compromise.default)(text);
784
629
  const compromisePeople = [...new Set(doc.people().out("array"))];
@@ -820,37 +665,202 @@ function extractEntities(text) {
820
665
  numberCount
821
666
  };
822
667
  }
823
- function computeFleschReadingEase(text) {
824
- const words = text.split(/\s+/).filter((w) => w.length > 0);
825
- const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
826
- const totalSyllables = words.reduce((sum, w) => sum + countSyllables(w), 0);
827
- if (words.length === 0 || sentences.length === 0) return 0;
828
- const avgSentenceLength2 = words.length / sentences.length;
829
- const avgSyllablesPerWord = totalSyllables / words.length;
830
- return 206.835 - 1.015 * avgSentenceLength2 - 84.6 * avgSyllablesPerWord;
831
- }
832
- function countComplexWords(text) {
833
- const words = text.split(/\s+/).filter((w) => w.length > 0);
834
- return words.filter((w) => countSyllables(w) >= 4).length;
835
- }
836
- function countPatternMatches(text, patterns) {
837
- let count = 0;
838
- for (const pattern of patterns) {
839
- const re = new RegExp(pattern.source, pattern.flags);
840
- const matches = text.match(re);
841
- if (matches) count += matches.length;
842
- }
843
- return count;
668
+
669
+ // src/modules/audits/constants.ts
670
+ var CATEGORY_DISPLAY_NAMES = {
671
+ contentExtractability: "Content Extractability",
672
+ contentStructure: "Content Structure for Reuse",
673
+ answerability: "Answerability",
674
+ entityClarity: "Entity Clarity",
675
+ groundingSignals: "Grounding Signals",
676
+ authorityContext: "Authority Context",
677
+ readabilityForCompression: "Readability for Compression"
678
+ };
679
+
680
+ // src/modules/audits/support/patterns.ts
681
+ var DEFINITION_PATTERNS = [
682
+ /\bis\s+defined\s+as\b/gi,
683
+ /\brefers?\s+to\b/gi,
684
+ /\bmeans?\s+that\b/gi,
685
+ /\bis\s+a\s+type\s+of\b/gi,
686
+ /\bcan\s+be\s+described\s+as\b/gi,
687
+ /\balso\s+known\s+as\b/gi
688
+ ];
689
+ var CITATION_PATTERNS = [
690
+ /\[\d+\]/g,
691
+ /\([\w\s]+,?\s*\d{4}\)/g,
692
+ /according\s+to/gi,
693
+ /research\s+(?:shows|indicates|suggests)/gi,
694
+ /studies?\s+(?:show|indicate|suggest|found)/gi,
695
+ /data\s+from/gi,
696
+ /as\s+reported\s+by/gi
697
+ ];
698
+ var ATTRIBUTION_PATTERNS = [
699
+ /according\s+to/gi,
700
+ /\bsaid\b/gi,
701
+ /\bstated\b/gi,
702
+ /\breported\b/gi,
703
+ /\bcited\s+by\b/gi
704
+ ];
705
+ var NUMERIC_CLAIM_PATTERNS = [
706
+ /\d+(?:\.\d+)?\s*%/g,
707
+ /\d+(?:\.\d+)?\s*(?:million|billion|thousand|trillion)/gi,
708
+ /\$[\d,.]+/g,
709
+ /increased\s+by/gi,
710
+ /decreased\s+by/gi,
711
+ /grew\s+by/gi
712
+ ];
713
+ var STEP_PATTERNS = [
714
+ /step\s+\d+/gi,
715
+ /^\s*\d+\.\s+\w/gm,
716
+ /\bfirst(?:ly)?,?\s/gi,
717
+ /\bsecond(?:ly)?,?\s/gi,
718
+ /\bfinally,?\s/gi,
719
+ /\bhow\s+to\b/gi
720
+ ];
721
+ var SUMMARY_MARKERS = [
722
+ /\bin\s+summary\b/gi,
723
+ /\bin\s+conclusion\b/gi,
724
+ /\bto\s+summarize\b/gi,
725
+ /\bkey\s+takeaways?\b/gi,
726
+ /\bbottom\s+line\b/gi,
727
+ /\btl;?dr\b/gi
728
+ ];
729
+ var QUESTION_PATTERNS = [
730
+ /what\s+is/gi,
731
+ /what\s+are/gi,
732
+ /how\s+to/gi,
733
+ /how\s+do/gi,
734
+ /why\s+is/gi,
735
+ /why\s+do/gi,
736
+ /when\s+to/gi,
737
+ /where\s+to/gi,
738
+ /which\s+is/gi,
739
+ /who\s+is/gi
740
+ ];
741
+ var DIRECT_ANSWER_PATTERNS = [
742
+ /^The\s+\w+\s+is\b/gm,
743
+ /^It\s+is\b/gm,
744
+ /^This\s+is\b/gm,
745
+ /^They\s+are\b/gm,
746
+ /\bsimply\s+put\b/gi,
747
+ /\bin\s+short\b/gi
748
+ ];
749
+ var TRANSITION_WORDS = [
750
+ "however",
751
+ "therefore",
752
+ "moreover",
753
+ "furthermore",
754
+ "consequently",
755
+ "additionally",
756
+ "in contrast",
757
+ "similarly",
758
+ "as a result",
759
+ "for example",
760
+ "for instance",
761
+ "on the other hand",
762
+ "nevertheless",
763
+ "meanwhile",
764
+ "likewise",
765
+ "in addition",
766
+ "specifically",
767
+ "in particular",
768
+ "notably",
769
+ "importantly"
770
+ ];
771
+ var AUTHOR_SELECTORS = [
772
+ '[rel="author"]',
773
+ ".author",
774
+ ".byline",
775
+ '[itemprop="author"]',
776
+ ".post-author",
777
+ ".entry-author",
778
+ 'meta[name="author"]'
779
+ ];
780
+ var DATE_SELECTORS = [
781
+ "time[datetime]",
782
+ '[itemprop="datePublished"]',
783
+ '[itemprop="dateModified"]',
784
+ ".published",
785
+ ".post-date",
786
+ ".entry-date",
787
+ 'meta[property="article:published_time"]',
788
+ 'meta[property="article:modified_time"]'
789
+ ];
790
+ var QUESTION_HEADING_PATTERN = /^(?:what|how|why|when|where|which|who|can|do|does|is|are|should|will)\b/i;
791
+ var QUOTED_ATTRIBUTION_PATTERNS = [
792
+ /"[^"]{10,}"\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
793
+ /"[^"]{10,}",?\s+said\s+[A-Z]/g,
794
+ /"[^"]{10,}",?\s+according\s+to\s+[A-Z]/g,
795
+ /according\s+to\s+[A-Z][a-z]+[^,]*,\s*"[^"]{10,}"/g,
796
+ /\u201c[^\u201d]{10,}\u201d\s*[-\u2013\u2014]\s*[A-Z][a-z]+/g,
797
+ /\u201c[^\u201d]{10,}\u201d,?\s+said\s+[A-Z]/g
798
+ ];
799
+ var AI_CRAWLERS = [
800
+ "GPTBot",
801
+ "ChatGPT-User",
802
+ "ClaudeBot",
803
+ "PerplexityBot",
804
+ "Google-Extended"
805
+ ];
806
+ var MODIFIED_DATE_SELECTORS = [
807
+ '[itemprop="dateModified"]',
808
+ 'meta[property="article:modified_time"]'
809
+ ];
810
+ var PUBLISH_DATE_SELECTORS = [
811
+ "time[datetime]",
812
+ '[itemprop="datePublished"]',
813
+ 'meta[property="article:published_time"]'
814
+ ];
815
+
816
+ // src/modules/audits/support/dom.ts
817
+ function detectAnswerCapsules($) {
818
+ let total = 0;
819
+ let withCapsule = 0;
820
+ $("h2").each((_, el) => {
821
+ const headingText = $(el).text().trim();
822
+ const isQuestion = headingText.includes("?") || QUESTION_HEADING_PATTERN.test(headingText);
823
+ if (!isQuestion) return;
824
+ total++;
825
+ const nextP = $(el).nextAll("p").first();
826
+ if (!nextP.length) return;
827
+ const pText = nextP.text().trim();
828
+ const firstSentence = pText.split(/[.!?]/)[0] || "";
829
+ if (firstSentence.length > 0 && firstSentence.length <= 200) {
830
+ withCapsule++;
831
+ }
832
+ });
833
+ return { total, withCapsule };
844
834
  }
845
- function countTransitionWords(text, words) {
846
- const lower = text.toLowerCase();
847
- return words.filter((w) => lower.includes(w)).length;
835
+ function measureSectionLengths($) {
836
+ const headings = $("h1, h2, h3, h4, h5, h6");
837
+ if (headings.length === 0)
838
+ return { sectionCount: 0, avgWordsPerSection: 0, sections: [] };
839
+ const sections = [];
840
+ headings.each((_, el) => {
841
+ let words = 0;
842
+ let sibling = $(el).next();
843
+ while (sibling.length && !sibling.is("h1, h2, h3, h4, h5, h6")) {
844
+ const text = sibling.text().trim();
845
+ words += text.split(/\s+/).filter((w) => w.length > 0).length;
846
+ sibling = sibling.next();
847
+ }
848
+ if (words > 0) sections.push(words);
849
+ });
850
+ const avg = sections.length > 0 ? Math.round(sections.reduce((a, b) => a + b, 0) / sections.length) : 0;
851
+ return { sectionCount: sections.length, avgWordsPerSection: avg, sections };
848
852
  }
849
- function avgSentenceLength(text) {
850
- const words = text.split(/\s+/).filter((w) => w.length > 0);
851
- const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 5);
852
- if (sentences.length === 0) return 0;
853
- return Math.round(words.length / sentences.length);
853
+ function parseJsonLdObjects($) {
854
+ const objects = [];
855
+ $('script[type="application/ld+json"]').each((_, el) => {
856
+ try {
857
+ const data = JSON.parse($(el).html() || "{}");
858
+ if (Array.isArray(data)) objects.push(...data);
859
+ else objects.push(data);
860
+ } catch {
861
+ }
862
+ });
863
+ return objects;
854
864
  }
855
865
 
856
866
  // src/modules/audits/support/scoring.ts
@@ -4104,13 +4114,13 @@ function renderSitemapMarkdown(result) {
4104
4114
  var import_chalk = __toESM(require("chalk"));
4105
4115
  function scoreColor(score, max) {
4106
4116
  const pct = max > 0 ? score / max * 100 : 0;
4107
- if (pct >= 70) return import_chalk.default.green;
4108
- if (pct >= 40) return import_chalk.default.yellow;
4117
+ if (pct >= 90) return import_chalk.default.green;
4118
+ if (pct >= 50) return import_chalk.default.yellow;
4109
4119
  return import_chalk.default.red;
4110
4120
  }
4111
4121
  function gradeColor(grade) {
4112
4122
  if (grade.startsWith("A")) return import_chalk.default.green;
4113
- if (grade.startsWith("B")) return import_chalk.default.yellow;
4123
+ if (grade.startsWith("B") || grade.startsWith("C")) return import_chalk.default.yellow;
4114
4124
  return import_chalk.default.red;
4115
4125
  }
4116
4126
  function pad(str, len) {
@@ -4257,7 +4267,7 @@ function renderSitemapPretty(result) {
4257
4267
  lines.push(import_chalk.default.bold(" Site-Wide Category Averages:"));
4258
4268
  lines.push("");
4259
4269
  for (const avg of Object.values(result.categoryAverages)) {
4260
- const color = avg.averagePct >= 70 ? import_chalk.default.green : avg.averagePct >= 40 ? import_chalk.default.yellow : import_chalk.default.red;
4270
+ const color = scoreColor(avg.averagePct, 100);
4261
4271
  const name = pad(avg.name, 38);
4262
4272
  const dts = dots(Math.max(2, 40 - avg.name.length));
4263
4273
  lines.push(` ${import_chalk.default.bold(name)} ${dts} ${color(`${avg.averagePct}%`)}`);
@@ -4337,6 +4347,36 @@ function renderSitemapReport(result, options) {
4337
4347
  }
4338
4348
 
4339
4349
  // src/modules/sitemap/service.ts
4350
+ var import_xml_to_html_converter = require("xml-to-html-converter");
4351
/**
 * Normalizes the text of an XML text node into its character data.
 *
 * - Surrounding whitespace is trimmed.
 * - If the trimmed text is a single CDATA section, the wrapper is removed and
 *   the inner content is returned verbatim (CDATA content is literal, so no
 *   entity decoding is applied).
 * - Otherwise the five predefined XML entities and numeric character
 *   references are decoded. Sitemap URLs are required to be entity-escaped,
 *   so `&amp;` in a `<loc>` must become `&` before the URL is used.
 *
 * NOTE(review): assumes `raw` is the undecoded source text of the node, as
 * the need to strip CDATA markers here suggests — confirm against the XML
 * parser in use.
 *
 * @param {string} raw - Raw text of a text node.
 * @returns {string} Decoded character data.
 */
function stripCdata(raw) {
  const trimmed = raw.trim();
  if (trimmed.startsWith("<![CDATA[") && trimmed.endsWith("]]>")) {
    // 9 === "<![CDATA[".length, 3 === "]]>".length
    return trimmed.slice(9, -3);
  }

  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  // Single pass, so "&amp;lt;" decodes to "&lt;" and is not decoded twice.
  return trimmed.replace(
    /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g,
    (match, ref) => {
      if (ref[0] !== "#") return named[ref];
      const code =
        ref[1] === "x"
          ? Number.parseInt(ref.slice(2), 16)
          : Number.parseInt(ref.slice(1), 10);
      // Leave out-of-range references untouched instead of throwing.
      return code <= 0x10ffff ? String.fromCodePoint(code) : match;
    }
  );
}
4358
+ function collectLocText(nodes, urls) {
4359
+ for (const node of nodes) {
4360
+ if (node.xmlTag === "loc" && node.children) {
4361
+ const text = node.children.filter((c) => c.role === "textLeaf").map((c) => stripCdata(c.raw)).join("").trim();
4362
+ if (text) urls.push(text);
4363
+ }
4364
+ if (node.children) collectLocText(node.children, urls);
4365
+ }
4366
+ }
4367
+ function extractLocUrls(xml) {
4368
+ const nodes = (0, import_xml_to_html_converter.scaffold)(xml);
4369
+ const urls = [];
4370
+ collectLocText(nodes, urls);
4371
+ return urls;
4372
+ }
4373
+ function hasSitemapIndexNode(nodes) {
4374
+ for (const node of nodes) {
4375
+ if (node.xmlTag === "sitemapindex") return true;
4376
+ if (node.children && hasSitemapIndexNode(node.children)) return true;
4377
+ }
4378
+ return false;
4379
+ }
4340
4380
  async function fetchSitemapUrls(sitemapUrl, timeout, userAgent) {
4341
4381
  const response = await httpGet({
4342
4382
  url: sitemapUrl,
@@ -4346,11 +4386,13 @@ async function fetchSitemapUrls(sitemapUrl, timeout, userAgent) {
4346
4386
  if (response.status !== 200) {
4347
4387
  throw new Error(`Failed to fetch sitemap: HTTP ${response.status}`);
4348
4388
  }
4349
- const isSitemapIndex = response.data.includes("<sitemapindex");
4350
- if (isSitemapIndex) {
4389
+ const nodes = (0, import_xml_to_html_converter.scaffold)(response.data);
4390
+ if (hasSitemapIndexNode(nodes)) {
4351
4391
  return fetchSitemapIndexUrls(response.data, timeout, userAgent);
4352
4392
  }
4353
- return extractLocUrls(response.data);
4393
+ const urls = [];
4394
+ collectLocText(nodes, urls);
4395
+ return urls;
4354
4396
  }
4355
4397
  async function fetchSitemapIndexUrls(xml, timeout, userAgent) {
4356
4398
  const childSitemapUrls = extractLocUrls(xml);
@@ -4367,10 +4409,6 @@ async function fetchSitemapIndexUrls(xml, timeout, userAgent) {
4367
4409
  }
4368
4410
  return allUrls;
4369
4411
  }
4370
- function extractLocUrls(xml) {
4371
- const matches = xml.matchAll(/<loc>\s*(.*?)\s*<\/loc>/g);
4372
- return Array.from(matches, (m) => m[1]).filter(Boolean);
4373
- }
4374
4412
  function computeCategoryAverages(urlResults) {
4375
4413
  const successResults = urlResults.filter((r) => r.status === "success").map(
4376
4414
  (r) => r.result