geo-ai-search-optimization 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/action.yml +13 -8
- package/package.json +1 -1
- package/src/auto-fix.js +7 -5
- package/src/batch-full-page-audit.js +3 -2
- package/src/citability.js +2 -11
- package/src/citation-check.js +5 -16
- package/src/cli-site-ops-commands.js +128 -3
- package/src/compare.js +6 -6
- package/src/competitor-tracking.js +275 -0
- package/src/config.js +6 -6
- package/src/content-freshness.js +411 -0
- package/src/crawlers.js +16 -13
- package/src/deep-benchmark.js +3 -2
- package/src/eeat.js +2 -11
- package/src/explain.js +268 -0
- package/src/fetch-utils.js +12 -2
- package/src/freshness.js +4 -13
- package/src/full-audit.js +19 -2
- package/src/full-page-audit.js +10 -10
- package/src/heading-structure.js +2 -11
- package/src/index.d.ts +866 -1
- package/src/index.js +14 -1
- package/src/internal-links.js +2 -11
- package/src/link-quality.js +384 -0
- package/src/monitor.js +144 -0
- package/src/optimize-llms.js +583 -0
- package/src/page-audit.js +2 -14
- package/src/page-snapshot.js +1 -1
- package/src/pdf-report.js +9 -3
- package/src/platform-ready.js +2 -11
- package/src/plugins.js +1 -1
- package/src/readability.js +2 -11
- package/src/security.js +3 -18
- package/src/sitemap.js +4 -7
- package/src/social-meta.js +2 -14
- package/src/summary.js +1 -1
- package/src/topics.js +2 -11
- package/src/url-onboarding.js +2 -14
- package/src/validate-llms.js +25 -10
- package/src/validate-schema.js +14 -12
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { fetchText } from "./fetch-utils.js";
|
|
4
|
+
import { writeScanOutput } from "./scan.js";
|
|
5
|
+
|
|
6
|
+
// Calendar year captured once when the module is loaded; year mentions are
// classified relative to this value.
const CURRENT_YEAR = new Date().getFullYear();

// Wraps a phrase into a { pattern, label } entry whose pattern matches the
// phrase on word boundaries, case-insensitively and globally.
const toSignal = (label) => ({ pattern: new RegExp(`\\b${label}\\b`, "gi"), label });

// Phrases counted as "fresh" temporal signals.
const FRESH_SIGNALS = [
  "recently",
  "this year",
  "latest",
  "updated",
  "new",
  "current"
].map(toSignal);

// Phrases counted as "stale" temporal signals.
const STALE_SIGNALS = [
  "last year",
  "previously",
  "formerly",
  "old",
  "legacy",
  "deprecated"
].map(toSignal);

// Phrases used to score evergreen content. These are tested for presence only,
// so the patterns carry no global flag.
const EVERGREEN_PATTERNS = [
  "how to",
  "what is",
  "definition",
  "guide",
  "tutorial",
  "principles?",
  "fundamentals?",
  "best practices?",
  "overview",
  "introduction"
].map((fragment) => new RegExp(`\\b${fragment}\\b`, "i"));
|
|
38
|
+
|
|
39
|
+
// Date extraction is implemented locally because freshness.js does not export
// extractDates/parseDate.
//
// Every pattern exposes the date text as capture group 1. Patterns are ordered
// most-specific first: extractDates keeps only the first hit per calendar day,
// so structured sources (JSON-LD, <time>, <meta>, visible labels) must run
// before the generic ISO / US text patterns or their `source` tag would be lost.
const DATE_PATTERNS = [
  { pattern: /"datePublished"\s*:\s*"([^"]+)"/g, format: "schema-published" },
  { pattern: /"dateModified"\s*:\s*"([^"]+)"/g, format: "schema-modified" },
  { pattern: /<time[^>]+datetime=["']([^"']+)["']/gi, format: "html-time" },
  { pattern: /name=["'](?:date|article:published_time|article:modified_time)["'][^>]+content=["']([^"']+)["']/gi, format: "meta" },
  { pattern: /content=["']([^"']+)["'][^>]+name=["'](?:date|article:published_time|article:modified_time)["']/gi, format: "meta-reverse" },
  { pattern: /\b(?:Published|Updated|Posted|Modified|Created|Reviewed|Last updated|Last reviewed)[:\s]+(\d{4}-\d{2}-\d{2}|\w+\s+\d{1,2},?\s+\d{4})/gi, format: "visible-label" },
  { pattern: /\b(20\d{2}-[01]\d-[0-3]\d)/g, format: "ISO" },
  // The whole "Month D, YYYY" text is one capture group so the full date (not
  // just the month name) reaches parseDate.
  { pattern: /\b((?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+20\d{2})/gi, format: "US" }
];

// Lower-cased English month name -> zero-based month index.
const MONTH_MAP = {
  january: 0, february: 1, march: 2, april: 3, may: 4, june: 5,
  july: 6, august: 7, september: 8, october: 9, november: 10, december: 11
};

/**
 * Builds a UTC-midnight Date for the given calendar day.
 *
 * @param {number} year - Full year.
 * @param {number} monthIndex - Zero-based month.
 * @param {number} day - Day of month.
 * @returns {Date|null} The date, or null when the components do not describe a
 *   real calendar day (e.g. February 30 would otherwise roll over into March).
 */
function buildUtcDate(year, monthIndex, day) {
  const date = new Date(Date.UTC(year, monthIndex, day));
  const roundTrips =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() === monthIndex &&
    date.getUTCDate() === day;
  return roundTrips ? date : null;
}

/**
 * Parses a date string into a Date.
 *
 * Tries, in order: an embedded `YYYY-MM-DD`, an English "Month D, YYYY", and
 * finally the engine's own Date parser (accepted only for years >= 2000).
 * The first two forms are built at UTC midnight so that the day obtained from
 * `toISOString()` does not depend on the host timezone.
 *
 * @param {string} dateStr - Raw date text.
 * @returns {Date|null} Parsed date, or null when nothing usable was found.
 */
function parseDate(dateStr) {
  if (typeof dateStr !== "string") return null;

  const isoMatch = dateStr.match(/(\d{4})-(\d{2})-(\d{2})/);
  if (isoMatch) {
    const isoDate = buildUtcDate(
      Number.parseInt(isoMatch[1], 10),
      Number.parseInt(isoMatch[2], 10) - 1,
      Number.parseInt(isoMatch[3], 10)
    );
    if (isoDate) return isoDate;
  }

  const usMatch = dateStr.match(/(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})/i);
  if (usMatch) {
    return buildUtcDate(
      Number.parseInt(usMatch[3], 10),
      MONTH_MAP[usMatch[1].toLowerCase()],
      Number.parseInt(usMatch[2], 10)
    );
  }

  // An ISO-looking token that is not a real day must not be handed to the
  // engine parser, which may silently roll it over to a different date.
  if (isoMatch) return null;

  // Last resort: engine-specific parsing of other formats.
  const fallback = new Date(dateStr);
  if (!Number.isNaN(fallback.getTime()) && fallback.getFullYear() >= 2000) return fallback;
  return null;
}

/**
 * Collects the distinct calendar dates mentioned in the content.
 *
 * Each DATE_PATTERNS entry is applied in order; only the first hit for a given
 * day (YYYY-MM-DD) is kept, tagged with the pattern's `format` as `source`.
 *
 * @param {string} content - Raw page/file content.
 * @returns {Array<{raw: string, date: string, source: string, timestamp: number}>}
 *   Entries sorted newest first; `raw` is the matched text trimmed to 60 chars.
 */
function extractDates(content) {
  const text = String(content ?? "");
  const found = [];
  const seen = new Set();

  for (const { pattern, format } of DATE_PATTERNS) {
    // matchAll works on a clone of the regex, so the shared /g patterns keep
    // no lastIndex state between calls.
    for (const match of text.matchAll(pattern)) {
      const raw = match[1] ?? match[0];
      const parsed = parseDate(raw);
      if (!parsed) continue;

      const day = parsed.toISOString().slice(0, 10);
      if (seen.has(day)) continue;
      seen.add(day);

      found.push({ raw: raw.trim().slice(0, 60), date: day, source: format, timestamp: parsed.getTime() });
    }
  }

  return found.sort((a, b) => b.timestamp - a.timestamp);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Picks the published and last-modified dates out of the extracted dates.
 *
 * Expects `dates` ordered newest first (the order extractDates returns).
 * Published: an explicit "published" source wins; otherwise the oldest
 * visible-label date; otherwise the oldest date overall. Searching from the
 * oldest end keeps a newer "Updated:" label from shadowing the publish date.
 * Modified: the newest entry whose source mentions "modified"; otherwise the
 * newest date overall.
 *
 * @param {Array<{date: string, source: string}>} dates - Extracted dates.
 * @returns {{publishedDate: string|null, modifiedDate: string|null}}
 */
function resolvePublishedAndModified(dates) {
  if (!Array.isArray(dates) || dates.length === 0) {
    return { publishedDate: null, modifiedDate: null };
  }

  const oldestFirst = [...dates].reverse();
  const published =
    oldestFirst.find((d) => d.source.includes("published")) ||
    oldestFirst.find((d) => d.source === "visible-label") ||
    oldestFirst[0];
  const modified = dates.find((d) => d.source.includes("modified")) || dates[0];

  return {
    publishedDate: published.date,
    modifiedDate: modified.date
  };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Counts whitespace-separated words in the visible text of the content.
 *
 * HTML comments and the contents of <script>/<style> elements are dropped
 * before counting, so embedded code and JSON-LD do not inflate the figure;
 * the remaining tags are then replaced by spaces.
 *
 * @param {string} text - Raw HTML or plain text.
 * @returns {number} Word count; 0 for empty, blank or missing input.
 */
function countWords(text) {
  const visible = String(text ?? "")
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
    .replace(/<[^>]+>/g, " ")
    .trim();
  return visible ? visible.split(/\s+/).length : 0;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Splits the content into lines of text with HTML tags blanked out.
 *
 * Newlines inside a tag are preserved (each other run of tag characters
 * becomes a single space), so a tag that spans several source lines does not
 * shift the line numbers of everything after it.
 *
 * @param {string} content - Raw HTML or plain text.
 * @returns {string[]} One entry per source line.
 */
function getLines(content) {
  return String(content ?? "")
    .replace(/<[^>]+>/g, (tag) => tag.replace(/[^\n]+/g, " "))
    .split(/\r?\n/);
}
|
|
110
|
+
|
|
111
|
+
/**
 * Finds temporal wording and year mentions in the tag-stripped content.
 *
 * Phrase hits come first (all FRESH_SIGNALS, then all STALE_SIGNALS; per
 * pattern, line by line), followed by year mentions line by line. A year from
 * 2000 up to next year is tagged "fresh" (current year or later), "neutral"
 * (last year) or "stale" (older); years outside that range are ignored.
 *
 * @param {string} content - Raw HTML or plain text.
 * @returns {Array<{phrase: string, type: string, line: number}>} Signals with
 *   1-based line numbers.
 */
function detectTemporalSignals(content) {
  const textLines = getLines(content);
  const collected = [];

  const phraseGroups = [
    ["fresh", FRESH_SIGNALS],
    ["stale", STALE_SIGNALS]
  ];
  for (const [type, list] of phraseGroups) {
    for (const { pattern } of list) {
      textLines.forEach((text, index) => {
        for (const hit of text.matchAll(pattern)) {
          collected.push({ phrase: hit[0], type, line: index + 1 });
        }
      });
    }
  }

  // Year references
  textLines.forEach((text, index) => {
    for (const hit of text.matchAll(/\b(20\d{2})\b/g)) {
      const year = Number.parseInt(hit[1], 10);
      if (year < 2000 || year > CURRENT_YEAR + 1) continue;

      let type = "stale";
      if (year >= CURRENT_YEAR) {
        type = "fresh";
      } else if (year >= CURRENT_YEAR - 1) {
        type = "neutral";
      }
      collected.push({ phrase: hit[0], type, line: index + 1 });
    }
  });

  return collected;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Lists references that make the content look dated: every mention of a year
 * three or more years before `currentYear`, followed by one entry per
 * discontinued technology named in the text.
 *
 * HTML tags are blanked out first, so years that only occur inside markup
 * (attribute values, URL paths) are not reported.
 *
 * @param {string} content - Raw HTML or plain text.
 * @param {number} [currentYear=CURRENT_YEAR] - Year the ages are measured from.
 * @returns {Array<{reference: string, reason: string}>} Year entries in order
 *   of appearance (one per occurrence), then technology entries.
 */
function detectStaleReferences(content, currentYear = CURRENT_YEAR) {
  const text = String(content ?? "").replace(/<[^>]+>/g, " ");
  const stale = [];

  for (const match of text.matchAll(/\b(20\d{2})\b/g)) {
    const year = Number.parseInt(match[1], 10);
    const age = currentYear - year;
    if (age >= 3) {
      stale.push({ reference: match[0], reason: `Year ${year} is ${age} years old` });
    }
  }

  const deprecatedTerms = [
    { term: "Internet Explorer", reason: "Internet Explorer is discontinued" },
    { term: "Flash Player", reason: "Adobe Flash Player is deprecated" },
    { term: "AngularJS", reason: "AngularJS (1.x) is end-of-life" },
    { term: "Python 2", reason: "Python 2 is end-of-life" }
  ];

  for (const { term, reason } of deprecatedTerms) {
    // Whole-term, case-insensitive presence check; reported once per term.
    if (new RegExp(`\\b${term}\\b`, "i").test(text)) {
      stale.push({ reference: term, reason });
    }
  }

  return stale;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Scores how much timeless ("evergreen") phrasing the content uses.
 *
 * @param {string} content - Raw HTML or plain text.
 * @returns {number} Rounded percentage of EVERGREEN_PATTERNS that occur at
 *   least once, capped at 100.
 */
function computeEvergreenScore(content) {
  const matched = EVERGREEN_PATTERNS.filter((re) => re.test(content)).length;
  const percent = Math.round((matched / EVERGREEN_PATTERNS.length) * 100);
  return Math.min(percent, 100);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Classifies the gap between publication and last modification.
 *
 * @param {string|null} publishedDate - Publication date string.
 * @param {string|null} modifiedDate - Last-modified date string.
 * @returns {string} "unknown" when a date is missing or unparseable, or when
 *   the modified date precedes the published date; "never-updated" when both
 *   strings are identical; otherwise "frequent" (<= 7 days), "monthly"
 *   (<= 30), "quarterly" (<= 90), "yearly" (<= 365) or "infrequent".
 */
function computeUpdateFrequencySignal(publishedDate, modifiedDate) {
  if (!publishedDate || !modifiedDate) return "unknown";
  if (publishedDate === modifiedDate) return "never-updated";

  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const gapMs = new Date(modifiedDate).getTime() - new Date(publishedDate).getTime();
  // NaN means a date failed to parse; a negative gap means the two dates are
  // inconsistent. Neither says anything about update cadence.
  if (Number.isNaN(gapMs) || gapMs < 0) return "unknown";

  const gapDays = Math.floor(gapMs / MS_PER_DAY);
  if (gapDays <= 7) return "frequent";
  if (gapDays <= 30) return "monthly";
  if (gapDays <= 90) return "quarterly";
  if (gapDays <= 365) return "yearly";
  return "infrequent";
}
|
|
192
|
+
|
|
193
|
+
/**
 * Computes the 0-100 freshness score, starting from 100 and applying:
 *   - age: -1 per full 30 days since the reference date, at most -30;
 *   - no dates found: -20;
 *   - stale year references: -5 each, at most -15;
 *   - no temporal signals at all: -10;
 *   - published date equal to modified date: -10;
 *   - evergreen score of 30 or more: +10.
 *
 * @param {number|null} ageDays - Days since the reference date, or null.
 * @param {number} datesFound - Number of distinct dates extracted.
 * @param {Array<{reference: string, reason: string}>} staleReferences
 * @param {Array<object>} temporalSignals
 * @param {string|null} publishedDate
 * @param {string|null} modifiedDate
 * @param {number} evergreenScore - 0-100 evergreen score.
 * @returns {number} Score clamped to [0, 100].
 */
function computeScore(ageDays, datesFound, staleReferences, temporalSignals, publishedDate, modifiedDate, evergreenScore) {
  let score = 100;

  // Content age: -1 per 30 days since last modification (max -30).
  // A future-dated page (negative age) or a non-numeric age gets no penalty
  // rather than a bonus or a NaN score.
  if (Number.isFinite(ageDays)) {
    const agePenalty = Math.min(Math.max(Math.floor(ageDays / 30), 0), 30);
    score -= agePenalty;
  }

  // No dates found: -20
  if (datesFound === 0) {
    score -= 20;
  }

  // Stale year references: -5 each (max -15). Only year-based entries count;
  // discontinued-technology entries are not penalised here.
  const staleYearRefs = (staleReferences ?? []).filter(
    (r) => /^Year \d+/.test(r.reason) || /^\d{4}$/.test(r.reference)
  );
  score -= Math.min(staleYearRefs.length * 5, 15);

  // No temporal signals: -10
  if ((temporalSignals ?? []).length === 0) {
    score -= 10;
  }

  // datePublished == dateModified (never updated): -10
  if (publishedDate && modifiedDate && publishedDate === modifiedDate) {
    score -= 10;
  }

  // Evergreen bonus: +10 if content uses timeless language patterns
  if (evergreenScore >= 30) {
    score += 10;
  }

  return Math.max(0, Math.min(100, score));
}
|
|
229
|
+
|
|
230
|
+
/**
 * Maps a freshness score to its label.
 *
 * @param {number} score - Freshness score.
 * @returns {string} "Fresh" (>= 80), "Aging" (>= 60), "Stale" (>= 40),
 *   otherwise "Outdated".
 */
function scoreLabel(score) {
  const tiers = [
    [80, "Fresh"],
    [60, "Aging"],
    [40, "Stale"]
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  return tier ? tier[1] : "Outdated";
}
|
|
236
|
+
|
|
237
|
+
/**
 * Derives the list of recommendations for a freshness report.
 *
 * Reads publishedDate, modifiedDate, updateFrequencySignal, staleReferences,
 * temporalSignals, evergreenScore and ageDays from the report; each rule that
 * applies contributes one message, in a fixed order.
 *
 * @param {object} report - Freshness report (see analyzeContentFreshness).
 * @returns {string[]} Recommendation messages; empty when no rule applies.
 */
function buildRecommendations(report) {
  const countOfType = (type) => report.temporalSignals.filter((s) => s.type === type).length;
  const freshTotal = countOfType("fresh");
  const staleTotal = countOfType("stale");
  const staleRefTotal = report.staleReferences.length;

  // [applies, message] pairs, listed in output order.
  const rules = [
    [
      !report.publishedDate && !report.modifiedDate,
      "Add datePublished and dateModified to your structured data (JSON-LD) for AI freshness signals."
    ],
    [
      report.updateFrequencySignal === "never-updated",
      "Content has never been updated (datePublished equals dateModified). Update dateModified when content changes."
    ],
    [
      staleRefTotal > 0,
      `Found ${staleRefTotal} stale reference(s). Review and update outdated year mentions or deprecated technology references.`
    ],
    [
      freshTotal === 0,
      "Add temporal freshness language (e.g., 'recently', 'latest', current year) to signal content currency."
    ],
    [
      staleTotal > freshTotal,
      "Content has more stale temporal signals than fresh ones. Consider updating language to reflect current state."
    ],
    [
      report.evergreenScore < 20,
      "Content lacks evergreen patterns. Consider adding timeless structural elements (guides, tutorials, definitions)."
    ],
    [
      report.ageDays !== null && report.ageDays > 180,
      "Content is over 6 months old. Review and refresh to maintain AI citation relevance."
    ]
  ];

  return rules.filter(([applies]) => applies).map(([, message]) => message);
}
|
|
273
|
+
|
|
274
|
+
/**
 * Analyzes how fresh a page or file looks and builds the freshness report.
 *
 * An input starting with http:// or https:// is fetched with fetchText; any
 * other input is resolved as a file path and read as UTF-8.
 *
 * @param {string} input - URL or file path.
 * @param {object} [options] - Accepted for interface compatibility; not read
 *   by this function.
 * @returns {Promise<object>} Report of kind "geo-content-freshness" carrying
 *   the detected dates, age, signals, stale references, scores,
 *   recommendations and a one-line summary.
 */
export async function analyzeContentFreshness(input, options = {}) {
  const isRemote = /^https?:\/\//i.test(input);
  const source = isRemote ? input : path.resolve(input);
  const content = isRemote ? await fetchText(input) : await fs.readFile(source, "utf8");

  const wordCount = countWords(content);
  const dates = extractDates(content);
  const { publishedDate, modifiedDate } = resolvePublishedAndModified(dates);

  // Age is measured from the modified date, falling back to the published date.
  const now = new Date();
  const referenceDate = modifiedDate || publishedDate;
  const ageDays = referenceDate
    ? Math.floor((now - new Date(referenceDate)) / (1000 * 60 * 60 * 24))
    : null;

  const temporalSignals = detectTemporalSignals(content);
  const staleReferences = detectStaleReferences(content);
  const evergreenScore = computeEvergreenScore(content);
  const updateFrequencySignal = computeUpdateFrequencySignal(publishedDate, modifiedDate);

  const score = computeScore(ageDays, dates.length, staleReferences, temporalSignals, publishedDate, modifiedDate, evergreenScore);
  const label = scoreLabel(score);

  const report = {
    kind: "geo-content-freshness",
    source,
    wordCount,
    publishedDate,
    modifiedDate,
    ageDays,
    temporalSignals,
    staleReferences,
    evergreenScore,
    updateFrequencySignal,
    score,
    scoreLabel: label,
    recommendations: [],
    summary: ""
  };

  report.recommendations = buildRecommendations(report);

  const headline = `Content freshness: ${score}/100 (${label}).`;
  const counts = `${temporalSignals.length} temporal signals, ${staleReferences.length} stale references.`;
  if (ageDays !== null) {
    report.summary = `${headline} Last modified: ${modifiedDate || publishedDate} (${ageDays} days ago). ${counts}`;
  } else {
    report.summary = `${headline} No dates detected. ${counts}`;
  }

  return report;
}
|
|
331
|
+
|
|
332
|
+
/**
 * Renders a freshness report as a Markdown document.
 *
 * Sections: header, Dates, Evergreen Score, Temporal Signals (only when there
 * are signals; grouped as Fresh / Stale / Neutral, up to 10 entries per
 * group), Stale References (only when present; up to 10 entries) and
 * Recommendations.
 *
 * @param {object} report - Report produced by analyzeContentFreshness.
 * @returns {string} Markdown text ending with a newline.
 */
export function renderContentFreshnessMarkdown(report) {
  const out = [];

  out.push(
    "# Content Freshness Analysis",
    "",
    `- Source: \`${report.source}\``,
    `- Score: \`${report.score}/100\` (${report.scoreLabel})`,
    `- Word count: \`${report.wordCount}\``,
    `- Summary: ${report.summary}`,
    ""
  );

  out.push(
    "## Dates",
    "",
    `- Published: \`${report.publishedDate || "not detected"}\``,
    `- Modified: \`${report.modifiedDate || "not detected"}\``,
    `- Age (days): \`${report.ageDays ?? "N/A"}\``,
    `- Update frequency: \`${report.updateFrequencySignal}\``,
    ""
  );

  out.push("## Evergreen Score", "", `- Evergreen score: \`${report.evergreenScore}/100\``, "");

  const signals = report.temporalSignals;
  if (signals.length > 0) {
    out.push("## Temporal Signals", "");
    const sections = [
      ["Fresh", "fresh"],
      ["Stale", "stale"],
      ["Neutral", "neutral"]
    ];
    for (const [heading, type] of sections) {
      const group = signals.filter((s) => s.type === type);
      if (group.length === 0) continue;
      out.push(`### ${heading} (${group.length})`, "");
      out.push(...group.slice(0, 10).map((s) => `- "${s.phrase}" (line ${s.line})`));
      out.push("");
    }
  }

  const staleRefs = report.staleReferences;
  if (staleRefs.length > 0) {
    out.push("## Stale References", "");
    out.push(...staleRefs.slice(0, 10).map((ref) => `- \`${ref.reference}\`: ${ref.reason}`));
    out.push("");
  }

  out.push("## Recommendations", "");
  const bullets = report.recommendations.length === 0
    ? ["- Content freshness signals are well-configured."]
    : report.recommendations.map((rec) => `- ${rec}`);
  out.push(...bullets, "");

  return out.join("\n");
}
|
|
408
|
+
|
|
409
|
+
/**
 * Writes rendered freshness output by delegating to writeScanOutput.
 *
 * @param {string} outputPath - Destination passed through to writeScanOutput.
 * @param {string} content - Content passed through to writeScanOutput.
 * @returns {Promise<*>} Whatever writeScanOutput resolves to.
 */
export async function writeContentFreshnessOutput(outputPath, content) {
  const result = await writeScanOutput(outputPath, content);
  return result;
}
|
package/src/crawlers.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import fs from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
|
+
import { fetchResponse } from "./fetch-utils.js";
|
|
3
4
|
import { writeScanOutput } from "./scan.js";
|
|
4
5
|
|
|
5
6
|
const KNOWN_AI_CRAWLERS = [
|
|
@@ -43,9 +44,10 @@ function parseRobotsTxt(content) {
|
|
|
43
44
|
continue;
|
|
44
45
|
}
|
|
45
46
|
|
|
46
|
-
const disallowMatch = line.match(/^disallow:\s*(
|
|
47
|
+
const disallowMatch = line.match(/^disallow:\s*(.*)$/i);
|
|
47
48
|
if (disallowMatch) {
|
|
48
|
-
|
|
49
|
+
const disallowPath = disallowMatch[1].trim();
|
|
50
|
+
current.rules.push({ type: "disallow", path: disallowPath });
|
|
49
51
|
continue;
|
|
50
52
|
}
|
|
51
53
|
|
|
@@ -75,7 +77,7 @@ function analyzeCrawlerAccess(blocks) {
|
|
|
75
77
|
if (specificBlock) {
|
|
76
78
|
matchedAgent = specificBlock.userAgent;
|
|
77
79
|
const hasDisallow = specificBlock.rules.some(
|
|
78
|
-
(r) => r.type === "disallow" &&
|
|
80
|
+
(r) => r.type === "disallow" && r.path === "/"
|
|
79
81
|
);
|
|
80
82
|
const hasAllow = specificBlock.rules.some(
|
|
81
83
|
(r) => r.type === "allow" && r.path === "/"
|
|
@@ -96,9 +98,15 @@ function analyzeCrawlerAccess(blocks) {
|
|
|
96
98
|
const hasDisallow = wildcardBlock.rules.some(
|
|
97
99
|
(r) => r.type === "disallow" && r.path === "/"
|
|
98
100
|
);
|
|
99
|
-
|
|
101
|
+
const hasAllow = wildcardBlock.rules.some(
|
|
102
|
+
(r) => r.type === "allow" && r.path === "/"
|
|
103
|
+
);
|
|
104
|
+
if (hasDisallow && !hasAllow) {
|
|
100
105
|
status = "blocked";
|
|
101
106
|
reason = "Blocked by wildcard User-agent: * Disallow: /";
|
|
107
|
+
} else if (hasDisallow && hasAllow) {
|
|
108
|
+
status = "partial";
|
|
109
|
+
reason = "Mixed rules for wildcard User-agent: *";
|
|
102
110
|
}
|
|
103
111
|
}
|
|
104
112
|
|
|
@@ -179,16 +187,11 @@ function generateOptimalRobotsTxt(crawlerResults, options = {}) {
|
|
|
179
187
|
async function fetchRobotsTxt(url) {
|
|
180
188
|
const parsedUrl = new URL(url);
|
|
181
189
|
const robotsUrl = `${parsedUrl.protocol}//${parsedUrl.host}/robots.txt`;
|
|
182
|
-
const
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
signal: AbortSignal.timeout(10_000)
|
|
186
|
-
});
|
|
187
|
-
if (!response.ok) {
|
|
188
|
-
return { found: false, url: robotsUrl, status: response.status, content: "" };
|
|
190
|
+
const result = await fetchResponse(robotsUrl);
|
|
191
|
+
if (!result.ok) {
|
|
192
|
+
return { found: false, url: robotsUrl, status: result.status, content: "" };
|
|
189
193
|
}
|
|
190
|
-
|
|
191
|
-
return { found: true, url: robotsUrl, status: response.status, content };
|
|
194
|
+
return { found: true, url: robotsUrl, status: result.status, content: result.text };
|
|
192
195
|
}
|
|
193
196
|
|
|
194
197
|
export async function analyzeCrawlers(input, options = {}) {
|
package/src/deep-benchmark.js
CHANGED
|
@@ -125,8 +125,9 @@ export async function deepBenchmark(ownUrl, competitorUrls, options = {}) {
|
|
|
125
125
|
}));
|
|
126
126
|
|
|
127
127
|
const ownComposite = ownResult.ok ? ownResult.data.compositeScore : 0;
|
|
128
|
-
const
|
|
129
|
-
|
|
128
|
+
const successfulCompetitors = successful.filter((r) => r.url !== ownUrl);
|
|
129
|
+
const avgCompComposite = successfulCompetitors.length > 0
|
|
130
|
+
? Math.round(successfulCompetitors.reduce((sum, r) => sum + r.data.compositeScore, 0) / successfulCompetitors.length)
|
|
130
131
|
: null;
|
|
131
132
|
|
|
132
133
|
return {
|
package/src/eeat.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import fs from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
|
+
import { fetchText } from "./fetch-utils.js";
|
|
3
4
|
import { writeScanOutput } from "./scan.js";
|
|
4
5
|
|
|
5
6
|
function stripHtml(text) {
|
|
@@ -125,22 +126,12 @@ function buildRecommendations(experience, expertise, authority, trust, authorSig
|
|
|
125
126
|
return recs;
|
|
126
127
|
}
|
|
127
128
|
|
|
128
|
-
async function fetchContent(url) {
|
|
129
|
-
const response = await fetch(url, {
|
|
130
|
-
redirect: "follow",
|
|
131
|
-
headers: { "user-agent": "geo-ai-search-optimization/2.2.0" },
|
|
132
|
-
signal: AbortSignal.timeout(10_000)
|
|
133
|
-
});
|
|
134
|
-
if (!response.ok) throw new Error(`Failed to fetch: ${url} (status ${response.status})`);
|
|
135
|
-
return response.text();
|
|
136
|
-
}
|
|
137
|
-
|
|
138
129
|
export async function analyzeEeat(input, options = {}) {
|
|
139
130
|
let rawContent;
|
|
140
131
|
let source;
|
|
141
132
|
|
|
142
133
|
if (/^https?:\/\//i.test(input)) {
|
|
143
|
-
rawContent = await
|
|
134
|
+
rawContent = await fetchText(input);
|
|
144
135
|
source = input;
|
|
145
136
|
} else {
|
|
146
137
|
const filePath = path.resolve(input);
|