geo-ai-search-optimization 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { fetchText } from "./fetch-utils.js";
4
+ import { writeScanOutput } from "./scan.js";
5
+
6
// Calendar year (local time) at the moment this module is loaded.
const CURRENT_YEAR = new Date().getFullYear();

// Wraps each phrase in word boundaries and pairs the resulting
// case-insensitive, global pattern with the phrase it was built from.
const toSignals = (labels) =>
  labels.map((label) => ({ pattern: new RegExp(`\\b${label}\\b`, "gi"), label }));

// Phrases counted as "fresh" temporal language.
const FRESH_SIGNALS = toSignals([
  "recently",
  "this year",
  "latest",
  "updated",
  "new",
  "current"
]);

// Phrases counted as "stale" temporal language.
const STALE_SIGNALS = toSignals([
  "last year",
  "previously",
  "formerly",
  "old",
  "legacy",
  "deprecated"
]);

// Case-insensitive, non-global patterns for timeless ("evergreen") wording.
// Entries are regex fragments, so a trailing `s?` makes the plural optional.
const EVERGREEN_PATTERNS = [
  "how to",
  "what is",
  "definition",
  "guide",
  "tutorial",
  "principles?",
  "fundamentals?",
  "best practices?",
  "overview",
  "introduction"
].map((fragment) => new RegExp(`\\b${fragment}\\b`, "i"));
38
+
39
// Reuse date extraction logic inline (freshness.js does not export extractDates/parseDate)
//
// Every pattern exposes the complete date text as capture group 1, because
// extractDates() hands group 1 to parseDate(). Visible labels are split into a
// "published" family ("visible-label") and a "modified" family
// ("visible-modified") so the source tag says which kind of date was found.
const DATE_PATTERNS = [
  { pattern: /\b(20\d{2}-[01]\d-[0-3]\d)/g, format: "ISO" },
  { pattern: /\b((?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+20\d{2})/gi, format: "US" },
  { pattern: /"datePublished"\s*:\s*"([^"]+)"/g, format: "schema-published" },
  { pattern: /"dateModified"\s*:\s*"([^"]+)"/g, format: "schema-modified" },
  { pattern: /<time[^>]+datetime=["']([^"']+)["']/gi, format: "html-time" },
  { pattern: /name=["'](?:date|article:published_time|article:modified_time)["'][^>]+content=["']([^"']+)["']/gi, format: "meta" },
  { pattern: /content=["']([^"']+)["'][^>]+name=["'](?:date|article:published_time|article:modified_time)["']/gi, format: "meta-reverse" },
  { pattern: /\b(?:Published|Posted|Created)[:\s]+(\d{4}-\d{2}-\d{2}|\w+\s+\d{1,2},?\s+\d{4})/gi, format: "visible-label" },
  { pattern: /\b(?:Updated|Modified|Reviewed|Last updated|Last reviewed)[:\s]+(\d{4}-\d{2}-\d{2}|\w+\s+\d{1,2},?\s+\d{4})/gi, format: "visible-modified" }
];

// Lower-cased English month name -> zero-based month index.
const MONTH_MAP = {
  january: 0, february: 1, march: 2, april: 3, may: 4, june: 5,
  july: 6, august: 7, september: 8, october: 9, november: 10, december: 11
};

/**
 * Parses a date string into a Date.
 *
 * Tries, in order: the first `YYYY-MM-DD` substring, a
 * "Month D, YYYY" substring, and finally the native Date parser (accepted only
 * when the resulting year is 2000 or later).
 *
 * @param {string} dateStr - Text that may contain a date.
 * @returns {Date|null} The parsed date, or null when nothing usable was found.
 */
function parseDate(dateStr) {
  const isoMatch = dateStr.match(/(\d{4})-(\d{2})-(\d{2})/);
  if (isoMatch) {
    // A date-only ISO string is interpreted as UTC midnight.
    const isoDate = new Date(isoMatch[0]);
    if (!Number.isNaN(isoDate.getTime())) return isoDate;
  }

  const usMatch = dateStr.match(/(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2}),?\s+(\d{4})/i);
  if (usMatch) {
    const month = MONTH_MAP[usMatch[1].toLowerCase()];
    if (month !== undefined) {
      // Build the date at UTC midnight, like the ISO branch. Callers serialize
      // with toISOString(), so a local-midnight date would slip to the previous
      // day in any timezone ahead of UTC.
      const year = Number.parseInt(usMatch[3], 10);
      const day = Number.parseInt(usMatch[2], 10);
      return new Date(Date.UTC(year, month, day));
    }
  }

  const fallback = new Date(dateStr);
  if (!Number.isNaN(fallback.getTime()) && fallback.getFullYear() >= 2000) return fallback;
  return null;
}

/**
 * Collects every date that DATE_PATTERNS can find in `content`.
 *
 * Entries are de-duplicated per (source, day). The same day may therefore
 * appear once for each source that reported it, which keeps specific tags such
 * as "schema-published" from being hidden by the generic "ISO"/"US" patterns
 * that run first.
 *
 * @param {string} content - Raw page or file text.
 * @returns {Array<{raw: string, date: string, source: string, timestamp: number}>}
 *   Matches sorted newest first; `date` is `YYYY-MM-DD`, `raw` is the matched
 *   text trimmed to at most 60 characters.
 */
function extractDates(content) {
  const found = [];
  const seen = new Set();

  for (const { pattern, format } of DATE_PATTERNS) {
    // matchAll iterates over a private copy of the regex, so the shared
    // pattern objects never have their lastIndex advanced.
    for (const match of content.matchAll(pattern)) {
      const raw = match[1] || match[0];
      const parsed = parseDate(raw);
      if (!parsed) continue;

      const date = parsed.toISOString().slice(0, 10);
      const key = `${format}|${date}`;
      if (seen.has(key)) continue;

      seen.add(key);
      found.push({ raw: raw.trim().slice(0, 60), date, source: format, timestamp: parsed.getTime() });
    }
  }

  return found.sort((a, b) => b.timestamp - a.timestamp);
}
91
+
92
/**
 * Picks a published date and a modified date from extracted date entries.
 *
 * Published: the first entry whose source contains "published" or equals
 * "visible-label"; otherwise the last entry. Modified: the first entry whose
 * source contains "modified"; otherwise the first entry.
 *
 * @param {Array<{date: string, source: string}>} dates - Extracted entries.
 * @returns {{publishedDate: string|null, modifiedDate: string|null}}
 */
function resolvePublishedAndModified(dates) {
  if (dates.length === 0) {
    return { publishedDate: null, modifiedDate: null };
  }

  const looksPublished = ({ source }) => source.includes("published") || source === "visible-label";
  const looksModified = ({ source }) => source.includes("modified");

  const oldestFallback = dates[dates.length - 1];
  const newestFallback = dates[0];

  const publishedEntry = dates.find(looksPublished) || oldestFallback;
  const modifiedEntry = dates.find(looksModified) || newestFallback;

  return { publishedDate: publishedEntry.date, modifiedDate: modifiedEntry.date };
}
101
+
102
/**
 * Counts whitespace-separated words after replacing `<...>` tags with spaces.
 *
 * @param {string} text - Plain text or HTML.
 * @returns {number} Number of words; 0 when nothing but tags/whitespace remains.
 */
function countWords(text) {
  const withoutTags = text.replace(/<[^>]+>/g, " ");
  const words = withoutTags.match(/\S+/g);
  return words === null ? 0 : words.length;
}
106
+
107
/**
 * Splits content into lines after blanking out `<...>` tags.
 *
 * A tag that spans several lines is replaced by a space plus the same number
 * of newlines it contained, so the index of every returned line still matches
 * the line number in the original content.
 *
 * @param {string} content - Plain text or HTML.
 * @returns {string[]} One entry per original line, with tags removed.
 */
function getLines(content) {
  const withoutTags = content.replace(/<[^>]+>/g, (tag) => {
    const newlineCount = tag.split("\n").length - 1;
    return ` ${"\n".repeat(newlineCount)}`;
  });
  return withoutTags.split(/\n/);
}
110
+
111
/**
 * Finds temporal wording and year mentions, line by line.
 *
 * Phrase hits come first (all fresh phrases, then all stale phrases, each
 * phrase scanned over every line), followed by year mentions in line order.
 * Years from 2000 up to next year are reported: the current year or later is
 * "fresh", the previous year is "neutral", anything older is "stale".
 *
 * @param {string} content - Plain text or HTML.
 * @returns {Array<{phrase: string, type: string, line: number}>} Hits with
 *   1-based line numbers relative to the output of getLines().
 */
function detectTemporalSignals(content) {
  const lines = getLines(content);
  const signals = [];

  const phraseGroups = [
    ["fresh", FRESH_SIGNALS],
    ["stale", STALE_SIGNALS]
  ];

  for (const [type, list] of phraseGroups) {
    for (const { pattern } of list) {
      lines.forEach((text, index) => {
        for (const hit of text.matchAll(pattern)) {
          signals.push({ phrase: hit[0], type, line: index + 1 });
        }
      });
    }
  }

  // Year references
  lines.forEach((text, index) => {
    for (const hit of text.matchAll(/\b(20\d{2})\b/g)) {
      const year = Number.parseInt(hit[1], 10);
      if (year < 2000 || year > CURRENT_YEAR + 1) continue;

      let type = "stale";
      if (year >= CURRENT_YEAR) {
        type = "fresh";
      } else if (year >= CURRENT_YEAR - 1) {
        type = "neutral";
      }
      signals.push({ phrase: hit[0], type, line: index + 1 });
    }
  });

  return signals;
}
143
+
144
/**
 * Lists outdated references found in the content.
 *
 * Reports each distinct four-digit `20xx` year that is at least three years
 * older than `currentYear` (once per year, in order of first appearance),
 * followed by one entry per discontinued technology mentioned.
 *
 * @param {string} content - Plain text or HTML.
 * @param {number} [currentYear=CURRENT_YEAR] - Year that ages are measured
 *   against; defaults to the module-level current year.
 * @returns {Array<{reference: string, reason: string}>}
 */
function detectStaleReferences(content, currentYear = CURRENT_YEAR) {
  const stale = [];

  // Deprecated terms below are reported once each; do the same for years so a
  // year mentioned repeatedly does not inflate the stale-reference count.
  const reportedYears = new Set();
  for (const match of content.matchAll(/\b(20\d{2})\b/g)) {
    const year = Number.parseInt(match[1], 10);
    if (year > currentYear - 3 || reportedYears.has(year)) continue;

    reportedYears.add(year);
    stale.push({ reference: match[0], reason: `Year ${year} is ${currentYear - year} years old` });
  }

  const deprecatedTerms = [
    { pattern: /\bInternet Explorer\b/i, reference: "Internet Explorer", reason: "Internet Explorer is discontinued" },
    { pattern: /\bFlash Player\b/i, reference: "Flash Player", reason: "Adobe Flash Player is deprecated" },
    { pattern: /\bAngularJS\b/i, reference: "AngularJS", reason: "AngularJS (1.x) is end-of-life" },
    { pattern: /\bPython 2\b/i, reference: "Python 2", reason: "Python 2 is end-of-life" }
  ];

  for (const { pattern, reference, reason } of deprecatedTerms) {
    if (pattern.test(content)) {
      stale.push({ reference, reason });
    }
  }

  return stale;
}
171
+
172
/**
 * Scores how much timeless wording the content uses.
 *
 * @param {string} content - Plain text or HTML.
 * @returns {number} Rounded percentage (0-100) of EVERGREEN_PATTERNS that
 *   match the content at least once.
 */
function computeEvergreenScore(content) {
  const matched = EVERGREEN_PATTERNS.filter((pattern) => pattern.test(content)).length;
  const percent = Math.round((matched / EVERGREEN_PATTERNS.length) * 100);
  return Math.min(percent, 100);
}
179
+
180
/**
 * Classifies the gap between the published and the modified date.
 *
 * @param {string|null} publishedDate - Published date string, or null.
 * @param {string|null} modifiedDate - Modified date string, or null.
 * @returns {string} "unknown" when either date is missing, unparseable, or the
 *   modified date precedes the published date; "never-updated" when both
 *   strings are identical; otherwise "frequent" (gap of 7 days or less),
 *   "monthly" (30 or less), "quarterly" (90 or less), "yearly" (365 or less)
 *   or "infrequent".
 */
function computeUpdateFrequencySignal(publishedDate, modifiedDate) {
  if (!publishedDate || !modifiedDate) return "unknown";
  if (publishedDate === modifiedDate) return "never-updated";

  const gapMs = new Date(modifiedDate).getTime() - new Date(publishedDate).getTime();
  // NaN means a date failed to parse; a negative gap means the two dates are
  // inconsistent. Neither says anything about how often the page is updated.
  if (Number.isNaN(gapMs) || gapMs < 0) return "unknown";

  const gapDays = Math.floor(gapMs / (1000 * 60 * 60 * 24));
  if (gapDays <= 7) return "frequent";
  if (gapDays <= 30) return "monthly";
  if (gapDays <= 90) return "quarterly";
  if (gapDays <= 365) return "yearly";
  return "infrequent";
}
192
+
193
/**
 * Computes the 0-100 freshness score from the collected signals.
 *
 * @param {number|null} ageDays - Days since the reference date, or null when
 *   no date was found.
 * @param {number} datesFound - Number of dates detected in the content.
 * @param {Array<{reference: string, reason: string}>} staleReferences - Stale
 *   references; only year-based entries count toward the penalty.
 * @param {Array} temporalSignals - Detected temporal signals.
 * @param {string|null} publishedDate - Published date, if any.
 * @param {string|null} modifiedDate - Modified date, if any.
 * @param {number} evergreenScore - Evergreen score (0-100).
 * @returns {number} Score clamped to the range 0-100.
 */
function computeScore(ageDays, datesFound, staleReferences, temporalSignals, publishedDate, modifiedDate, evergreenScore) {
  let score = 100;

  // Content age: -1 per 30 days since last modification (max -30).
  // Only positive ages are penalized: a reference date in the future would
  // otherwise produce a negative penalty, i.e. an unbounded bonus.
  if (ageDays !== null && ageDays > 0) {
    score -= Math.min(Math.floor(ageDays / 30), 30);
  }

  // No dates found: -20
  if (datesFound === 0) {
    score -= 20;
  }

  // Stale year references: -5 each (max -15). Entries for deprecated
  // technologies match neither test and are not penalized here.
  const staleYearRefs = staleReferences.filter((r) => /^Year \d+/.test(r.reason) || /^\d{4}$/.test(r.reference));
  score -= Math.min(staleYearRefs.length * 5, 15);

  // No temporal signals: -10
  if (temporalSignals.length === 0) {
    score -= 10;
  }

  // datePublished == dateModified (never updated): -10
  if (publishedDate && modifiedDate && publishedDate === modifiedDate) {
    score -= 10;
  }

  // Evergreen bonus: +10 if content uses timeless language patterns
  if (evergreenScore >= 30) {
    score += 10;
  }

  return Math.max(0, Math.min(100, score));
}
229
+
230
/**
 * Maps a numeric freshness score to its label.
 *
 * @param {number} score - Freshness score.
 * @returns {string} "Fresh" (80 or more), "Aging" (60 or more), "Stale"
 *   (40 or more), otherwise "Outdated".
 */
function scoreLabel(score) {
  const tiers = [
    [80, "Fresh"],
    [60, "Aging"],
    [40, "Stale"]
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  return tier ? tier[1] : "Outdated";
}
236
+
237
/**
 * Builds the list of improvement suggestions for a freshness report.
 *
 * @param {object} report - Report fields read here: publishedDate,
 *   modifiedDate, updateFrequencySignal, staleReferences, temporalSignals,
 *   evergreenScore and ageDays.
 * @returns {string[]} Applicable recommendations, in a fixed order.
 */
function buildRecommendations(report) {
  const countSignals = (type) => report.temporalSignals.filter((signal) => signal.type === type).length;
  const freshCount = countSignals("fresh");
  const staleCount = countSignals("stale");

  // Each rule pairs "does it apply?" with the message to emit.
  const rules = [
    [
      !report.publishedDate && !report.modifiedDate,
      "Add datePublished and dateModified to your structured data (JSON-LD) for AI freshness signals."
    ],
    [
      report.updateFrequencySignal === "never-updated",
      "Content has never been updated (datePublished equals dateModified). Update dateModified when content changes."
    ],
    [
      report.staleReferences.length > 0,
      `Found ${report.staleReferences.length} stale reference(s). Review and update outdated year mentions or deprecated technology references.`
    ],
    [
      freshCount === 0,
      "Add temporal freshness language (e.g., 'recently', 'latest', current year) to signal content currency."
    ],
    [
      staleCount > freshCount,
      "Content has more stale temporal signals than fresh ones. Consider updating language to reflect current state."
    ],
    [
      report.evergreenScore < 20,
      "Content lacks evergreen patterns. Consider adding timeless structural elements (guides, tutorials, definitions)."
    ],
    [
      report.ageDays !== null && report.ageDays > 180,
      "Content is over 6 months old. Review and refresh to maintain AI citation relevance."
    ]
  ];

  return rules.filter(([applies]) => applies).map(([, message]) => message);
}
273
+
274
/**
 * Analyzes the freshness of a page or local file and returns a report.
 *
 * @param {string} input - An http(s) URL (fetched with fetchText) or a file
 *   path (resolved with path.resolve and read as UTF-8).
 * @param {object} [options={}] - Accepted but not read by this function.
 * @returns {Promise<object>} Report of kind "geo-content-freshness" holding
 *   the source, word count, dates, age, signals, scores, recommendations and
 *   a one-line summary.
 */
export async function analyzeContentFreshness(input, options = {}) {
  const isRemote = /^https?:\/\//i.test(input);
  const source = isRemote ? input : path.resolve(input);
  const content = isRemote
    ? await fetchText(source)
    : await fs.readFile(source, "utf8");

  const wordCount = countWords(content);
  const dates = extractDates(content);
  const { publishedDate, modifiedDate } = resolvePublishedAndModified(dates);

  // Age is measured from the modified date, falling back to the published one.
  const now = new Date();
  const referenceDate = modifiedDate || publishedDate;
  const ageDays = referenceDate
    ? Math.floor((now - new Date(referenceDate)) / (1000 * 60 * 60 * 24))
    : null;

  const temporalSignals = detectTemporalSignals(content);
  const staleReferences = detectStaleReferences(content);
  const evergreenScore = computeEvergreenScore(content);
  const updateFrequencySignal = computeUpdateFrequencySignal(publishedDate, modifiedDate);

  const score = computeScore(ageDays, dates.length, staleReferences, temporalSignals, publishedDate, modifiedDate, evergreenScore);
  const label = scoreLabel(score);

  const report = {
    kind: "geo-content-freshness",
    source,
    wordCount,
    publishedDate,
    modifiedDate,
    ageDays,
    temporalSignals,
    staleReferences,
    evergreenScore,
    updateFrequencySignal,
    score,
    scoreLabel: label,
    recommendations: [],
    summary: ""
  };

  report.recommendations = buildRecommendations(report);

  if (ageDays !== null) {
    report.summary = `Content freshness: ${score}/100 (${label}). Last modified: ${referenceDate} (${ageDays} days ago). ${temporalSignals.length} temporal signals, ${staleReferences.length} stale references.`;
  } else {
    report.summary = `Content freshness: ${score}/100 (${label}). No dates detected. ${temporalSignals.length} temporal signals, ${staleReferences.length} stale references.`;
  }

  return report;
}
331
+
332
/**
 * Renders a content-freshness report as a Markdown document.
 *
 * Temporal signals are grouped into Fresh / Stale / Neutral subsections and,
 * like stale references, limited to the first 10 entries per list.
 *
 * @param {object} report - Report produced by analyzeContentFreshness.
 * @returns {string} Markdown text ending with a newline.
 */
export function renderContentFreshnessMarkdown(report) {
  const out = [];
  // Appends the given rows followed by one blank separator row.
  const section = (...rows) => out.push(...rows, "");

  section(
    "# Content Freshness Analysis",
    "",
    `- Source: \`${report.source}\``,
    `- Score: \`${report.score}/100\` (${report.scoreLabel})`,
    `- Word count: \`${report.wordCount}\``,
    `- Summary: ${report.summary}`
  );

  section(
    "## Dates",
    "",
    `- Published: \`${report.publishedDate || "not detected"}\``,
    `- Modified: \`${report.modifiedDate || "not detected"}\``,
    `- Age (days): \`${report.ageDays ?? "N/A"}\``,
    `- Update frequency: \`${report.updateFrequencySignal}\``
  );

  section(
    "## Evergreen Score",
    "",
    `- Evergreen score: \`${report.evergreenScore}/100\``
  );

  if (report.temporalSignals.length > 0) {
    out.push("## Temporal Signals", "");
    const groups = [
      ["Fresh", "fresh"],
      ["Stale", "stale"],
      ["Neutral", "neutral"]
    ];
    for (const [heading, type] of groups) {
      const members = report.temporalSignals.filter((signal) => signal.type === type);
      if (members.length === 0) continue;
      section(
        `### ${heading} (${members.length})`,
        "",
        ...members.slice(0, 10).map((signal) => `- "${signal.phrase}" (line ${signal.line})`)
      );
    }
  }

  if (report.staleReferences.length > 0) {
    section(
      "## Stale References",
      "",
      ...report.staleReferences.slice(0, 10).map((ref) => `- \`${ref.reference}\`: ${ref.reason}`)
    );
  }

  const recommendationRows = report.recommendations.length === 0
    ? ["- Content freshness signals are well-configured."]
    : report.recommendations.map((rec) => `- ${rec}`);
  section("## Recommendations", "", ...recommendationRows);

  return out.join("\n");
}
408
+
409
/**
 * Writes rendered freshness output by delegating to writeScanOutput.
 *
 * @param {string} outputPath - Destination passed through to writeScanOutput.
 * @param {string} content - Content passed through to writeScanOutput.
 * @returns {Promise<*>} Whatever writeScanOutput resolves to.
 */
export async function writeContentFreshnessOutput(outputPath, content) {
  const result = await writeScanOutput(outputPath, content);
  return result;
}
package/src/crawlers.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
+ import { fetchResponse } from "./fetch-utils.js";
3
4
  import { writeScanOutput } from "./scan.js";
4
5
 
5
6
  const KNOWN_AI_CRAWLERS = [
@@ -43,9 +44,10 @@ function parseRobotsTxt(content) {
43
44
  continue;
44
45
  }
45
46
 
46
- const disallowMatch = line.match(/^disallow:\s*(.+)$/i);
47
+ const disallowMatch = line.match(/^disallow:\s*(.*)$/i);
47
48
  if (disallowMatch) {
48
- current.rules.push({ type: "disallow", path: disallowMatch[1].trim() });
49
+ const disallowPath = disallowMatch[1].trim();
50
+ current.rules.push({ type: "disallow", path: disallowPath });
49
51
  continue;
50
52
  }
51
53
 
@@ -75,7 +77,7 @@ function analyzeCrawlerAccess(blocks) {
75
77
  if (specificBlock) {
76
78
  matchedAgent = specificBlock.userAgent;
77
79
  const hasDisallow = specificBlock.rules.some(
78
- (r) => r.type === "disallow" && (r.path === "/" || r.path === "")
80
+ (r) => r.type === "disallow" && r.path === "/"
79
81
  );
80
82
  const hasAllow = specificBlock.rules.some(
81
83
  (r) => r.type === "allow" && r.path === "/"
@@ -96,9 +98,15 @@ function analyzeCrawlerAccess(blocks) {
96
98
  const hasDisallow = wildcardBlock.rules.some(
97
99
  (r) => r.type === "disallow" && r.path === "/"
98
100
  );
99
- if (hasDisallow) {
101
+ const hasAllow = wildcardBlock.rules.some(
102
+ (r) => r.type === "allow" && r.path === "/"
103
+ );
104
+ if (hasDisallow && !hasAllow) {
100
105
  status = "blocked";
101
106
  reason = "Blocked by wildcard User-agent: * Disallow: /";
107
+ } else if (hasDisallow && hasAllow) {
108
+ status = "partial";
109
+ reason = "Mixed rules for wildcard User-agent: *";
102
110
  }
103
111
  }
104
112
 
@@ -179,16 +187,11 @@ function generateOptimalRobotsTxt(crawlerResults, options = {}) {
179
187
  async function fetchRobotsTxt(url) {
180
188
  const parsedUrl = new URL(url);
181
189
  const robotsUrl = `${parsedUrl.protocol}//${parsedUrl.host}/robots.txt`;
182
- const response = await fetch(robotsUrl, {
183
- redirect: "follow",
184
- headers: { "user-agent": "geo-ai-search-optimization/2.2.0" },
185
- signal: AbortSignal.timeout(10_000)
186
- });
187
- if (!response.ok) {
188
- return { found: false, url: robotsUrl, status: response.status, content: "" };
190
+ const result = await fetchResponse(robotsUrl);
191
+ if (!result.ok) {
192
+ return { found: false, url: robotsUrl, status: result.status, content: "" };
189
193
  }
190
- const content = await response.text();
191
- return { found: true, url: robotsUrl, status: response.status, content };
194
+ return { found: true, url: robotsUrl, status: result.status, content: result.text };
192
195
  }
193
196
 
194
197
  export async function analyzeCrawlers(input, options = {}) {
@@ -125,8 +125,9 @@ export async function deepBenchmark(ownUrl, competitorUrls, options = {}) {
125
125
  }));
126
126
 
127
127
  const ownComposite = ownResult.ok ? ownResult.data.compositeScore : 0;
128
- const avgCompComposite = successful.length > 1
129
- ? Math.round(successful.filter((r) => r.url !== ownUrl).reduce((sum, r) => sum + r.data.compositeScore, 0) / (successful.length - 1))
128
+ const successfulCompetitors = successful.filter((r) => r.url !== ownUrl);
129
+ const avgCompComposite = successfulCompetitors.length > 0
130
+ ? Math.round(successfulCompetitors.reduce((sum, r) => sum + r.data.compositeScore, 0) / successfulCompetitors.length)
130
131
  : null;
131
132
 
132
133
  return {
package/src/eeat.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import fs from "node:fs/promises";
2
2
  import path from "node:path";
3
+ import { fetchText } from "./fetch-utils.js";
3
4
  import { writeScanOutput } from "./scan.js";
4
5
 
5
6
  function stripHtml(text) {
@@ -125,22 +126,12 @@ function buildRecommendations(experience, expertise, authority, trust, authorSig
125
126
  return recs;
126
127
  }
127
128
 
128
- async function fetchContent(url) {
129
- const response = await fetch(url, {
130
- redirect: "follow",
131
- headers: { "user-agent": "geo-ai-search-optimization/2.2.0" },
132
- signal: AbortSignal.timeout(10_000)
133
- });
134
- if (!response.ok) throw new Error(`Failed to fetch: ${url} (status ${response.status})`);
135
- return response.text();
136
- }
137
-
138
129
  export async function analyzeEeat(input, options = {}) {
139
130
  let rawContent;
140
131
  let source;
141
132
 
142
133
  if (/^https?:\/\//i.test(input)) {
143
- rawContent = await fetchContent(input);
134
+ rawContent = await fetchText(input);
144
135
  source = input;
145
136
  } else {
146
137
  const filePath = path.resolve(input);