geo-ai-search-optimization 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,298 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { writeScanOutput } from "./scan.js";
4
+
5
/**
 * Collect hyperlinks from HTML and/or Markdown content and split them into
 * internal links (same hostname as the base URL) and external links.
 *
 * Compared with a naive scan this deliberately:
 * - keeps links that carry a fragment (`/guide#install`) and only skips pure
 *   in-page anchors (`#top`);
 * - treats protocol-relative hrefs (`//cdn.host/x`) by their resolved host
 *   instead of by their leading slash;
 * - ignores Markdown images (`![alt](src)`) and strips Markdown link titles;
 * - ignores non-web schemes such as `mailto:` or `tel:`.
 *
 * @param {string} content - Raw page content (HTML, Markdown, or both).
 * @param {string} [baseUrl] - URL used to resolve relative hrefs. Falls back
 *   to "https://example.com" when missing or unparsable.
 * @returns {{
 *   internal: Array<{url: string, fullUrl: string, anchorText: string, source: string}>,
 *   external: Array<{url: string, anchorText: string, source: string}>
 * }} Internal entries expose the resolved pathname as `url`; external entries
 *   expose the full resolved href as `url`.
 */
function extractLinks(content, baseUrl) {
  const fallbackBase = "https://example.com";
  let base;
  try {
    base = new URL(baseUrl || fallbackBase);
  } catch {
    base = new URL(fallbackBase);
  }

  const links = [];

  // HTML links: capture the whole href (fragments included) and the inner markup.
  const htmlMatches = content.matchAll(/<a\s[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi);
  for (const match of htmlMatches) {
    links.push({
      href: match[1].trim(),
      anchorText: match[2].replace(/<[^>]+>/g, "").trim(),
      source: "html"
    });
  }

  // Markdown links: the lookbehind skips images, which share the bracket syntax.
  const mdMatches = content.matchAll(/(?<!!)\[([^\]]+)\]\(([^)]+)\)/g);
  for (const match of mdMatches) {
    // Drop an optional title: [text](url "title") -> url
    const href = match[2].trim().replace(/\s+["'].*$/, "");
    links.push({ href, anchorText: match[1].trim(), source: "markdown" });
  }

  const internal = [];
  const external = [];

  for (const link of links) {
    // Pure fragment links only move within the current page.
    if (link.href.startsWith("#")) {
      continue;
    }

    let resolved;
    try {
      resolved = new URL(link.href, base);
    } catch {
      // Unresolvable href: keep it only when it is clearly a site-relative path.
      if (link.href.startsWith("/") || link.href.startsWith("./")) {
        internal.push({
          url: link.href,
          fullUrl: link.href,
          anchorText: link.anchorText,
          source: link.source
        });
      }
      continue;
    }

    // mailto:, tel:, javascript: etc. are not page links. A scheme equal to the
    // base's own scheme is still accepted so non-http bases keep working.
    const isWebLink = resolved.protocol === "http:" || resolved.protocol === "https:";
    if (!isWebLink && resolved.protocol !== base.protocol) {
      continue;
    }

    if (resolved.hostname === base.hostname) {
      internal.push({
        url: resolved.pathname,
        fullUrl: resolved.href,
        anchorText: link.anchorText,
        source: link.source
      });
    } else {
      external.push({
        url: resolved.href,
        anchorText: link.anchorText,
        source: link.source
      });
    }
  }

  return { internal, external };
}
64
+
65
/**
 * Grade the anchor text of a list of links.
 *
 * Anchor text is normalized before grading: missing values count as empty and
 * runs of whitespace (including newlines left over from HTML formatting) are
 * collapsed to single spaces, so "Read\n   more" is recognized as generic.
 *
 * @param {Array<{anchorText?: string|null}>} links - Links to grade.
 * @returns {{
 *   total: number,
 *   descriptive: number,
 *   generic: number,
 *   empty: number,
 *   qualityRatio: number,
 *   issues: Array<{severity: string, message: string, count: number}>
 * }} `qualityRatio` is the percentage (0-100) of descriptive links; it is 100
 *   when there are no links at all.
 */
function analyzeAnchorText(links) {
  const genericPhrases = new Set(["click here", "read more", "learn more", "here", "link", "this", "more", "see more"]);
  const maxLength = 60;
  const minLength = 3;

  // `== null` intentionally matches both null and undefined.
  const texts = links.map((link) =>
    (link.anchorText == null ? "" : String(link.anchorText)).replace(/\s+/g, " ").trim()
  );
  const isGeneric = (text) => genericPhrases.has(text.toLowerCase());

  const genericTexts = texts.filter(isGeneric);
  const emptyCount = texts.filter((text) => text.length === 0).length;
  const longCount = texts.filter((text) => text.length > maxLength).length;
  const descriptiveCount = texts.filter(
    (text) => text.length >= minLength && text.length <= maxLength && !isGeneric(text)
  ).length;

  const issues = [];

  if (genericTexts.length > 0) {
    const examples = genericTexts.slice(0, 3).map((text) => `"${text}"`).join(", ");
    issues.push({
      severity: "warning",
      message: `${genericTexts.length} link(s) use generic anchor text: ${examples}`,
      count: genericTexts.length
    });
  }

  if (emptyCount > 0) {
    issues.push({
      severity: "error",
      message: `${emptyCount} link(s) have empty anchor text`,
      count: emptyCount
    });
  }

  if (longCount > 0) {
    issues.push({
      severity: "info",
      message: `${longCount} link(s) have very long anchor text (> 60 chars)`,
      count: longCount
    });
  }

  return {
    total: links.length,
    descriptive: descriptiveCount,
    generic: genericTexts.length,
    empty: emptyCount,
    qualityRatio: links.length > 0 ? Math.round((descriptiveCount / links.length) * 100) : 100,
    issues
  };
}
111
+
112
/**
 * Summarize how internal links are distributed across paths.
 *
 * Query strings and fragments are stripped before counting. Counts are kept in
 * a Map so that unusual path strings (e.g. "__proto__" or integer-like keys)
 * are tallied like any other path and keep first-seen order.
 *
 * @param {Array<{url: string}>} internalLinks - Internal links to analyze.
 * @returns {{
 *   uniquePaths: number,
 *   totalLinks: number,
 *   avgDepth: number,
 *   maxDepth: number,
 *   mostLinked: Array<{url: string, count: number}>,
 *   duplicateLinks: number
 * }} `avgDepth` is rounded to one decimal; `mostLinked` holds at most the ten
 *   most frequently linked paths, highest count first.
 */
function analyzeInternalPaths(internalLinks) {
  const counts = new Map();
  for (const link of internalLinks) {
    const cleanPath = link.url.split("?")[0].split("#")[0];
    counts.set(cleanPath, (counts.get(cleanPath) ?? 0) + 1);
  }

  const mostLinked = [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([url, count]) => ({ url, count }));

  // Depth = number of non-empty path segments ("/a/b" -> 2, "/" -> 0).
  const depths = [...counts.keys()].map((p) => p.split("/").filter(Boolean).length);
  const depthSum = depths.reduce((sum, depth) => sum + depth, 0);
  const avgDepth = depths.length > 0 ? Math.round((depthSum / depths.length) * 10) / 10 : 0;
  // reduce instead of Math.max(...depths): no argument-count limit on large inputs.
  const maxDepth = depths.reduce((deepest, depth) => Math.max(deepest, depth), 0);

  return {
    uniquePaths: counts.size,
    totalLinks: internalLinks.length,
    avgDepth,
    maxDepth,
    mostLinked,
    duplicateLinks: internalLinks.length - counts.size
  };
}
138
+
139
/**
 * Turn the link statistics into a single 0-100 score.
 *
 * Points are awarded for internal links (5 each, capped at 25), external links
 * (3 each, capped at 15), anchor quality (30% of the quality ratio) and unique
 * internal paths (3 each, capped at 15). Empty anchors cost 5 points each and
 * generic anchors 2 points each. The result is clamped to [0, 100].
 *
 * @param {Array} internalLinks - Internal links (only the length is used).
 * @param {Array} externalLinks - External links (only the length is used).
 * @param {{qualityRatio: number, empty: number, generic: number}} anchorAnalysis
 * @param {{uniquePaths: number}} pathAnalysis
 * @returns {number} Score between 0 and 100.
 */
function computeScore(internalLinks, externalLinks, anchorAnalysis, pathAnalysis) {
  const capped = (value, ceiling) => Math.min(value, ceiling);

  const awarded = [
    capped(internalLinks.length * 5, 25), // internal link presence
    capped(externalLinks.length * 3, 15), // external link presence (citations)
    Math.round(anchorAnalysis.qualityRatio * 0.3), // anchor text quality
    capped(pathAnalysis.uniquePaths * 3, 15) // unique path coverage
  ].reduce((sum, points) => sum + points, 0);

  const penalties = anchorAnalysis.empty * 5 + anchorAnalysis.generic * 2;

  const raw = awarded - penalties;
  return Math.max(0, Math.min(100, raw));
}
160
+
161
/**
 * Produce human-readable recommendations from the link analysis.
 *
 * Order of the returned messages: internal-link advice, external-link advice,
 * one entry per anchor issue of severity "error" or "warning", then the
 * URL-depth advice when the deepest linked path exceeds 4 levels.
 *
 * @param {Array} internalLinks - Internal links (only the length is used).
 * @param {Array} externalLinks - External links (only the length is used).
 * @param {{issues: Array<{severity: string, message: string}>}} anchorAnalysis
 * @param {{maxDepth: number}} pathAnalysis
 * @returns {string[]} Recommendation sentences (possibly empty).
 */
function buildRecommendations(internalLinks, externalLinks, anchorAnalysis, pathAnalysis) {
  const advice = [];
  const internalCount = internalLinks.length;

  if (internalCount === 0) {
    advice.push("Add internal links to connect related content and help AI understand site structure.");
  } else if (internalCount < 3) {
    advice.push("Add more internal links (aim for 3-5 per page) to improve content discovery.");
  }

  if (externalLinks.length === 0) {
    advice.push("Add external source links to increase content credibility and citability.");
  }

  const actionable = anchorAnalysis.issues.filter(
    ({ severity }) => severity === "error" || severity === "warning"
  );
  actionable.forEach(({ message }) => {
    const capitalized = message.charAt(0).toUpperCase() + message.slice(1);
    advice.push(capitalized + ". Use descriptive anchor text instead.");
  });

  const { maxDepth } = pathAnalysis;
  if (maxDepth > 4) {
    advice.push(`Some linked pages are ${maxDepth} levels deep. Flatten URL structure for better crawlability.`);
  }

  return advice;
}
186
+
187
/**
 * Download a URL and return its body as text.
 *
 * Redirects are followed and the request is aborted after 10 seconds.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<string>} Response body.
 * @throws {Error} When the response status is not OK (message includes the
 *   URL and status code). Network and timeout failures from `fetch` propagate.
 */
async function fetchContent(url) {
  const requestInit = {
    redirect: "follow",
    headers: { "user-agent": "geo-ai-search-optimization/2.2.0" },
    signal: AbortSignal.timeout(10_000)
  };

  const response = await fetch(url, requestInit);
  if (response.ok) {
    return response.text();
  }

  throw new Error(`Failed to fetch: ${url} (status ${response.status})`);
}
196
+
197
/**
 * Analyze the link structure of a page.
 *
 * `input` is fetched over HTTP(S) when it looks like a URL; otherwise it is
 * treated as a file path and read from disk. For URL input the URL itself is
 * used as the base for resolving relative links unless `options.baseUrl` is
 * given.
 *
 * @param {string} input - Page URL or path to a local file.
 * @param {{baseUrl?: string}} [options] - Optional base URL override.
 * @returns {Promise<object>} Report of kind "geo-internal-links" containing
 *   internal/external link statistics, the anchor analysis over all links,
 *   a score with its label, recommendations and a one-line summary.
 */
export async function analyzeInternalLinks(input, options = {}) {
  let baseUrl = options.baseUrl;
  let content;
  let source;

  const isRemote = /^https?:\/\//i.test(input);
  if (isRemote) {
    content = await fetchContent(input);
    source = input;
    baseUrl = baseUrl || input;
  } else {
    source = path.resolve(input);
    content = await fs.readFile(source, "utf8");
  }

  const { internal, external } = extractLinks(content, baseUrl);

  // The overall anchor analysis covers every link; the internal-only one
  // feeds the "anchorQuality" figure of the internal section.
  const anchorAnalysis = analyzeAnchorText([...internal, ...external]);
  const internalAnchorAnalysis = analyzeAnchorText(internal);
  const pathAnalysis = analyzeInternalPaths(internal);
  const score = computeScore(internal, external, anchorAnalysis, pathAnalysis);
  const recommendations = buildRecommendations(internal, external, anchorAnalysis, pathAnalysis);

  const hostnameOf = (link) => {
    try {
      return new URL(link.url).hostname;
    } catch {
      return link.url;
    }
  };
  const domains = [...new Set(external.map(hostnameOf))].slice(0, 20);

  let scoreLabel = "Needs work";
  if (score >= 70) {
    scoreLabel = "Good";
  } else if (score >= 40) {
    scoreLabel = "Fair";
  }

  return {
    kind: "geo-internal-links",
    source,
    internalLinks: {
      total: internal.length,
      unique: pathAnalysis.uniquePaths,
      anchorQuality: internalAnchorAnalysis.qualityRatio,
      mostLinked: pathAnalysis.mostLinked,
      avgDepth: pathAnalysis.avgDepth,
      maxDepth: pathAnalysis.maxDepth
    },
    externalLinks: {
      total: external.length,
      domains
    },
    anchorAnalysis,
    score,
    scoreLabel,
    recommendations,
    summary: `${internal.length} internal, ${external.length} external links. Anchor quality: ${anchorAnalysis.qualityRatio}%.`
  };
}
243
+
244
/**
 * Render an internal-link report as a Markdown document.
 *
 * Sections: header, "Internal Links", "External Links" (first 10 domains),
 * optional "Most Linked Internal Paths", optional "Anchor Text Issues", and
 * "Recommendations" (with a default line when there are none).
 *
 * @param {object} report - Report as returned by `analyzeInternalLinks`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function renderInternalLinksMarkdown(report) {
  const severityIcon = (severity) => {
    if (severity === "error") {
      return "❌";
    }
    if (severity === "warning") {
      return "⚠️";
    }
    return "ℹ️";
  };

  const domainList = report.externalLinks.domains
    .slice(0, 10)
    .map((d) => `\`${d}\``)
    .join(", ");

  const out = [];

  out.push("# Internal Link Analysis");
  out.push("");
  out.push(`- Source: \`${report.source}\``);
  out.push(`- Score: \`${report.score}/100\` (${report.scoreLabel})`);
  out.push(`- Summary: ${report.summary}`);
  out.push("");

  out.push("## Internal Links");
  out.push("");
  out.push(`- Total: \`${report.internalLinks.total}\``);
  out.push(`- Unique paths: \`${report.internalLinks.unique}\``);
  out.push(`- Anchor text quality: \`${report.internalLinks.anchorQuality}%\``);
  out.push(`- Average link depth: \`${report.internalLinks.avgDepth}\``);
  out.push("");

  out.push("## External Links");
  out.push("");
  out.push(`- Total: \`${report.externalLinks.total}\``);
  out.push(`- Unique domains: ${domainList || "none"}`);
  out.push("");

  const mostLinked = report.internalLinks.mostLinked;
  if (mostLinked.length > 0) {
    out.push("## Most Linked Internal Paths", "");
    out.push(...mostLinked.map((item) => `- \`${item.url}\` (${item.count}x)`));
    out.push("");
  }

  const anchorIssues = report.anchorAnalysis.issues;
  if (anchorIssues.length > 0) {
    out.push("## Anchor Text Issues", "");
    out.push(...anchorIssues.map((issue) => `- ${severityIcon(issue.severity)} ${issue.message}`));
    out.push("");
  }

  out.push("## Recommendations", "");
  if (report.recommendations.length === 0) {
    out.push("- Link structure is well-optimized.");
  } else {
    out.push(...report.recommendations.map((rec) => `- ${rec}`));
  }
  out.push("");

  return out.join("\n");
}
295
+
296
/**
 * Persist rendered internal-link output by delegating to `writeScanOutput`.
 *
 * @param {string} outputPath - Destination passed through to `writeScanOutput`.
 * @param {string} content - Content passed through to `writeScanOutput`.
 * @returns {Promise<*>} Whatever `writeScanOutput` resolves to.
 */
export async function writeInternalLinksOutput(outputPath, content) {
  const written = await writeScanOutput(outputPath, content);
  return written;
}
package/src/page-audit.js CHANGED
@@ -15,7 +15,7 @@ async function fetchText(url) {
15
15
  const response = await fetch(url, {
16
16
  redirect: "follow",
17
17
  headers: {
18
- "user-agent": "geo-ai-search-optimization/1.4.0"
18
+ "user-agent": "geo-ai-search-optimization/2.2.0"
19
19
  }
20
20
  });
21
21
 
@@ -0,0 +1,198 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { writeScanOutput } from "./scan.js";
4
+
5
+ const DEFAULT_DATA_DIR = ".geo-data/page-snapshots"; // Fallback snapshot directory, used when options.dataDir is not provided.
6
+
7
/**
 * Convert a URL or path into a filesystem-safe snapshot key.
 *
 * Steps: strip a leading http(s) scheme (case-insensitively), replace every
 * character outside `[a-zA-Z0-9.-]` with "_", collapse runs of "_", truncate
 * to 120 characters, then drop trailing "_".
 *
 * Dropping the trailing underscore matters because snapshot files are named
 * `${key}__${timestamp}.json` and looked up by the `${key}__` prefix: a key
 * ending in "_" (e.g. from "example.com/") would produce "example.com___…",
 * which also matches the prefix of the distinct key "example.com". With the
 * trim, a URL with and without a trailing slash share one key.
 *
 * @param {string} input - URL or path identifying the audited page.
 * @returns {string} Sanitized key of at most 120 characters.
 */
function sanitizeKey(input) {
  return input
    .replace(/^https?:\/\//i, "")
    .replace(/[^a-zA-Z0-9.-]/g, "_")
    .replace(/_+/g, "_")
    .slice(0, 120)
    // After truncation so a cut that lands on "_" is cleaned up as well.
    .replace(/_+$/, "");
}
14
+
15
/**
 * Write a compact snapshot of a page-audit result to disk.
 *
 * The snapshot file is named `${key}__${timestamp}.json`, where `key` is the
 * sanitized input/source and `timestamp` is the snapshot's ISO timestamp with
 * ":" and "." replaced by "-". A single clock reading is used for both the
 * filename and the stored `timestamp`, so the two always agree.
 *
 * @param {object} pageAuditResult - Audit result; `input`, `source`, `kind`,
 *   `compositeScore`, `dimensions` and `score` are read.
 * @param {{dataDir?: string}} [options] - Target directory override
 *   (defaults to DEFAULT_DATA_DIR, resolved against the working directory).
 * @returns {Promise<{path: string, key: string, snapshot: object}>} Location
 *   of the written file, the key used, and the stored snapshot object.
 */
export async function savePageSnapshot(pageAuditResult, options = {}) {
  const dataDir = path.resolve(options.dataDir || DEFAULT_DATA_DIR);
  await fs.mkdir(dataDir, { recursive: true });

  const key = sanitizeKey(pageAuditResult.input || pageAuditResult.source || "unknown");

  // One timestamp for both the filename and the payload.
  const isoTimestamp = new Date().toISOString();
  const filename = `${key}__${isoTimestamp.replace(/[:.]/g, "-")}.json`;
  const filePath = path.join(dataDir, filename);

  // Keep only score/label per dimension; fall back to a single "base"
  // dimension when the result has no per-dimension breakdown.
  const dimensions = pageAuditResult.dimensions
    ? Object.fromEntries(
        Object.entries(pageAuditResult.dimensions).map(([name, value]) => [
          name,
          { score: value?.score, label: value?.label }
        ])
      )
    : { base: { score: pageAuditResult.score?.score ?? pageAuditResult.score ?? 0 } };

  const snapshot = {
    timestamp: isoTimestamp,
    input: pageAuditResult.input,
    source: pageAuditResult.source,
    kind: pageAuditResult.kind,
    compositeScore: pageAuditResult.compositeScore,
    dimensions
  };

  await fs.writeFile(filePath, `${JSON.stringify(snapshot, null, 2)}\n`, "utf8");
  return { path: filePath, key, snapshot };
}
40
+
41
/**
 * List the snapshot filenames stored for a given page, newest first.
 *
 * Files are matched by the `${key}__` prefix and the ".json" suffix, then
 * sorted lexicographically and reversed. A directory that cannot be read
 * yields an empty list.
 *
 * @param {string} input - URL or path identifying the page.
 * @param {{dataDir?: string}} [options] - Snapshot directory override.
 * @returns {Promise<string[]>} Matching filenames (not full paths).
 */
export async function listPageSnapshots(input, options = {}) {
  const dataDir = path.resolve(options.dataDir || DEFAULT_DATA_DIR);
  const prefix = `${sanitizeKey(input)}__`;

  let entries;
  try {
    entries = await fs.readdir(dataDir);
  } catch {
    return [];
  }

  const matching = entries.filter((name) => {
    if (!name.startsWith(prefix)) {
      return false;
    }
    return name.endsWith(".json");
  });

  matching.sort();
  matching.reverse();
  return matching;
}
57
+
58
/**
 * Read a snapshot file and parse it as JSON.
 *
 * @param {string} filePath - Path of the snapshot file.
 * @returns {Promise<*>} Parsed JSON value.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
export async function loadPageSnapshot(filePath) {
  const raw = await fs.readFile(filePath, "utf8");
  const parsed = JSON.parse(raw);
  return parsed;
}
62
+
63
/**
 * Build a score trend for a page from its stored snapshots.
 *
 * Snapshots are listed newest first, optionally limited to the `options.last`
 * most recent ones, loaded (unreadable or unparsable files are skipped) and
 * put into chronological order. With at least two snapshots, the overall and
 * per-dimension deltas between the first and latest snapshot are computed;
 * a missing score counts as 0.
 *
 * @param {string} input - URL or path identifying the page.
 * @param {{dataDir?: string, last?: number}} [options] - Snapshot directory
 *   override and optional limit on the number of most recent snapshots.
 * @returns {Promise<object>} Report of kind "geo-page-trend" with the
 *   snapshot list, the trend (or null) and a one-line summary.
 */
export async function buildPageTrend(input, options = {}) {
  const dataDir = path.resolve(options.dataDir || DEFAULT_DATA_DIR);
  const key = sanitizeKey(input);
  const available = await listPageSnapshots(input, options);
  const limit = options.last || 0;
  const chosen = limit > 0 ? available.slice(0, limit) : available;

  if (chosen.length === 0) {
    return {
      kind: "geo-page-trend",
      input,
      key,
      snapshotCount: 0,
      snapshots: [],
      trend: null,
      summary: `No snapshots found for ${input}. Run full-page-audit --save first.`
    };
  }

  const overallScore = (snap) => snap.compositeScore ?? snap.dimensions?.base?.score ?? 0;
  const dimensionScore = (snap, name) => snap.dimensions?.[name]?.score ?? 0;

  const snapshots = [];
  for (const name of chosen) {
    let data;
    try {
      data = await loadPageSnapshot(path.join(dataDir, name));
    } catch {
      // Skip corrupt snapshots
      continue;
    }
    snapshots.push(data);
  }

  // Files were listed newest first; flip to chronological order.
  snapshots.reverse();

  let trend = null;
  if (snapshots.length >= 2) {
    const first = snapshots[0];
    const latest = snapshots[snapshots.length - 1];
    const firstScore = overallScore(first);
    const latestScore = overallScore(latest);
    const delta = latestScore - firstScore;

    let direction = "stable";
    if (delta > 0) {
      direction = "improving";
    } else if (delta < 0) {
      direction = "declining";
    }

    // Union of dimension names, first snapshot's names leading.
    const dimensionNames = new Set([
      ...Object.keys(first.dimensions || {}),
      ...Object.keys(latest.dimensions || {})
    ]);

    const dimensionTrends = {};
    for (const name of dimensionNames) {
      const before = dimensionScore(first, name);
      const after = dimensionScore(latest, name);

      let dimDirection = "flat";
      if (after > before) {
        dimDirection = "up";
      } else if (after < before) {
        dimDirection = "down";
      }

      dimensionTrends[name] = {
        first: before,
        latest: after,
        delta: after - before,
        direction: dimDirection
      };
    }

    trend = {
      firstDate: first.timestamp,
      latestDate: latest.timestamp,
      firstScore,
      latestScore,
      delta,
      direction,
      dimensionTrends
    };
  }

  let summary;
  if (trend) {
    const sign = trend.delta >= 0 ? "+" : "";
    summary = `${snapshots.length} snapshots. Score: ${trend.firstScore} → ${trend.latestScore} (${sign}${trend.delta}). Trend: ${trend.direction}.`;
  } else {
    summary = `${snapshots.length} snapshot(s) for ${input}.`;
  }

  return {
    kind: "geo-page-trend",
    input,
    key,
    snapshotCount: snapshots.length,
    snapshots: snapshots.map((snap) => ({
      timestamp: snap.timestamp,
      compositeScore: overallScore(snap)
    })),
    trend,
    summary
  };
}
148
+
149
/**
 * Render a page-trend report as a Markdown document.
 *
 * With no snapshots only the header and a hint on how to start tracking are
 * emitted. Otherwise the output contains the overall trend and a dimension
 * table (when a trend exists) followed by the snapshot history.
 *
 * @param {object} report - Report as returned by `buildPageTrend`.
 * @returns {string} Markdown text, lines joined with "\n".
 */
export function renderPageTrendMarkdown(report) {
  const signed = (value) => `${value >= 0 ? "+" : ""}${value}`;
  const pickIcon = (direction, risingLabel, fallingLabel) => {
    if (direction === risingLabel) {
      return "📈";
    }
    if (direction === fallingLabel) {
      return "📉";
    }
    return "➡️";
  };

  const out = [];
  out.push("# Page Trend Analysis");
  out.push("");
  out.push(`- Input: \`${report.input}\``);
  out.push(`- Snapshots: \`${report.snapshotCount}\``);
  out.push(`- Summary: ${report.summary}`);
  out.push("");

  if (report.snapshotCount === 0) {
    out.push("No snapshots found. Run `full-page-audit <url> --save` to start tracking.", "");
    return out.join("\n");
  }

  const { trend } = report;
  if (trend) {
    const arrow = pickIcon(trend.direction, "improving", "declining");
    out.push("## Overall Trend");
    out.push("");
    out.push(`- ${arrow} **${trend.firstScore} → ${trend.latestScore}** (${signed(trend.delta)})`);
    out.push(`- Direction: **${trend.direction}**`);
    out.push(`- Period: ${trend.firstDate.slice(0, 10)} → ${trend.latestDate.slice(0, 10)}`);
    out.push("");

    if (trend.dimensionTrends) {
      out.push("## Dimension Trends");
      out.push("");
      out.push("| Dimension | First | Latest | Delta | Direction |");
      out.push("|-----------|-------|--------|-------|-----------|");
      Object.entries(trend.dimensionTrends).forEach(([dim, t]) => {
        const icon = pickIcon(t.direction, "up", "down");
        out.push(`| ${dim} | ${t.first} | ${t.latest} | ${signed(t.delta)} | ${icon} ${t.direction} |`);
      });
      out.push("");
    }
  }

  if (report.snapshots.length > 0) {
    out.push("## Snapshot History", "");
    report.snapshots.forEach((snap) => {
      out.push(`- \`${snap.timestamp.slice(0, 19)}\` — Score: **${snap.compositeScore}**`);
    });
    out.push("");
  }

  return out.join("\n");
}
195
+
196
/**
 * Persist rendered page-trend output by delegating to `writeScanOutput`.
 *
 * @param {string} outputPath - Destination passed through to `writeScanOutput`.
 * @param {string} content - Content passed through to `writeScanOutput`.
 * @returns {Promise<*>} Whatever `writeScanOutput` resolves to.
 */
export async function writePageTrendOutput(outputPath, content) {
  const written = await writeScanOutput(outputPath, content);
  return written;
}