geo-ai-search-optimization 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { writeScanOutput } from "./scan.js";
4
+
5
// Named character references that are decoded to their literal text.
// Any other named reference is replaced by a space.
const HTML_ENTITY_TEXT = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", "\""],
  ["apos", "'"],
  ["nbsp", " "]
]);

/**
 * Turn the inside of a character reference (the part between `&` and `;`)
 * into text.
 *
 * @param {string} name - e.g. "amp", "#37" or "#x25".
 * @returns {string} The decoded character, or a single space when the
 *   reference is unknown or names an invalid code point.
 */
function decodeHtmlEntity(name) {
  const numeric = /^#(?:x([0-9a-f]+)|(\d+))$/i.exec(name);
  if (numeric) {
    const codePoint = numeric[1] !== undefined
      ? Number.parseInt(numeric[1], 16)
      : Number.parseInt(numeric[2], 10);
    // String.fromCodePoint throws a RangeError above U+10FFFF, and lone
    // surrogates are not meaningful text, so both fall back to a space.
    const isValid =
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
    return isValid ? String.fromCodePoint(codePoint) : " ";
  }
  return HTML_ENTITY_TEXT.get(name.toLowerCase()) ?? " ";
}

/**
 * Reduce an HTML document to its visible text.
 *
 * Script and style elements, comments and all remaining tags are replaced by
 * spaces. Character references are decoded when they are numeric or one of
 * the common named ones (so "AT&amp;T" stays "AT&T" and "5&#37;" stays "5%");
 * unknown references become a space. Whitespace is then collapsed to single
 * spaces and the result is trimmed.
 *
 * @param {string} text - Raw HTML.
 * @returns {string} Single-line plain text.
 */
function stripHtml(text) {
  return text
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    // Comments go before the generic tag pass: a ">" inside a comment would
    // otherwise end the "tag" early and leak the rest of the comment as text.
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<[^>]+>/g, " ")
    // Entities are decoded in a single pass after tag removal so that a
    // decoded "<" is never mistaken for markup and nothing is decoded twice.
    .replace(/&([a-z0-9#]+);/gi, (_match, name) => decodeHtmlEntity(name))
    .replace(/\s+/g, " ")
    .trim();
}
14
+
15
// A leading front-matter block: an opening line of exactly "---", an optional
// body, and a closing line of exactly "---". Anchoring both delimiters to
// whole lines keeps a "---" inside a value (e.g. "title: a---b") from ending
// the block early, and keeps text that merely starts with "---" untouched.
const FRONT_MATTER_BLOCK = /^\uFEFF?---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?=\r?\n|$)/;

/**
 * Produce the plain text that the analysis runs on.
 *
 * Content containing an `<html`, `<head` or `<body` tag opener is treated as
 * HTML and passed to `stripHtml`. Anything else is returned trimmed, with a
 * leading `---`-delimited front-matter block removed if present.
 *
 * @param {string} content - Raw file or response body.
 * @returns {string} Plain text.
 */
function extractPlainText(content) {
  if (/<html|<head|<body/i.test(content)) {
    return stripHtml(content);
  }
  return content.replace(FRONT_MATTER_BLOCK, "").trim();
}
21
+
22
/**
 * Break text into sentences.
 *
 * The text is cut at whitespace that follows a sentence terminator
 * (".", "!", "?" or "。"). Each piece is trimmed, and pieces of ten
 * characters or fewer are dropped.
 *
 * @param {string} text - Plain text.
 * @returns {string[]} Sentences longer than ten characters, in order.
 */
function splitSentences(text) {
  const kept = [];
  for (const piece of text.split(/(?<=[.!?。!?])\s+/)) {
    const sentence = piece.trim();
    if (sentence.length > 10) {
      kept.push(sentence);
    }
  }
  return kept;
}
28
+
29
/**
 * Break text into paragraphs separated by blank lines.
 *
 * Inside each paragraph every run of whitespace is collapsed to one space
 * and the ends are trimmed. Paragraphs of twenty characters or fewer are
 * dropped.
 *
 * @param {string} text - Plain text.
 * @returns {string[]} Normalized paragraphs longer than twenty characters.
 */
function splitParagraphs(text) {
  const paragraphs = [];
  for (const chunk of text.split(/\n\s*\n/)) {
    const normalized = chunk.replace(/\s+/g, " ").trim();
    if (normalized.length > 20) {
      paragraphs.push(normalized);
    }
  }
  return paragraphs;
}
35
+
36
// A number followed by a unit that marks it as a statistic.
// The trailing word boundary applies only to the word units: "%" is not a
// word character, so requiring \b after it would reject "50% of users" and
// "grew 50%." (there is no boundary between "%" and a space or period).
const STAT_PATTERN = /\b\d+(\.\d+)?\s*(%|(?:percent|million|billion|thousand|x|times|fold)\b)/i;
// A numeric date (YYYY-M-D or YYYY/M/D) or a long-form date ("March 5, 2024").
// Both forms accept any year from 1900 to 2099.
const DATE_PATTERN = /\b(?:19|20)\d\d[-/]\d{1,2}[-/]\d{1,2}\b|\b(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+(?:19|20)\d\d\b/i;
// Any standalone number with at least two integer digits.
const NUMBER_FACT_PATTERN = /\b\d{2,}(\.\d+)?\b/;
// Verbs and phrases that typically introduce an assertion.
const CLAIM_VERBS = /\b(is|are|was|were|shows?|demonstrates?|indicates?|proves?|reveals?|found|according to|reported|measured|calculated|estimated|averaged)\b/i;
// Words and phrases that signal a comparison.
const COMPARISON_PATTERN = /\b(more than|less than|greater|fewer|higher|lower|faster|slower|better|worse|compared to|versus|outperform|exceed)\b/i;
41
+
42
/**
 * Count the sentences that carry each kind of claim signal.
 *
 * A sentence is a factual claim when it matches both NUMBER_FACT_PATTERN and
 * CLAIM_VERBS; the statistic, date and comparison counters each use their
 * own pattern. A sentence may be counted in several categories.
 *
 * @param {string[]} sentences - Sentences to inspect.
 * @returns {{total: number, factualClaims: number, statClaims: number,
 *   datedClaims: number, comparisonClaims: number, claimDensity: number,
 *   statDensity: number}} Counts, plus factual and statistic counts as
 *   rounded percentages of all sentences.
 */
function analyzeClaims(sentences) {
  const counts = {
    factualClaims: 0,
    statClaims: 0,
    datedClaims: 0,
    comparisonClaims: 0
  };

  for (const text of sentences) {
    if (NUMBER_FACT_PATTERN.test(text) && CLAIM_VERBS.test(text)) {
      counts.factualClaims += 1;
    }
    if (STAT_PATTERN.test(text)) {
      counts.statClaims += 1;
    }
    if (DATE_PATTERN.test(text)) {
      counts.datedClaims += 1;
    }
    if (COMPARISON_PATTERN.test(text)) {
      counts.comparisonClaims += 1;
    }
  }

  // Fall back to 1 so an empty input yields 0% rather than NaN.
  const denominator = sentences.length || 1;
  const asPercent = (count) => Math.round((count / denominator) * 100);

  return {
    total: sentences.length,
    factualClaims: counts.factualClaims,
    statClaims: counts.statClaims,
    datedClaims: counts.datedClaims,
    comparisonClaims: counts.comparisonClaims,
    claimDensity: asPercent(counts.factualClaims),
    statDensity: asPercent(counts.statClaims)
  };
}
72
+
73
// Two or more consecutive Capitalized words ("New York").
const ENTITY_PATTERN = /\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+\b/g;
// One or more consecutive ALL-CAPS words of at least two letters ("REST API").
const TECH_TERM_PATTERN = /\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\b/g;
// A capitalized name directly followed by a common top-level domain.
const BRAND_PATTERN = /\b[A-Z][a-zA-Z]*(?:\.(?:com|io|ai|org|net))\b/g;

/**
 * Collect the distinct named entities, technical terms and brand names that
 * appear in the text.
 *
 * @param {string} text - Plain text.
 * @returns {{namedEntities: string[], techTerms: string[], brands: string[],
 *   entityDensity: number}} Up to 20 named entities, 20 technical terms and
 *   10 brands in first-seen order, plus the number of distinct named
 *   entities and technical terms per 100 whitespace-separated words,
 *   rounded to one decimal place.
 */
function analyzeEntities(text) {
  const matchesOf = (pattern) => text.match(pattern) ?? [];

  const named = new Set();
  for (const hit of matchesOf(ENTITY_PATTERN)) {
    named.add(hit.trim());
  }

  const tech = new Set();
  for (const hit of matchesOf(TECH_TERM_PATTERN)) {
    if (hit.length > 1) {
      tech.add(hit);
    }
  }

  const brandNames = new Set();
  for (const hit of matchesOf(BRAND_PATTERN)) {
    brandNames.add(hit.trim());
  }

  const words = text.split(/\s+/).length || 1;
  // Work in tenths of a percent, then shift back, to keep one decimal place.
  const tenthsOfPercent = Math.round(((named.size + tech.size) / words) * 1000);

  return {
    namedEntities: Array.from(named).slice(0, 20),
    techTerms: Array.from(tech).slice(0, 20),
    brands: Array.from(brandNames).slice(0, 10),
    entityDensity: tenthsOfPercent / 10
  };
}
90
+
91
/**
 * Pick the sentences most likely to be quoted verbatim.
 *
 * Only sentences of 8 to 40 whitespace-separated words are considered. Each
 * earns 3 points for a statistic, 2 each for a claim verb, a multi-digit
 * number and a comparison, and 1 more for being 12 to 25 words long.
 * Sentences scoring at least 3 are kept.
 *
 * @param {string[]} sentences - Candidate sentences.
 * @returns {{sentence: string, score: number, wordCount: number}[]} The ten
 *   highest-scoring sentences, best first, each truncated to 200 characters.
 */
function analyzeQuotability(sentences) {
  const signals = [
    [STAT_PATTERN, 3],
    [CLAIM_VERBS, 2],
    [NUMBER_FACT_PATTERN, 2],
    [COMPARISON_PATTERN, 2]
  ];
  const candidates = [];

  for (const sentence of sentences) {
    const wordCount = sentence.split(/\s+/).length;
    const withinLength = wordCount >= 8 && wordCount <= 40;
    if (!withinLength) {
      continue;
    }

    let score = signals.reduce(
      (sum, [pattern, points]) => (pattern.test(sentence) ? sum + points : sum),
      0
    );
    if (wordCount >= 12 && wordCount <= 25) {
      score += 1;
    }
    if (score < 3) {
      continue;
    }

    candidates.push({ sentence: sentence.slice(0, 200), score, wordCount });
  }

  candidates.sort((left, right) => right.score - left.score);
  return candidates.slice(0, 10);
}
112
+
113
/**
 * Detect structural elements in raw HTML or Markdown and score them.
 *
 * Scoring: list +20, table +20, definition +15, headings +15 (or +25 when
 * there are at least three), and a further +20 for five or more headings.
 * The total is capped at 100.
 *
 * @param {string} content - Raw (unstripped) HTML or Markdown.
 * @returns {{hasList: boolean, hasTable: boolean, hasDefinition: boolean,
 *   hasHeading: boolean, headingCount: number, structureScore: number}}
 */
function analyzeStructure(content) {
  // Markdown list item: optional indent, then a bullet ("-", "*", "+") or an
  // ordered marker ("1." / "1)"), then whitespace. Ordered markers are limited
  // to three digits so a line that starts with a year such as "2024." is not
  // mistaken for a list item.
  const markdownListItem = /^[ \t]*(?:[-*+]|\d{1,3}[.)])\s/m;

  const hasList = /<[ou]l\b/i.test(content) || markdownListItem.test(content);
  const hasTable = /<table\b/i.test(content) || /\|.*\|.*\|/m.test(content);
  const hasDefinition = /<dl\b/i.test(content) || /\b(is defined as|refers to|means)\b/i.test(content);

  const htmlHeadings = (content.match(/<h[1-6]\b/gi) || []).length;
  const markdownHeadings = (content.match(/^#{1,6}\s/gm) || []).length;
  const headingCount = htmlHeadings + markdownHeadings;
  const hasHeading = headingCount > 0;

  let structureScore = 0;
  if (hasList) structureScore += 20;
  if (hasTable) structureScore += 20;
  if (hasDefinition) structureScore += 15;
  if (headingCount >= 3) structureScore += 25;
  else if (hasHeading) structureScore += 15;
  if (headingCount >= 5) structureScore += 20;

  return {
    hasList,
    hasTable,
    hasDefinition,
    hasHeading,
    headingCount,
    structureScore: Math.min(structureScore, 100)
  };
}
138
+
139
/**
 * Blend the individual signals into one citability score.
 *
 * Each signal is first mapped to a sub-score and then weighted:
 * claim density x2 (weight 25), entity density x10 (weight 15), ten points
 * per quotable sentence (weight 25), the structure score as given
 * (weight 20), and a length score of 20, 60 or 100 for fewer than 300,
 * 300-799, or 800+ words (weight 15). Computed sub-scores are capped at 100.
 *
 * @param {{claimDensity: number}} claims
 * @param {{entityDensity: number}} entities
 * @param {Array} quotable - Only its length is used.
 * @param {{structureScore: number}} structure
 * @param {number} wordCount
 * @returns {number} Rounded weighted score, at most 100.
 */
function computeCitabilityScore(claims, entities, quotable, structure, wordCount) {
  const cap = (value) => Math.min(value, 100);

  let lengthScore = 20;
  if (wordCount >= 800) {
    lengthScore = 100;
  } else if (wordCount >= 300) {
    lengthScore = 60;
  }

  // [sub-score, weight] pairs; the weights add up to 100.
  const components = [
    [cap(claims.claimDensity * 2), 25],
    [cap(entities.entityDensity * 10), 15],
    [cap(quotable.length * 10), 25],
    [structure.structureScore, 20],
    [lengthScore, 15]
  ];

  const weightedSum = components.reduce(
    (sum, [subScore, weight]) => sum + subScore * weight,
    0
  );

  return Math.round(cap(weightedSum / 100));
}
162
+
163
/**
 * Translate a numeric citability score into a human-readable label.
 *
 * @param {number} score - Citability score.
 * @returns {string} "Highly citable" (80+), "Moderately citable" (60-79),
 *   "Low citability" (40-59) or "Poor citability" (anything else).
 */
function getScoreLabel(score) {
  // Ordered from the highest threshold down; the first one reached wins.
  const tiers = [
    [80, "Highly citable"],
    [60, "Moderately citable"],
    [40, "Low citability"]
  ];
  const tier = tiers.find(([minimum]) => score >= minimum);
  return tier ? tier[1] : "Poor citability";
}
169
+
170
/**
 * List the improvement suggestions that apply to the analyzed content.
 *
 * Every rule is checked independently; the returned messages keep the order
 * in which the rules are declared.
 *
 * @param {{claimDensity: number, statDensity: number}} claims
 * @param {{entityDensity: number}} entities
 * @param {Array} quotable - Only its length is used.
 * @param {{hasList: boolean, hasTable: boolean, headingCount: number}} structure
 * @param {number} wordCount
 * @returns {string[]} Zero or more recommendation messages.
 */
function buildRecommendations(claims, entities, quotable, structure, wordCount) {
  const rules = [
    {
      applies: claims.claimDensity < 15,
      message: "Add more factual claims with specific numbers, statistics, or data points to increase claim density."
    },
    {
      applies: claims.statDensity < 5,
      message: "Include statistics (percentages, measurements, benchmarks) to make claims more citable."
    },
    {
      applies: entities.entityDensity < 1,
      message: "Reference more named entities, brands, or technical terms to help AI identify the topic."
    },
    {
      applies: quotable.length < 3,
      message: "Write more self-contained, quotable sentences (12-25 words) with clear factual assertions."
    },
    {
      applies: !structure.hasList,
      message: "Add bulleted or numbered lists to make information scannable and extractable."
    },
    {
      applies: !structure.hasTable,
      message: "Add comparison tables or data tables for easy extraction by AI systems."
    },
    {
      applies: structure.headingCount < 3,
      message: "Add more descriptive headings to organize content into clearly defined sections."
    },
    {
      applies: wordCount < 300,
      message: "Expand content to at least 300 words to provide sufficient depth for AI citation."
    }
  ];

  return rules.filter((rule) => rule.applies).map((rule) => rule.message);
}
200
+
201
/**
 * Download a URL and return its body as text.
 *
 * Redirects are followed, the package user-agent header is sent, and the
 * request is aborted after 10 seconds.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response status is not OK.
 */
async function fetchContent(url) {
  const requestInit = {
    redirect: "follow",
    headers: { "user-agent": "geo-ai-search-optimization/2.2.0" },
    signal: AbortSignal.timeout(10_000)
  };

  const response = await fetch(url, requestInit);
  if (response.ok) {
    return response.text();
  }
  throw new Error(`Failed to fetch: ${url} (status ${response.status})`);
}
210
+
211
/**
 * Analyze how citable a document is.
 *
 * The input is fetched over HTTP(S) when it looks like a URL; otherwise it is
 * resolved as a file path and read as UTF-8. The raw content is reduced to
 * plain text for the claim, entity and quotability analysis, while structure
 * is detected on the raw content so markup is still visible to it.
 *
 * @param {string} input - An http(s) URL or a file path.
 * @param {object} [options] - Accepted for interface compatibility; not read
 *   by this function.
 * @returns {Promise<object>} A report with `kind: "geo-citability"`, the
 *   source, counts, score and label, per-analysis details, recommendations
 *   and a one-line summary.
 * @throws {Error} When the fetch fails or the file cannot be read.
 */
export async function analyzeCitability(input, options = {}) {
  let rawContent;
  let source;

  if (/^https?:\/\//i.test(input)) {
    rawContent = await fetchContent(input);
    source = input;
  } else {
    const filePath = path.resolve(input);
    rawContent = await fs.readFile(filePath, "utf8");
    source = filePath;
  }

  const plainText = extractPlainText(rawContent);
  const sentences = splitSentences(plainText);
  // NOTE(review): stripHtml collapses all whitespace to single spaces, so for
  // HTML input this sees no blank lines and reports at most one paragraph.
  const paragraphs = splitParagraphs(plainText);
  // "".split(/\s+/) yields [""], so empty tokens are dropped to report
  // 0 words (not 1) for empty content.
  const wordCount = plainText.split(/\s+/).filter(Boolean).length;

  const claims = analyzeClaims(sentences);
  const entities = analyzeEntities(plainText);
  const quotable = analyzeQuotability(sentences);
  const structure = analyzeStructure(rawContent);
  const score = computeCitabilityScore(claims, entities, quotable, structure, wordCount);
  const scoreLabel = getScoreLabel(score);
  const recommendations = buildRecommendations(claims, entities, quotable, structure, wordCount);

  return {
    kind: "geo-citability",
    source,
    wordCount,
    sentenceCount: sentences.length,
    paragraphCount: paragraphs.length,
    score,
    scoreLabel,
    claims,
    entities,
    quotableSentences: quotable,
    structure,
    recommendations,
    summary: `Citability score: ${score}/100 (${scoreLabel}). ${recommendations[0] || "Content is well-structured for AI citation."}`
  };
}
252
+
253
/**
 * Render a citability report as a Markdown document.
 *
 * The output has a header with the overall numbers followed by the sections
 * "Claim Analysis", "Entity Analysis", "Structure", "Top Quotable Sentences"
 * and "Recommendations", and ends with a newline.
 *
 * @param {object} report - A report in the shape returned by analyzeCitability.
 * @returns {string} Markdown text.
 */
export function renderCitabilityMarkdown(report) {
  const { claims, entities, structure, quotableSentences, recommendations } = report;

  // Show at most ten items as inline code, or a placeholder when there are none.
  const inlineCodeList = (items) =>
    items.slice(0, 10).map((item) => `\`${item}\``).join(", ") || "none detected";

  const header = [
    "# Citability Analysis",
    "",
    `- Source: \`${report.source}\``,
    `- Citability Score: \`${report.score}/100\` (${report.scoreLabel})`,
    `- Word Count: \`${report.wordCount}\``,
    `- Sentences: \`${report.sentenceCount}\``,
    `- Summary: ${report.summary}`,
    ""
  ];

  const claimSection = [
    "## Claim Analysis",
    "",
    `- Factual claims: \`${claims.factualClaims}/${claims.total}\` (${claims.claimDensity}%)`,
    `- Statistical claims: \`${claims.statClaims}\` (${claims.statDensity}%)`,
    `- Dated claims: \`${claims.datedClaims}\``,
    `- Comparison claims: \`${claims.comparisonClaims}\``,
    ""
  ];

  const entitySection = [
    "## Entity Analysis",
    "",
    `- Entity density: \`${entities.entityDensity}%\``,
    `- Named entities: ${inlineCodeList(entities.namedEntities)}`,
    `- Technical terms: ${inlineCodeList(entities.techTerms)}`,
    ""
  ];

  const structureSection = [
    "## Structure",
    "",
    `- Lists: \`${structure.hasList}\``,
    `- Tables: \`${structure.hasTable}\``,
    `- Definitions: \`${structure.hasDefinition}\``,
    `- Headings: \`${structure.headingCount}\``,
    `- Structure score: \`${structure.structureScore}/100\``,
    ""
  ];

  const quotableItems = quotableSentences.length === 0
    ? ["- No highly quotable sentences detected."]
    : quotableSentences.map((entry) => `- (score ${entry.score}) ${entry.sentence}`);

  const recommendationItems = recommendations.length === 0
    ? ["- Content is well-optimized for AI citability."]
    : recommendations.map((text) => `- ${text}`);

  return [
    ...header,
    ...claimSection,
    ...entitySection,
    ...structureSection,
    "## Top Quotable Sentences",
    "",
    ...quotableItems,
    "",
    "## Recommendations",
    "",
    ...recommendationItems,
    ""
  ].join("\n");
}
308
+
309
/**
 * Write rendered citability output by delegating to `writeScanOutput`.
 *
 * @param {string} outputPath - Passed through unchanged as the first argument.
 * @param {string} content - Passed through unchanged as the second argument.
 * @returns {Promise<*>} Whatever `writeScanOutput` resolves to.
 */
export async function writeCitabilityOutput(outputPath, content) {
  const result = await writeScanOutput(outputPath, content);
  return result;
}
@@ -25,7 +25,7 @@ async function fetchWithTimeout(url, timeoutMs = 10000) {
25
25
  const response = await fetch(url, {
26
26
  signal: controller.signal,
27
27
  headers: {
28
- "user-agent": "geo-ai-search-optimization/1.5.0"
28
+ "user-agent": "geo-ai-search-optimization/2.2.0"
29
29
  },
30
30
  redirect: "follow"
31
31
  });