@houtini/fanout-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,335 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
2
/**
 * Variant type keys recognised in the model's JSON reply, in the order in
 * which their queries are emitted by `parseVariants`.
 */
const FAN_OUT_VARIANT_TYPES = [
    "equivalent",
    "specification",
    "generalization",
    "followUp",
    "comparison",
    "clarification",
    "relatedAspects",
    "temporal",
];

/** Substrings that cause a generated query to be rejected as marketing jargon. */
const MARKETING_JARGON = [
    "revolutionary",
    "game-changing",
    "cutting-edge",
    "state-of-the-art",
    "next-generation",
];

/**
 * Generates search-query variants ("fan-out") for a target keyword by
 * prompting Claude, parsing the JSON it returns and filtering the result.
 */
export class KeywordFanOut {
    client;

    /**
     * @param apiKey API key passed to the Anthropic SDK client.
     */
    constructor(apiKey) {
        this.client = new Anthropic({ apiKey });
    }

    /**
     * Requests variants for `keyword` from the model and returns the parsed,
     * de-duplicated and filtered list.
     *
     * Errors from the SDK call or from parsing propagate to the caller
     * unchanged.
     *
     * @param keyword Target keyword the variants are derived from.
     * @param content Page content; `title`, `description` and `markdown` are read.
     * @param variantTypes Names of the variant types to request.
     * @param context Optional context signals (`temporal`, `intent`, `specificity_preference`).
     * @returns The validated variant objects built by `parseVariants`.
     */
    async generateVariants(keyword, content, variantTypes, context) {
        const prompt = this.buildPrompt(keyword, content, variantTypes, context);
        const response = await this.client.messages.create({
            model: "claude-sonnet-4-20250514",
            max_tokens: 4000,
            temperature: 0.7,
            messages: [
                {
                    role: "user",
                    content: prompt,
                },
            ],
        });
        // Only text blocks carry the JSON payload; other block types are ignored.
        const rawText = response.content
            .filter((block) => block.type === "text")
            .map((block) => block.text)
            .join("\n");
        const variants = this.parseVariants(rawText, keyword, context);
        return this.validateVariants(variants, content);
    }

    /**
     * Assembles the full prompt: context header, per-type instructions,
     * quality requirements and the expected JSON output shape.
     */
    buildPrompt(keyword, content, variantTypes, context) {
        const contentSummary = this.generateContentSummary(content);
        const contextInfo = this.formatContextInfo(context);
        const typeInstructions = this.generateTypeInstructions(variantTypes, keyword);
        return `<thinking>
You are implementing Google's Query Fan-Out methodology for content gap analysis.

CONTEXT:
- Target Keyword: "${keyword}"
- Content Topic: ${contentSummary}
- Content Type: ${this.detectContentType(content)}
${contextInfo}

YOUR TASK:
Generate query variants that real users would actually type when searching for information
related to "${keyword}". Each variant must maintain semantic relationship to the keyword.

${typeInstructions}

QUALITY REQUIREMENTS:
✅ Realistic (users would actually type these)
✅ Semantically related to "${keyword}"
✅ Answerable by content (when checking coverage)
✅ Diverse (different angles, not repetitive)
✅ No hallucinated brands/products
❌ No marketing jargon
❌ No overly complex queries
❌ No irrelevant tangents

OUTPUT FORMAT:
Return ONLY valid JSON (no markdown, no explanation):
{
"followUp": ["query1", "query2", "query3"],
"specification": ["query1", "query2", "query3"],
"generalization": ["query1", "query2"],
"equivalent": ["query1", "query2", "query3"],
"comparison": ["query1", "query2", "query3"],
"clarification": ["query1", "query2"],
"relatedAspects": ["query1", "query2", "query3"],
"temporal": ["query1", "query2"]
}

Only include keys for the variant types requested. Generate 3-5 variants per type.
</thinking>

Generate the variants now:`;
    }

    /**
     * Returns the instruction sections for the requested variant types, joined
     * by blank lines, in the order the types were requested.
     *
     * Names that are not own keys of the instruction table are skipped. Without
     * that check an unknown name left an empty segment, and an inherited name
     * such as "constructor" put a native function's source text in the prompt.
     */
    generateTypeInstructions(variantTypes, keyword) {
        const instructions = {
            equivalent: `1. EQUIVALENT VARIANTS (3-5 variants)
Definition: Alternative phrasings with the same intent; different ways to express "${keyword}"
Quality Criteria:
- Must have identical search intent
- Natural language variations
- Regional/dialect differences acceptable

Examples:
- "sim racing cockpit" → "racing simulator rig", "sim rig setup"
- "best protein powder" → "top protein supplements", "recommended protein powder"

Your equivalent variants:`,
            specification: `2. SPECIFICATION VARIANTS (3-5 variants)
Definition: More specific/detailed versions with added qualifiers
Quality Criteria:
- Add brands, models, use cases, or technical details
- Must be answerable with specific information
- Drill down into particular aspects

Examples:
- "sim racing wheels" → "Fanatec DD Pro wheel review", "best sim racing wheel for Formula 1"
- "protein powder" → "whey protein isolate for muscle gain", "vegan protein powder brands"

Your specification variants:`,
            generalization: `3. GENERALIZATION VARIANTS (2-3 variants)
Definition: Broader versions that encompass the keyword within larger context
Quality Criteria:
- Zoom out to related broader topics
- Must still be relevant to original intent
- Opens up to category-level questions

Examples:
- "direct drive sim racing wheels" → "sim racing wheels comparison", "force feedback racing wheels"
- "vegan protein powder" → "plant-based protein sources", "vegan supplements"

Your generalization variants:`,
            followUp: `4. FOLLOW-UP VARIANTS (3-5 variants)
Definition: Logical next questions after learning about "${keyword}"
Quality Criteria:
- Assumes user has basic knowledge from original query
- Explores deeper aspects or related topics
- Natural progression of learning/research

Examples:
- "sim racing wheels" → "how to calibrate sim racing wheel", "best pedals to pair with racing wheel"
- "protein powder" → "when to take protein powder", "protein powder side effects"

Your follow-up variants:`,
            comparison: `5. COMPARISON VARIANTS (3-5 variants)
Definition: Queries seeking to compare options, alternatives, or solutions
Quality Criteria:
- Must compare specific entities or approaches
- "vs", "versus", "compared to" patterns
- "best" for specific criteria

Examples:
- "sim racing wheels" → "Fanatec vs Thrustmaster wheels", "direct drive vs belt driven wheels"
- "protein powder" → "whey vs casein protein", "best budget protein powder"

Your comparison variants:`,
            clarification: `6. CLARIFICATION VARIANTS (2-3 variants)
Definition: Questions seeking to understand concepts, definitions, mechanisms
Quality Criteria:
- "What is...", "How does...", "Why..." patterns
- Address knowledge gaps
- Explain mechanisms or concepts

Examples:
- "direct drive wheels" → "what is direct drive technology", "how do direct drive wheels work"
- "protein powder" → "what is whey protein", "how is protein powder made"

Your clarification variants:`,
            relatedAspects: `7. RELATED ASPECTS VARIANTS (3-5 variants)
Definition: Connected topics or implicit facets not stated in original query
Quality Criteria:
- Identify underlying facets (setup, compatibility, maintenance, etc.)
- Natural extensions of the topic
- Address implicit user needs

Examples:
- "sim racing wheels" → "sim racing wheel setup guide", "wheel compatibility with PC games"
- "protein powder" → "protein powder recipes", "how to mix protein powder"

Your related aspects variants:`,
            temporal: `8. TEMPORAL VARIANTS (2-3 variants)
Definition: Time-specific versions with temporal qualifiers
Quality Criteria:
- Include year, season, or time-based context
- "latest", "new", "2024" qualifiers
- Current trends or releases

Examples:
- "sim racing wheels" → "best sim racing wheels 2024", "new sim racing wheels released 2024"
- "protein powder" → "protein powder black friday deals", "trending protein powders 2024"

Your temporal variants:`,
        };
        return variantTypes
            .filter((type) => Object.prototype.hasOwnProperty.call(instructions, type))
            .map((type) => instructions[type])
            .join("\n\n");
    }

    /**
     * Builds a one-line topic summary: the first 10 space-separated words of
     * the title, a literal "... ", then the first 20 words of the description.
     * A missing title or description contributes an empty string.
     */
    generateContentSummary(content) {
        const title = content?.title ?? "";
        const description = content?.description;
        const titlePart = title.split(" ").slice(0, 10).join(" ");
        const descPart = description
            ? description.split(" ").slice(0, 20).join(" ")
            : "";
        return `${titlePart}... ${descPart}`;
    }

    /**
     * Classifies the content by keyword sniffing on its lower-cased markdown.
     * Rules are checked in order and the first match wins; missing markdown is
     * treated as empty text and therefore yields the fallback label.
     */
    detectContentType(content) {
        const markdown = (content?.markdown ?? "").toLowerCase();
        const rules = [
            ["review/comparison", ["review", "rating"]],
            ["guide/tutorial", ["how to", "guide", "tutorial"]],
            ["product page", ["buy", "price"]],
        ];
        for (const [label, needles] of rules) {
            if (needles.some((needle) => markdown.includes(needle))) {
                return label;
            }
        }
        return "article/informational";
    }

    /**
     * Renders the optional context as an "ADDITIONAL CONTEXT" bullet list, or
     * returns an empty string when there is nothing to report.
     */
    formatContextInfo(context) {
        if (!context)
            return "";
        const lines = [];
        if (context.temporal) {
            if (context.temporal.currentDate) {
                lines.push(`- Current Date: ${context.temporal.currentDate}`);
            }
            if (context.temporal.season) {
                lines.push(`- Season: ${context.temporal.season}`);
            }
        }
        if (context.intent) {
            lines.push(`- User Intent: ${context.intent}`);
        }
        if (context.specificity_preference) {
            lines.push(`- Specificity Preference: ${context.specificity_preference}`);
        }
        return lines.length > 0 ? "\nADDITIONAL CONTEXT:\n" + lines.join("\n") : "";
    }

    /**
     * Extracts the JSON object from the model's reply and converts each query
     * string into a variant record.
     *
     * Entries that are not strings, or that are blank after trimming, are
     * skipped so that one malformed item does not discard the whole reply.
     *
     * @throws Error when the reply contains no `{...}` span.
     * @throws SyntaxError when the extracted span is not valid JSON.
     */
    parseVariants(rawText, keyword, context) {
        // Greedy match: spans from the first "{" to the last "}" in the reply.
        const jsonMatch = rawText.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            throw new Error("No JSON found in response");
        }
        const parsed = JSON.parse(jsonMatch[0]);
        const queries = [];
        for (const variantType of FAN_OUT_VARIANT_TYPES) {
            const candidates = parsed[variantType];
            if (!Array.isArray(candidates)) {
                continue;
            }
            for (const candidate of candidates) {
                if (typeof candidate !== "string") {
                    continue;
                }
                const query = candidate.trim();
                if (query.length === 0) {
                    continue;
                }
                queries.push({
                    query,
                    importance: this.assignImportance(variantType),
                    rationale: `Generated via keyword fan-out (${variantType} variant of "${keyword}")`,
                    variantType,
                    sourceKeyword: keyword,
                    generationMethod: "fan-out",
                    // A fresh object per query so records never share state.
                    contextSignals: context
                        ? {
                            temporal: context.temporal?.currentDate,
                            intent: context.intent,
                            specificity: this.calculateSpecificity(variantType, context.specificity_preference),
                        }
                        : undefined,
                });
            }
        }
        return queries;
    }

    /** Maps a variant type to its fixed importance label ("high", "medium" or "low"). */
    assignImportance(variantType) {
        const importanceMap = {
            equivalent: "high",
            specification: "high",
            comparison: "high",
            clarification: "medium",
            generalization: "medium",
            followUp: "medium",
            relatedAspects: "low",
            temporal: "low",
        };
        return importanceMap[variantType];
    }

    /**
     * Returns the base specificity for a variant type, scaled by 0.7 for a
     * "broad" preference or 1.3 for a "specific" one, clamped to [0, 1].
     */
    calculateSpecificity(variantType, preference) {
        const baseSpecificity = {
            specification: 0.9,
            equivalent: 0.7,
            comparison: 0.7,
            clarification: 0.5,
            followUp: 0.6,
            generalization: 0.3,
            relatedAspects: 0.5,
            temporal: 0.6,
        };
        let specificity = baseSpecificity[variantType];
        if (preference === "broad") {
            specificity *= 0.7;
        }
        else if (preference === "specific") {
            specificity *= 1.3;
        }
        return Math.min(1, Math.max(0, specificity));
    }

    /**
     * De-duplicates the variants, then drops unrealistic ones.
     * `content` is accepted for interface compatibility and is not read.
     */
    validateVariants(variants, content) {
        return this.filterUnrealisticQueries(this.deduplicateVariants(variants));
    }

    /**
     * Keeps the first occurrence of each query. Queries are compared after
     * lower-casing, trimming and collapsing runs of whitespace.
     */
    deduplicateVariants(variants) {
        const seen = new Set();
        const unique = [];
        for (const variant of variants) {
            const normalized = variant.query.toLowerCase().trim().replace(/\s+/g, " ");
            if (seen.has(normalized)) {
                continue;
            }
            seen.add(normalized);
            unique.push(variant);
        }
        return unique;
    }

    /**
     * Removes queries that are shorter than 5 or longer than 150 characters,
     * contain more than 15 words, or include a marketing-jargon term.
     */
    filterUnrealisticQueries(variants) {
        return variants.filter((variant) => {
            const query = variant.query.toLowerCase();
            if (query.length < 5 || query.length > 150)
                return false;
            // Split on whitespace runs so repeated spaces do not inflate the count.
            if (query.trim().split(/\s+/).length > 15)
                return false;
            return !MARKETING_JARGON.some((word) => query.includes(word));
        });
    }
}
@@ -0,0 +1,6 @@
1
+ import { ContentData, QueryGraph, AnalysisDepth } from "../types.js";
2
/**
 * Type declaration for the query decomposer.
 */
export declare class QueryDecomposer {
    private client;

    /**
     * @param apiKey API key used to construct the underlying client.
     */
    constructor(apiKey: string);

    /**
     * Produces a query graph for the given content.
     *
     * @param content Content to decompose into queries.
     * @param depth Optional analysis depth.
     * @param focusArea Optional topic to focus the generated queries on.
     * @returns A promise resolving to the generated query graph.
     */
    decomposeQueries(
        content: ContentData,
        depth?: AnalysisDepth,
        focusArea?: string
    ): Promise<QueryGraph>;
}
@@ -0,0 +1,68 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
2
+ import { createDecompositionPrompt } from "../prompts/decomposition.js";
3
/** Number of queries requested from the model for each analysis depth. */
const QUERY_COUNTS = {
    quick: 5,
    standard: 15,
    comprehensive: 30,
};

/**
 * Concatenates the text of every text block in a Messages API response.
 * Throws when the response carries no text block at all.
 */
function extractResponseText(response) {
    const blocks = Array.isArray(response?.content) ? response.content : [];
    const textBlocks = blocks.filter((block) => block && block.type === "text");
    if (textBlocks.length === 0) {
        throw new Error("Unexpected response type from Claude");
    }
    return textBlocks.map((block) => block.text).join("\n");
}

/**
 * Extracts and parses the JSON object embedded in the model's reply.
 * Trailing-comma repair is applied only when the raw text fails to parse, so
 * valid JSON whose strings contain ",]" or ",}" is left untouched.
 */
function parseQueryGraphJson(text) {
    // Remove thinking tags if present
    const cleanText = text.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
    // Greedy match: spans from the first "{" to the last "}".
    const jsonMatch = cleanText.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        throw new Error(`Failed to extract JSON from response. Response text: ${cleanText.substring(0, 500)}`);
    }
    const rawJson = jsonMatch[0];
    try {
        return JSON.parse(rawJson);
    }
    catch {
        // Fall through to the repaired variant below.
    }
    // Remove trailing commas before closing braces/brackets
    const jsonStr = rawJson.replace(/,(\s*[}\]])/g, '$1');
    try {
        return JSON.parse(jsonStr);
    }
    catch (parseError) {
        throw new Error(`JSON parsing failed: ${parseError instanceof Error ? parseError.message : 'Unknown error'}. JSON: ${jsonStr.substring(0, 500)}`);
    }
}

/**
 * Asks Claude to decompose a piece of content into a graph of prerequisite,
 * core and follow-up queries.
 */
export class QueryDecomposer {
    client;

    /**
     * @param apiKey API key passed to the Anthropic SDK client.
     */
    constructor(apiKey) {
        this.client = new Anthropic({ apiKey });
    }

    /**
     * Builds the decomposition prompt, sends it to the model and returns the
     * parsed query graph.
     *
     * @param content Content handed to `createDecompositionPrompt`.
     * @param depth One of the `QUERY_COUNTS` keys; unrecognised values fall
     *   back to the "standard" count rather than passing `undefined` on.
     * @param focusArea Optional topic appended to the prompt as a focus hint.
     * @returns The parsed object, which has truthy `prerequisite`, `core` and
     *   `followup` properties.
     * @throws Error prefixed with "Query decomposition failed" when the request,
     *   extraction, parsing or structure check fails.
     */
    async decomposeQueries(content, depth = "standard", focusArea) {
        const queryCount = Object.prototype.hasOwnProperty.call(QUERY_COUNTS, depth)
            ? QUERY_COUNTS[depth]
            : QUERY_COUNTS.standard;
        let prompt = createDecompositionPrompt(content, queryCount);
        if (focusArea) {
            prompt += `\n\nFOCUS AREA: Generate queries specifically related to "${focusArea}".`;
        }
        try {
            const response = await this.client.messages.create({
                model: "claude-sonnet-4-20250514",
                max_tokens: 4000,
                messages: [
                    {
                        role: "user",
                        content: prompt,
                    },
                ],
            });
            const queryGraph = parseQueryGraphJson(extractResponseText(response));
            if (!queryGraph.prerequisite ||
                !queryGraph.core ||
                !queryGraph.followup) {
                throw new Error("Invalid query graph structure");
            }
            return queryGraph;
        }
        catch (error) {
            if (error instanceof Error) {
                throw new Error(`Query decomposition failed: ${error.message}`);
            }
            throw new Error("Query decomposition failed");
        }
    }
}
@@ -0,0 +1,26 @@
1
+ import { QueryGraph, EnhancedQueryGraph, CoverageAssessment, ContentData } from "../types.js";
2
/**
 * Type declaration for the report formatter. Only `formatReport` is public;
 * the remaining members are private and declared without signatures.
 */
export declare class ReportFormatter {
    /**
     * Formats an analysis report as a string.
     *
     * @param content Content the report is about.
     * @param queryGraph Plain or enhanced query graph.
     * @param assessments Coverage assessments to include.
     * @param timings Optional timing figures (`fetchTime`, `queryTime`,
     *   `assessTime`, `totalTime`).
     * @returns The formatted report text.
     */
    formatReport(
        content: ContentData,
        queryGraph: QueryGraph | EnhancedQueryGraph,
        assessments: CoverageAssessment[],
        timings?: {
            fetchTime: number;
            queryTime: number;
            assessTime: number;
            totalTime: number;
        }
    ): string;

    private isEnhancedGraph;
    private buildReport;
    private calculateStatistics;
    private calculateCoverageScore;
    private extractRecommendations;
    private estimateCost;
    private calculateTechnicalMetrics;
    private generateMarkdown;
    private formatFanOutSection;
    private formatVariantTypeName;
    private formatQuerySection;
    private calculateAvgSpecificity;
    private calculateAvgRealism;
    private countGenericQueries;
    private calculateDomainTermUsage;
    private calculateOverclaimRate;
    private calculateUnderclaimRate;
}