npm - @houtini/fanout-mcp - Versions diffs - 0.2.0 - Mend

@houtini/fanout-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/LICENSE +204 -0
package/README.md +625 -0
package/dist/index.d.ts +2 -0
package/dist/index.js +147 -0
package/dist/prompts/assessment.d.ts +2 -0
package/dist/prompts/assessment.js +87 -0
package/dist/prompts/decomposition.d.ts +2 -0
package/dist/prompts/decomposition.js +92 -0
package/dist/services/content-fetcher.d.ts +6 -0
package/dist/services/content-fetcher.js +124 -0
package/dist/services/coverage-assessor.d.ts +8 -0
package/dist/services/coverage-assessor.js +79 -0
package/dist/services/keyword-fanout.d.ts +17 -0
package/dist/services/keyword-fanout.js +335 -0
package/dist/services/query-decomposer.d.ts +6 -0
package/dist/services/query-decomposer.js +68 -0
package/dist/services/report-formatter.d.ts +26 -0
package/dist/services/report-formatter.js +492 -0
package/dist/tools/analyze-content-gap.d.ts +69 -0
package/dist/tools/analyze-content-gap.js +248 -0
package/dist/types.d.ts +173 -0
package/dist/types.js +1 -0
package/package.json +66 -0
package/research/README.md +242 -0
package/research/google-fanout-adaptation.md +738 -0
package/research/keyword-fanout-explained.md +274 -0

package/dist/services/keyword-fanout.js ADDED Viewed

@@ -0,0 +1,335 @@
+import Anthropic from "@anthropic-ai/sdk";
+export class KeywordFanOut {
+    client;
+    constructor(apiKey) {
+        this.client = new Anthropic({ apiKey });
+    }
+    async generateVariants(keyword, content, variantTypes, context) {
+        const startTime = Date.now();
+        const prompt = this.buildPrompt(keyword, content, variantTypes, context);
+        try {
+            const response = await this.client.messages.create({
+                model: "claude-sonnet-4-20250514",
+                max_tokens: 4000,
+                temperature: 0.7,
+                messages: [
+                    {
+                        role: "user",
+                        content: prompt,
+                    },
+                ],
+            });
+            const rawText = response.content
+                .filter((block) => block.type === "text")
+                .map((block) => block.text)
+                .join("\n");
+            const variants = this.parseVariants(rawText, keyword, context);
+            const validated = this.validateVariants(variants, content);
+            const generationTime = Date.now() - startTime;
+            return validated;
+        }
+        catch (error) {
+            throw error;
+        }
+    }
+    buildPrompt(keyword, content, variantTypes, context) {
+        const contentSummary = this.generateContentSummary(content);
+        const contextInfo = this.formatContextInfo(context);
+        const typeInstructions = this.generateTypeInstructions(variantTypes, keyword);
+        return `<thinking>
+You are implementing Google's Query Fan-Out methodology for content gap analysis.
+CONTEXT:
+- Target Keyword: "${keyword}"
+- Content Topic: ${contentSummary}
+- Content Type: ${this.detectContentType(content)}
+${contextInfo}
+YOUR TASK:
+Generate query variants that real users would actually type when searching for information
+related to "${keyword}". Each variant must maintain semantic relationship to the keyword.
+${typeInstructions}
+QUALITY REQUIREMENTS:
+✅ Realistic (users would actually type these)
+✅ Semantically related to "${keyword}"
+✅ Answerable by content (when checking coverage)
+✅ Diverse (different angles, not repetitive)
+✅ No hallucinated brands/products
+❌ No marketing jargon
+❌ No overly complex queries
+❌ No irrelevant tangents
+OUTPUT FORMAT:
+Return ONLY valid JSON (no markdown, no explanation):
+{
+  "followUp": ["query1", "query2", "query3"],
+  "specification": ["query1", "query2", "query3"],
+  "generalization": ["query1", "query2"],
+  "equivalent": ["query1", "query2", "query3"],
+  "comparison": ["query1", "query2", "query3"],
+  "clarification": ["query1", "query2"],
+  "relatedAspects": ["query1", "query2", "query3"],
+  "temporal": ["query1", "query2"]
+}
+Only include keys for the variant types requested. Generate 3-5 variants per type.
+</thinking>
+Generate the variants now:`;
+    }
+    generateTypeInstructions(variantTypes, keyword) {
+        const instructions = {
+            equivalent: `1. EQUIVALENT VARIANTS (3-5 variants)
+Definition: Alternative phrasings with the same intent; different ways to express "${keyword}"
+Quality Criteria:
+- Must have identical search intent
+- Natural language variations
+- Regional/dialect differences acceptable
+Examples:
+- "sim racing cockpit" → "racing simulator rig", "sim rig setup"
+- "best protein powder" → "top protein supplements", "recommended protein powder"
+Your equivalent variants:`,
+            specification: `2. SPECIFICATION VARIANTS (3-5 variants)
+Definition: More specific/detailed versions with added qualifiers
+Quality Criteria:
+- Add brands, models, use cases, or technical details
+- Must be answerable with specific information
+- Drill down into particular aspects
+Examples:
+- "sim racing wheels" → "Fanatec DD Pro wheel review", "best sim racing wheel for Formula 1"
+- "protein powder" → "whey protein isolate for muscle gain", "vegan protein powder brands"
+Your specification variants:`,
+            generalization: `3. GENERALIZATION VARIANTS (2-3 variants)
+Definition: Broader versions that encompass the keyword within larger context
+Quality Criteria:
+- Zoom out to related broader topics
+- Must still be relevant to original intent
+- Opens up to category-level questions
+Examples:
+- "direct drive sim racing wheels" → "sim racing wheels comparison", "force feedback racing wheels"
+- "vegan protein powder" → "plant-based protein sources", "vegan supplements"
+Your generalization variants:`,
+            followUp: `4. FOLLOW-UP VARIANTS (3-5 variants)
+Definition: Logical next questions after learning about "${keyword}"
+Quality Criteria:
+- Assumes user has basic knowledge from original query
+- Explores deeper aspects or related topics
+- Natural progression of learning/research
+Examples:
+- "sim racing wheels" → "how to calibrate sim racing wheel", "best pedals to pair with racing wheel"
+- "protein powder" → "when to take protein powder", "protein powder side effects"
+Your follow-up variants:`,
+            comparison: `5. COMPARISON VARIANTS (3-5 variants)
+Definition: Queries seeking to compare options, alternatives, or solutions
+Quality Criteria:
+- Must compare specific entities or approaches
+- "vs", "versus", "compared to" patterns
+- "best" for specific criteria
+Examples:
+- "sim racing wheels" → "Fanatec vs Thrustmaster wheels", "direct drive vs belt driven wheels"
+- "protein powder" → "whey vs casein protein", "best budget protein powder"
+Your comparison variants:`,
+            clarification: `6. CLARIFICATION VARIANTS (2-3 variants)
+Definition: Questions seeking to understand concepts, definitions, mechanisms
+Quality Criteria:
+- "What is...", "How does...", "Why..." patterns
+- Address knowledge gaps
+- Explain mechanisms or concepts
+Examples:
+- "direct drive wheels" → "what is direct drive technology", "how do direct drive wheels work"
+- "protein powder" → "what is whey protein", "how is protein powder made"
+Your clarification variants:`,
+            relatedAspects: `7. RELATED ASPECTS VARIANTS (3-5 variants)
+Definition: Connected topics or implicit facets not stated in original query
+Quality Criteria:
+- Identify underlying facets (setup, compatibility, maintenance, etc.)
+- Natural extensions of the topic
+- Address implicit user needs
+Examples:
+- "sim racing wheels" → "sim racing wheel setup guide", "wheel compatibility with PC games"
+- "protein powder" → "protein powder recipes", "how to mix protein powder"
+Your related aspects variants:`,
+            temporal: `8. TEMPORAL VARIANTS (2-3 variants)
+Definition: Time-specific versions with temporal qualifiers
+Quality Criteria:
+- Include year, season, or time-based context
+- "latest", "new", "2024" qualifiers
+- Current trends or releases
+Examples:
+- "sim racing wheels" → "best sim racing wheels 2024", "new sim racing wheels released 2024"
+- "protein powder" → "protein powder black friday deals", "trending protein powders 2024"
+Your temporal variants:`,
+        };
+        return variantTypes.map((type) => instructions[type]).join("\n\n");
+    }
+    generateContentSummary(content) {
+        const titleWords = content.title.split(" ");
+        const descWords = content.description
+            ? content.description.split(" ").slice(0, 20).join(" ")
+            : "";
+        return `${titleWords.slice(0, 10).join(" ")}... ${descWords}`;
+    }
+    detectContentType(content) {
+        const markdown = content.markdown.toLowerCase();
+        if (markdown.includes("review") || markdown.includes("rating")) {
+            return "review/comparison";
+        }
+        if (markdown.includes("how to") ||
+            markdown.includes("guide") ||
+            markdown.includes("tutorial")) {
+            return "guide/tutorial";
+        }
+        if (markdown.includes("buy") || markdown.includes("price")) {
+            return "product page";
+        }
+        return "article/informational";
+    }
+    formatContextInfo(context) {
+        if (!context)
+            return "";
+        const lines = [];
+        if (context.temporal) {
+            if (context.temporal.currentDate) {
+                lines.push(`- Current Date: ${context.temporal.currentDate}`);
+            }
+            if (context.temporal.season) {
+                lines.push(`- Season: ${context.temporal.season}`);
+            }
+        }
+        if (context.intent) {
+            lines.push(`- User Intent: ${context.intent}`);
+        }
+        if (context.specificity_preference) {
+            lines.push(`- Specificity Preference: ${context.specificity_preference}`);
+        }
+        return lines.length > 0 ? "\nADDITIONAL CONTEXT:\n" + lines.join("\n") : "";
+    }
+    parseVariants(rawText, keyword, context) {
+        const jsonMatch = rawText.match(/\{[\s\S]*\}/);
+        if (!jsonMatch) {
+            throw new Error("No JSON found in response");
+        }
+        const parsed = JSON.parse(jsonMatch[0]);
+        const queries = [];
+        const typeMapping = [
+            ["equivalent", "equivalent"],
+            ["specification", "specification"],
+            ["generalization", "generalization"],
+            ["followUp", "followUp"],
+            ["comparison", "comparison"],
+            ["clarification", "clarification"],
+            ["relatedAspects", "relatedAspects"],
+            ["temporal", "temporal"],
+        ];
+        for (const [key, variantType] of typeMapping) {
+            const variants = parsed[key];
+            if (variants && Array.isArray(variants)) {
+                for (const query of variants) {
+                    queries.push({
+                        query: query.trim(),
+                        importance: this.assignImportance(variantType),
+                        rationale: `Generated via keyword fan-out (${variantType} variant of "${keyword}")`,
+                        variantType,
+                        sourceKeyword: keyword,
+                        generationMethod: "fan-out",
+                        contextSignals: context
+                            ? {
+                                temporal: context.temporal?.currentDate,
+                                intent: context.intent,
+                                specificity: this.calculateSpecificity(variantType, context.specificity_preference),
+                            }
+                            : undefined,
+                    });
+                }
+            }
+        }
+        return queries;
+    }
+    assignImportance(variantType) {
+        const importanceMap = {
+            equivalent: "high",
+            specification: "high",
+            comparison: "high",
+            clarification: "medium",
+            generalization: "medium",
+            followUp: "medium",
+            relatedAspects: "low",
+            temporal: "low",
+        };
+        return importanceMap[variantType];
+    }
+    calculateSpecificity(variantType, preference) {
+        const baseSpecificity = {
+            specification: 0.9,
+            equivalent: 0.7,
+            comparison: 0.7,
+            clarification: 0.5,
+            followUp: 0.6,
+            generalization: 0.3,
+            relatedAspects: 0.5,
+            temporal: 0.6,
+        };
+        let specificity = baseSpecificity[variantType];
+        if (preference === "broad") {
+            specificity *= 0.7;
+        }
+        else if (preference === "specific") {
+            specificity *= 1.3;
+        }
+        return Math.min(1, Math.max(0, specificity));
+    }
+    validateVariants(variants, content) {
+        const deduplicated = this.deduplicateVariants(variants);
+        const realistic = this.filterUnrealisticQueries(deduplicated);
+        return realistic;
+    }
+    deduplicateVariants(variants) {
+        const seen = new Set();
+        const unique = [];
+        for (const variant of variants) {
+            const normalized = variant.query.toLowerCase().trim();
+            if (!seen.has(normalized)) {
+                seen.add(normalized);
+                unique.push(variant);
+            }
+        }
+        return unique;
+    }
+    filterUnrealisticQueries(variants) {
+        return variants.filter((variant) => {
+            const query = variant.query.toLowerCase();
+            if (query.length < 5 || query.length > 150)
+                return false;
+            if (query.split(" ").length > 15)
+                return false;
+            const marketingWords = [
+                "revolutionary",
+                "game-changing",
+                "cutting-edge",
+                "state-of-the-art",
+                "next-generation",
+            ];
+            if (marketingWords.some((word) => query.includes(word)))
+                return false;
+            return true;
+        });
+    }
+}

package/dist/services/query-decomposer.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import { ContentData, QueryGraph, AnalysisDepth } from "../types.js";
+/**
+ * Uses the Anthropic client to decompose content into a QueryGraph.
+ */
+export declare class QueryDecomposer {
+    private client;
+    /**
+     * @param apiKey API key passed to the Anthropic SDK client.
+     */
+    constructor(apiKey: string);
+    /**
+     * Prompts the model for a query graph describing `content` and returns the parsed result.
+     *
+     * @param content Content to build the decomposition prompt from.
+     * @param depth Selects how many queries are requested; defaults to "standard".
+     * @param focusArea When given, a "FOCUS AREA" instruction naming it is appended to the prompt.
+     * @returns The parsed graph, which must contain `prerequisite`, `core` and `followup`.
+     * @throws Error prefixed with "Query decomposition failed" on API, parsing or validation failure.
+     */
+    decomposeQueries(content: ContentData, depth?: AnalysisDepth, focusArea?: string): Promise<QueryGraph>;
+}

package/dist/services/query-decomposer.js ADDED Viewed

@@ -0,0 +1,68 @@
+import Anthropic from "@anthropic-ai/sdk";
+import { createDecompositionPrompt } from "../prompts/decomposition.js";
+// Query count passed to createDecompositionPrompt for each analysis depth.
+const QUERY_COUNTS = {
+    quick: 5,
+    standard: 15,
+    comprehensive: 30,
+};
+export class QueryDecomposer {
+    client;
+    constructor(apiKey) {
+        this.client = new Anthropic({ apiKey });
+    }
+    async decomposeQueries(content, depth = "standard", focusArea) {
+        const queryCount = QUERY_COUNTS[depth];
+        let prompt = createDecompositionPrompt(content, queryCount);
+        if (focusArea) {
+            prompt += `\n\nFOCUS AREA: Generate queries specifically related to "${focusArea}".`;
+        }
+        try {
+            const response = await this.client.messages.create({
+                model: "claude-sonnet-4-20250514",
+                max_tokens: 4000,
+                messages: [
+                    {
+                        role: "user",
+                        content: prompt,
+                    },
+                ],
+            });
+            const content_block = response.content[0];
+            if (content_block.type !== "text") {
+                throw new Error("Unexpected response type from Claude");
+            }
+            const text = content_block.text;
+            // Remove thinking tags if present
+            const cleanText = text.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
+            // Extract JSON object - look for complete structure
+            const jsonMatch = cleanText.match(/\{[\s\S]*\}/);
+            if (!jsonMatch) {
+                throw new Error(`Failed to extract JSON from response. Response text: ${cleanText.substring(0, 500)}`);
+            }
+            // Clean up common JSON issues
+            let jsonStr = jsonMatch[0];
+            // Remove trailing commas before closing braces/brackets
+            jsonStr = jsonStr.replace(/,(\s*[}\]])/g, '$1');
+            // Try to parse
+            let queryGraph;
+            try {
+                queryGraph = JSON.parse(jsonStr);
+            }
+            catch (parseError) {
+                throw new Error(`JSON parsing failed: ${parseError instanceof Error ? parseError.message : 'Unknown error'}. JSON: ${jsonStr.substring(0, 500)}`);
+            }
+            if (!queryGraph.prerequisite ||
+                !queryGraph.core ||
+                !queryGraph.followup) {
+                throw new Error("Invalid query graph structure");
+            }
+            return queryGraph;
+        }
+        catch (error) {
+            if (error instanceof Error) {
+                throw new Error(`Query decomposition failed: ${error.message}`);
+            }
+            throw new Error("Query decomposition failed");
+        }
+    }
+}

package/dist/services/report-formatter.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import { QueryGraph, EnhancedQueryGraph, CoverageAssessment, ContentData } from "../types.js";
+/**
+ * Turns analysis results into a report string.
+ * Only the declaration is visible here; the private members below are
+ * implementation helpers whose behavior is not shown.
+ */
+export declare class ReportFormatter {
+    /**
+     * Produces the report as a single string.
+     * NOTE(review): output is presumably Markdown given the private `generateMarkdown` helper — confirm against the implementation.
+     *
+     * @param content The analysed content.
+     * @param queryGraph Either a plain or an enhanced query graph.
+     * @param assessments Coverage assessments to report on.
+     * @param timings Optional per-phase and total timings; units are not stated here (presumably milliseconds — confirm).
+     */
+    formatReport(content: ContentData, queryGraph: QueryGraph | EnhancedQueryGraph, assessments: CoverageAssessment[], timings?: {
+        fetchTime: number;
+        queryTime: number;
+        assessTime: number;
+        totalTime: number;
+    }): string;
+    private isEnhancedGraph;
+    private buildReport;
+    private calculateStatistics;
+    private calculateCoverageScore;
+    private extractRecommendations;
+    private estimateCost;
+    private calculateTechnicalMetrics;
+    private generateMarkdown;
+    private formatFanOutSection;
+    private formatVariantTypeName;
+    private formatQuerySection;
+    private calculateAvgSpecificity;
+    private calculateAvgRealism;
+    private countGenericQueries;
+    private calculateDomainTermUsage;
+    private calculateOverclaimRate;
+    private calculateUnderclaimRate;
+}