@houtini/fanout-mcp 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema, } from "@modelcontextpro
5
5
  import { analyzeContentGap, AnalyzeContentGapSchema, } from "./tools/analyze-content-gap.js";
6
6
  const server = new Server({
7
7
  name: "fanout-mcp",
8
- version: "0.1.0",
8
+ version: "0.2.8",
9
9
  }, {
10
10
  capabilities: {
11
11
  tools: {},
package/package.json CHANGED
@@ -1,69 +1,69 @@
1
- {
2
- "name": "@houtini/fanout-mcp",
3
- "version": "0.2.8",
4
- "mcpName": "io.github.houtini-ai/fanout",
5
- "description": "Multi-URL comparative content analysis with topical gap detection",
6
- "type": "module",
7
- "main": "dist/index.js",
8
- "bin": {
9
- "fanout-mcp": "dist/index.js"
10
- },
11
- "scripts": {
12
- "build": "tsc",
13
- "dev": "tsc --watch",
14
- "prepare": "npm run build"
15
- },
16
- "keywords": [
17
- "mcp",
18
- "model-context-protocol",
19
- "content-analysis",
20
- "content-gap-analysis",
21
- "seo",
22
- "geo",
23
- "generative-engine-optimization",
24
- "query-generation",
25
- "query-decomposition",
26
- "keyword-fanout",
27
- "self-rag",
28
- "rag",
29
- "ai-search",
30
- "claude",
31
- "anthropic"
32
- ],
33
- "author": "Richard Baxter <richard@houtini.ai>",
34
- "license": "Apache-2.0",
35
- "repository": {
36
- "type": "git",
37
- "url": "git+https://github.com/houtini-ai/fanout-mcp.git"
38
- },
39
- "homepage": "https://houtini.com/query-fan-out-mcp-for-ai-search-optimisation/",
40
- "bugs": {
41
- "url": "https://github.com/houtini-ai/fanout-mcp/issues"
42
- },
43
- "engines": {
44
- "node": ">=18.0.0"
45
- },
46
- "files": [
47
- "dist",
48
- "README.md",
49
- "LICENSE",
50
- "houtini-logo.jpg",
51
- "server.json",
52
- "research/keyword-fanout-explained.md",
53
- "research/google-fanout-adaptation.md"
54
- ],
55
- "dependencies": {
56
- "@anthropic-ai/sdk": "^0.32.1",
57
- "@modelcontextprotocol/sdk": "^1.26.0",
58
- "cheerio": "^1.0.0-rc.12",
59
- "fast-xml-parser": "^5.3.4",
60
- "node-fetch": "^3.3.2",
61
- "turndown": "^7.1.2",
62
- "zod": "^3.22.4"
63
- },
64
- "devDependencies": {
65
- "@types/node": "^20.10.0",
66
- "@types/turndown": "^5.0.4",
67
- "typescript": "^5.3.0"
68
- }
69
- }
1
+ {
2
+ "name": "@houtini/fanout-mcp",
3
+ "version": "0.2.9",
4
+ "mcpName": "io.github.houtini-ai/fanout",
5
+ "description": "Multi-URL comparative content analysis with topical gap detection",
6
+ "type": "module",
7
+ "main": "dist/index.js",
8
+ "bin": {
9
+ "fanout-mcp": "dist/index.js"
10
+ },
11
+ "scripts": {
12
+ "build": "tsc",
13
+ "dev": "tsc --watch",
14
+ "prepare": "npm run build"
15
+ },
16
+ "keywords": [
17
+ "mcp",
18
+ "model-context-protocol",
19
+ "content-analysis",
20
+ "content-gap-analysis",
21
+ "seo",
22
+ "geo",
23
+ "generative-engine-optimization",
24
+ "query-generation",
25
+ "query-decomposition",
26
+ "keyword-fanout",
27
+ "self-rag",
28
+ "rag",
29
+ "ai-search",
30
+ "claude",
31
+ "anthropic"
32
+ ],
33
+ "author": "Richard Baxter <richard@houtini.ai>",
34
+ "license": "Apache-2.0",
35
+ "repository": {
36
+ "type": "git",
37
+ "url": "git+https://github.com/houtini-ai/fanout-mcp.git"
38
+ },
39
+ "homepage": "https://houtini.com/query-fan-out-mcp-for-ai-search-optimisation/",
40
+ "bugs": {
41
+ "url": "https://github.com/houtini-ai/fanout-mcp/issues"
42
+ },
43
+ "engines": {
44
+ "node": ">=18.0.0"
45
+ },
46
+ "files": [
47
+ "dist",
48
+ "README.md",
49
+ "LICENSE",
50
+ "houtini-logo.jpg",
51
+ "server.json",
52
+ "research/keyword-fanout-explained.md",
53
+ "research/google-fanout-adaptation.md"
54
+ ],
55
+ "dependencies": {
56
+ "@anthropic-ai/sdk": "^0.32.1",
57
+ "@modelcontextprotocol/sdk": "^1.26.0",
58
+ "cheerio": "^1.0.0-rc.12",
59
+ "fast-xml-parser": "^5.3.6",
60
+ "node-fetch": "^3.3.2",
61
+ "turndown": "^7.1.2",
62
+ "zod": "^3.22.4"
63
+ },
64
+ "devDependencies": {
65
+ "@types/node": "^20.10.0",
66
+ "@types/turndown": "^5.0.4",
67
+ "typescript": "^5.3.0"
68
+ }
69
+ }
@@ -1,242 +0,0 @@
1
- # Fan-Out MCP: Research Phase Complete
2
-
3
- **Date:** December 15, 2024
4
- **Status:** ✅ Research Validated → Ready for Implementation
5
-
6
- ---
7
-
8
- ## Executive Summary
9
-
10
- We've successfully validated the "fan-out" MCP concept through deep research into cutting-edge Information Retrieval science. **The approach is sound and implementable.**
11
-
12
- ### What We Validated
13
-
14
- ✅ **Query Decomposition** - Established technique from 2022+ research
15
- ✅ **Reverse HyDE** - Emerging but validated for intent prediction
16
- ✅ **Self-RAG** - Perfect for coverage assessment and validation
17
- ✅ **Content Gap Analysis (GEO)** - Hot topic in AI search optimization
18
-
19
- ### What We Decided
20
-
21
- **Multi-Mode Architecture:**
22
- 1. **Single URL** - Deep analysis with full query graph (start here)
23
- 2. **Batch Processing** - 5-20 URLs with aggregate coverage matrix
24
- 3. **Sitemap Analysis** - Full site audit with dashboard (future)
25
-
26
- **Output Format:**
27
- - Data-driven with coverage scores
28
- - Justified with evidence quotes or explicit gaps
29
- - Actionable with specific, prioritized recommendations
30
- - Downloadable for larger analyses (JSON/HTML reports)
31
-
32
- ---
33
-
34
- ## Research Findings (TL;DR)
35
-
36
- ### Is The Approach Sound?
37
-
38
- **YES - 95% Confidence**
39
-
40
- The four techniques we identified are all actively researched at MIT, Stanford, Microsoft Research, and Google Research. Our innovation is **combining all four** into a unified content gap analysis system.
41
-
42
- ### Key Papers
43
-
44
- 1. **Least-to-Most Prompting** (2022) - Query decomposition foundation
45
- 2. **Self-RAG** (2023, arXiv:2310.05837) - Coverage assessment
46
- 3. **HyDE** (Gao et al., 2022) - Hypothetical document embeddings
47
- 4. **GEO Research** - Google/Stanford work on AI search biases
48
-
49
- ### The Gap We're Filling
50
-
51
- Nobody has combined these techniques into a practical content gap analysis tool. This is our opportunity to build something cutting-edge that addresses a real need (optimizing content for AI search engines).
52
-
53
- ---
54
-
55
- ## Technical Architecture
56
-
57
- ### MCP Tools
58
-
59
- ```typescript
60
- 1. analyze_content_gap(url: string, depth?: string, focus_area?: string)
61
- // Single URL analysis with full query graph
62
- // Returns: Detailed coverage report with recommendations
63
-
64
- 2. analyze_batch_urls(urls: string[], depth?: string)
65
- // Batch processing with coverage matrix
66
- // Returns: Aggregate analysis + downloadable data
67
-
68
- 3. analyze_sitemap(sitemap_url: string, max_pages?: number)
69
- // Full site analysis (future enhancement)
70
- // Returns: Dashboard artifact + downloadable report
71
- ```
72
-
73
- ### Processing Pipeline
74
-
75
- ```
76
- 1. FETCH → Scrape content (Supadata/Firecrawl)
77
- 2. DECOMPOSE → Generate query graph (Sonnet 4.5 + LtM principles)
78
- 3. ASSESS → Coverage validation (Sonnet 4.5 + Self-RAG critique)
79
- 4. ANALYZE → Gap prioritization and recommendations
80
- 5. OUTPUT → Formatted report (markdown/JSON/artifact)
81
- ```
82
-
83
- ### Token Budget
84
-
85
- **Single URL:** ~19K tokens per analysis
86
- **Batch (10 URLs):** ~50K tokens
87
- **Conclusion:** Very feasible within Claude Desktop limits
88
-
89
- ---
90
-
91
- ## Implementation Plan
92
-
93
- ### Phase 1: MVP (Single URL) - Week 1
94
- - [ ] Create `fanout-mcp` repository structure
95
- - [ ] Implement `analyze_content_gap` tool
96
- - [ ] Create prompt templates for decomposition + assessment
97
- - [ ] Test on your own articles (known content)
98
- - [ ] Iterate until output quality is reliable
99
-
100
- ### Phase 2: Batch Processing - Week 2
101
- - [ ] Implement `analyze_batch_urls` tool
102
- - [ ] Add coverage matrix aggregation
103
- - [ ] Create downloadable JSON output
104
- - [ ] Test with 5-10 related articles
105
-
106
- ### Phase 3: Polish & Publish - Week 3
107
- - [ ] Error handling and edge cases
108
- - [ ] Documentation and examples
109
- - [ ] Publish as `@houtini/fanout-mcp`
110
- - [ ] Write article about the process
111
-
112
- ### Phase 4: Integration (Future)
113
- - [ ] Integrate with Content Machine pipeline
114
- - [ ] Add sitemap analysis mode
115
- - [ ] Create artifact dashboards for visualization
116
-
117
- ---
118
-
119
- ## Success Criteria
120
-
121
- ### MVP Success
122
- - ✅ Generates specific, realistic queries (not generic)
123
- - ✅ Accurately identifies gaps (no false positives)
124
- - ✅ Provides actionable recommendations (not vague)
125
- - ✅ Completes in <30 seconds per URL
126
-
127
- ### Production Success
128
- - ✅ Content writers actually use it
129
- - ✅ Recommendations improve coverage scores
130
- - ✅ Integration with Content Machine works smoothly
131
-
132
- ---
133
-
134
- ## Files Created
135
-
136
- All research documentation is in `C:\MCP\fanout-mcp\research\`:
137
-
138
- 1. **ir-research-findings.md** - Full Gemini deep research report
139
- 2. **design-decisions.md** - Answers to your specific questions
140
- 3. **technical-implementation.md** - Detailed architecture and code patterns
141
- 4. **README.md** - This summary document
142
-
143
- ---
144
-
145
- ## What Makes This Cutting-Edge
146
-
147
- ### Research-Backed Innovation
148
-
149
- - **Least-to-Most Prompting** - Proven to outperform Chain-of-Thought
150
- - **Reverse HyDE** - Novel application of embedding alignment to content
151
- - **Self-RAG** - Latest technique for self-critique and validation
152
- - **GEO Context** - Perfect timing as AI search becomes dominant
153
-
154
- ### Practical Value
155
-
156
- - Solves real problem (content gap analysis for AI search)
157
- - Automates tedious manual process
158
- - Provides data-driven, justified recommendations
159
- - Integrates with existing content workflows
160
-
161
- ### Technical Excellence
162
-
163
- - Proper separation of concerns (MCP handles data, Sonnet handles reasoning)
164
- - Adversarial validation prevents hallucinated coverage
165
- - Prioritization based on query importance and tier
166
- - Scalable from single URL to full site analysis
167
-
168
- ---
169
-
170
- ## Confidence Assessment
171
-
172
- | Aspect | Confidence | Notes |
173
- |--------|-----------|-------|
174
- | Research Backing | 100% ✅ | All techniques validated in literature |
175
- | Technical Feasibility | 95% ✅ | Prompting complexity is main challenge |
176
- | Market Timing | 95% ✅ | GEO is emerging now |
177
- | Implementation Effort | 70% ⚠️ | Will require prompt iteration |
178
-
179
- ---
180
-
181
- ## Risks & Mitigations
182
-
183
- ### Risk 1: Query Generation Too Generic
184
- **Mitigation:** Prompt engineering with specific examples, iterative refinement
185
-
186
- ### Risk 2: False Positives in Coverage
187
- **Mitigation:** Self-RAG adversarial validation, require exact evidence quotes
188
-
189
- ### Risk 3: Processing Time Too Long
190
- **Mitigation:** Start with single URL mode, add caching for batch processing
191
-
192
- ### Risk 4: Output Too Complex
193
- **Mitigation:** Multiple output modes (quick/standard/comprehensive)
194
-
195
- ---
196
-
197
- ## Next Action: Start Building
198
-
199
- You now have everything needed to start implementation:
200
- - ✅ Research validates the approach
201
- - ✅ Architecture is designed
202
- - ✅ Tool signatures are defined
203
- - ✅ Prompt templates are sketched
204
- - ✅ Success criteria are clear
205
-
206
- **Recommended First Step:** Create the repository structure and implement a minimal version of `analyze_content_gap` that just does query decomposition. Test that first, then add coverage assessment.
207
-
208
- ---
209
-
210
- ## Questions Answered
211
-
212
- ### Q1: Is our approach sound?
213
- **A:** YES - Extremely sound, backed by cutting-edge research from top institutions.
214
-
215
- ### Q2: Single URL or batch processing?
216
- **A:** BOTH - Start with single URL (MVP), add batch processing (practical), then sitemap (enterprise).
217
-
218
- ### Q3: What should the output look like?
219
- **A:** Data-driven coverage scores, justified with evidence, actionable recommendations, downloadable for larger analyses.
220
-
221
- ---
222
-
223
- ## Final Recommendation
224
-
225
- 🚀 **BUILD IT**
226
-
227
- This is a cutting-edge tool at the perfect time:
228
- - Research is solid (100% validated)
229
- - Market need is emerging (GEO is hot)
230
- - Technical feasibility is high (95%)
231
- - Integration path is clear (Content Machine)
232
-
233
- Start with the MVP (single URL analysis) to prove the concept. If that works well, the batch and sitemap modes are straightforward extensions.
234
-
235
- The combination of these four research techniques hasn't been done before in a practical tool. This could become a reference implementation for content gap analysis in the GEO era.
236
-
237
- ---
238
-
239
- **Status:** ✅ Research Phase Complete
240
- **Next Phase:** Implementation
241
- **Timeline:** 2-3 weeks to MVP
242
- **Confidence:** 🟢 High
@@ -1,738 +0,0 @@
1
- # Google's Query Fan-Out: Research Adaptation
2
-
3
- **Date:** December 15, 2024
4
- **Status:** Design Document - Ready for Implementation
5
- **Research Source:** Google's query variant generation methodology (Patent US 11663201 B2, related research)
6
-
7
- ---
8
-
9
- ## Executive Summary
10
-
11
- This document outlines how we're adapting Google's query fan-out methodology for our content gap analysis use case. Google's approach uses trained generative models to expand user queries into multiple variants, leveraging contextual signals and iterative refinement. We're implementing a similar system using Claude Sonnet 4.5 via LLM prompting rather than neural network training.
12
-
13
- **Key Decision:** Use LLM prompting with Claude Sonnet 4.5 instead of trained models for flexibility, faster iteration, and avoiding training data requirements.
14
-
15
- ---
16
-
17
- ## Source Research: Google's Methodology
18
-
19
- ### Core Approach
20
-
21
- Google's system generates query variants through a "multitask model" that can produce different types of variants on demand. The system:
22
-
23
- 1. Takes an original query as input
24
- 2. Incorporates contextual signals (temporal, user profile, intent)
25
- 3. Generates multiple variant types simultaneously
26
- 4. Scores variants based on relevance and answer-finding capability
27
- 5. Uses iterative refinement with feedback loops
28
-
29
- ### Technical Architecture
30
-
31
- **Input Features:**
32
- - Original query text
33
- - Context vector (temporal, user attributes, dialog history)
34
- - Type value input (specifies which variant type to generate)
35
- - Prior variants and search responses (for iterative refinement)
36
-
37
- **Output:**
38
- - Multiple query variants with response scores
39
- - Grading based on answer retrieval success
40
- - Filtering of misleading/irrelevant variants
41
-
42
- ---
43
-
44
- ## Query Variant Types: Google's Taxonomy
45
-
46
- ### The 8 Core Types
47
-
48
- Based on Google's research and our analysis, we're implementing these variant types:
49
-
50
- #### 1. **Follow-up Queries**
51
- **Definition:** Logical next questions a user might ask after the original query.
52
-
53
- **Google Example:**
54
- - Original: "What are the best protein powders for runners?"
55
- - Follow-up: "best protein powder for post-run recovery"
56
-
57
- **Our Adaptation:** Generate 3-5 follow-up queries that assume the user has consumed the content and wants to go deeper or explore related aspects.
58
-
59
- ---
60
-
61
- #### 2. **Generalization Queries**
62
- **Definition:** Broader versions of the original query that encompass the specific query within a larger context.
63
-
64
- **Our Adaptation:** Create variants that zoom out from the specific keyword to related broader topics.
65
-
66
- **Example:**
67
- - Original: "direct drive sim racing wheels"
68
- - Generalization: "sim racing wheels comparison", "force feedback racing wheels"
69
-
70
- ---
71
-
72
- #### 3. **Specification Queries**
73
- **Definition:** More detailed or specific versions that drill down into particular aspects.
74
-
75
- **Our Adaptation:** Add qualifiers, brands, use cases, or technical details to make queries more specific.
76
-
77
- **Example:**
78
- - Original: "sim racing wheels"
79
- - Specification: "Fanatec DD Pro wheel review", "best sim racing wheel for Formula 1", "direct drive vs belt driven racing wheels"
80
-
81
- ---
82
-
83
- #### 4. **Entailment/Equivalent Queries**
84
- **Definition:** Alternative phrasings with the same intent; logically implied questions.
85
-
86
- **Google calls these:** "Canonicalization Queries" - standardized forms
87
-
88
- **Our Adaptation:** Rephrase the keyword in different ways whilst maintaining semantic meaning.
89
-
90
- **Example:**
91
- - Original: "sim racing cockpit"
92
- - Equivalent: "racing simulator rig", "sim rig setup", "racing seat and wheel stand"
93
-
94
- ---
95
-
96
- #### 5. **Comparison Queries**
97
- **Definition:** Queries seeking to compare options, alternatives, or competing solutions.
98
-
99
- **Our Adaptation:** Generate "vs" queries, "best of" queries, and alternative exploration queries.
100
-
101
- **Example:**
102
- - Original: "sim racing wheels"
103
- - Comparison: "Fanatec vs Thrustmaster wheels", "direct drive vs gear driven sim wheels", "best budget sim racing wheel"
104
-
105
- ---
106
-
107
- #### 6. **Clarification Queries**
108
- **Definition:** Questions seeking to understand concepts, definitions, or mechanisms.
109
-
110
- **Our Adaptation:** "What is...", "How does...", "Why..." variants that address understanding gaps.
111
-
112
- **Example:**
113
- - Original: "direct drive sim racing wheels"
114
- - Clarification: "what is direct drive technology", "how do direct drive wheels work", "why are direct drive wheels better"
115
-
116
- ---
117
-
118
- #### 7. **Related Aspects Queries**
119
- **Definition:** Connected topics or latent sub-intents not explicitly stated in the original query.
120
-
121
- **Google Example:** For "Bluetooth headphones with comfortable over-ear design and long-lasting battery", fan-out recognizes facets like design, technology, then generates sub-queries about user reviews, expert reviews, comparisons.
122
-
123
- **Our Adaptation:** Identify implicit facets and generate queries exploring those aspects.
124
-
125
- **Example:**
126
- - Original: "sim racing wheels"
127
- - Related Aspects: "sim racing wheel setup guide", "best pedals for sim racing", "wheel compatibility with PC games"
128
-
129
- ---
130
-
131
- #### 8. **Temporal Queries**
132
- **Definition:** Time-specific versions incorporating seasonal, current, or time-bound context.
133
-
134
- **Our Adaptation:** When temporal context is provided, generate variants with time qualifiers.
135
-
136
- **Example:**
137
- - Original: "sim racing wheels"
138
- - With context {season: "winter", year: "2024"}: "best sim racing wheels 2024", "sim racing black friday deals", "new sim racing wheels released in 2024"
139
-
140
- ---
141
-
142
- ## Context Signals and Attributes
143
-
144
- ### What Google Uses
145
-
146
- **Temporal Context:**
147
- - Time of query
148
- - Season
149
- - Proximity to events
150
-
151
- **User-Specific Context:**
152
- - Profile data
153
- - Past queries
154
- - Dialog history
155
- - User attributes
156
-
157
- **Task/Intent Context:**
158
- - Dialog intent classification
159
- - Query intent (research, commercial, comparison)
160
- - Entity and variable identification
161
-
162
- **Environmental Context:**
163
- - Ambient noise
164
- - Device type
165
- - Input modality (spoken/typed)
166
-
167
- ### What We're Implementing
168
-
169
- **Phase 1 (Immediate):**
170
- ```typescript
171
- interface AnalysisContext {
172
- temporal?: {
173
- currentDate?: string;
174
- season?: string;
175
- };
176
- intent?: "shopping" | "research" | "navigation" | "entertainment";
177
- specificity_preference?: "broad" | "specific" | "balanced";
178
- }
179
- ```
180
-
181
- **Why Limited Context?**
182
- 1. Privacy - we don't have user profile data
183
- 2. Simplicity - content analysis doesn't need full user modeling
184
- 3. Extensibility - structure allows future expansion
185
-
186
- **Phase 2 (Future Enhancement):**
187
- - User location for regional queries
188
- - Content language detection
189
- - Topic domain classification
190
-
191
- ---
192
-
193
- ## Quality Criteria for Variants
194
-
195
- ### Google's Criteria
196
-
197
- 1. **Relevance:** Variants must relate to original intent
198
- 2. **Answer-Finding:** Must lead to retrievable answers
199
- 3. **Diversity:** Cover different facets without redundancy
200
- 4. **Factual Accuracy:** No hallucinated or misleading queries
201
- 5. **Response Scores:** Each variant gets scored for quality
202
-
203
- ### Our Implementation
204
-
205
- **Validation Methods:**
206
-
207
- 1. **Relevance Check (Prompt-Level):**
208
- - Instruct Claude to maintain semantic relationship
209
- - Request realistic, user-typed queries only
210
- - Emphasize connection to source keyword
211
-
212
- 2. **Coverage Check (Self-RAG):**
213
- - Test each variant against actual content
214
- - Assess if content can answer the variant
215
- - Filter out variants with no content support
216
-
217
- 3. **Diversity Check (Post-Processing):**
218
- - Deduplicate near-identical variants
219
- - Ensure distribution across variant types
220
- - Check for complementary rather than overlapping coverage
221
-
222
- 4. **Quality Metrics:**
223
- ```typescript
224
- interface VariantQuality {
225
- totalVariants: number;
226
- variantDistribution: Record<FanOutVariantType, number>;
227
- avgCoverageScore: number;
228
- uniquenessRatio: number; // % of non-duplicate variants
229
- answerabilityRate: number; // % that content can answer
230
- }
231
- ```
232
-
233
- ---
234
-
235
- ## Iterative Refinement Approach
236
-
237
- ### Google's Method
238
-
239
- **Actor-Critic Model:**
240
- - Actor (generative model) creates variants
241
- - Critic (evaluation system) scores them
242
- - Feedback from search responses informs next iteration
243
- - Up to 20 iterations with reinforcement learning
244
-
245
- **Feedback Loop:**
246
- - Prior variants inform new generation
247
- - Search system responses guide refinement
248
- - User interactions update context vector
249
-
250
- ### Our Adaptation
251
-
252
- **Single-Pass Generation (Phase 1):**
253
-
254
- We're starting with single-pass generation for these reasons:
255
-
256
- 1. **Speed:** Content analysis needs fast results
257
- 2. **Simplicity:** Easier to implement and debug
258
- 3. **Cost:** Multiple LLM calls expensive at scale
259
- 4. **Use Case:** Our analysis doesn't require search-engine-level precision
260
-
261
- **Quality Control Without Iteration:**
262
- - Comprehensive prompt with examples
263
- - Few-shot learning within prompt
264
- - Post-generation validation
265
- - Self-RAG coverage assessment
266
-
267
- **Future Enhancement (Phase 2):**
268
-
269
- If quality isn't sufficient, we can add:
270
-
271
- ```typescript
272
- async generateVariantsIterative(
273
- keyword: string,
274
- content: ContentData,
275
- maxIterations: number = 3
276
- ): Promise<FanOutQuery[]> {
277
- let variants: FanOutQuery[] = [];
278
- let feedback: string = "";
279
-
280
- for (let i = 0; i < maxIterations; i++) {
281
- const newVariants = await this.generateVariants(
282
- keyword,
283
- content,
284
- variantTypes,
285
- context,
286
- feedback // Include feedback from previous iteration
287
- );
288
-
289
- variants = this.mergeAndDeduplicate(variants, newVariants);
290
- feedback = await this.assessQuality(variants, content);
291
-
292
- if (this.qualityThresholdMet(variants)) break;
293
- }
294
-
295
- return variants;
296
- }
297
- ```
298
-
299
- ---
300
-
301
- ## Prompt Engineering Methodology
302
-
303
- ### Structure Based on Google's Approach
304
-
305
- **Our Prompt Template:**
306
-
307
- ```typescript
308
- const VARIANT_GENERATION_PROMPT = `
309
- <thinking>
310
- You are implementing Google's Query Fan-Out methodology for content gap analysis.
311
-
312
- CONTEXT:
313
- - Target Keyword: "{keyword}"
314
- - Content Topic: {contentSummary}
315
- - Content Type: {article/guide/product page}
316
- ${temporal context if provided}
317
- ${intent context if provided}
318
-
319
- YOUR TASK:
320
- Generate query variants that real users would actually type when searching for information
321
- related to "{keyword}". Each variant must maintain semantic relationship to the keyword.
322
-
323
- VARIANT TYPES TO GENERATE:
324
-
325
- 1. FOLLOW-UP QUERIES (3-5 variants)
326
- Definition: Logical next questions after learning about {keyword}
327
- Quality Criteria:
328
- - Assumes user has basic knowledge from original query
329
- - Explores deeper aspects or related topics
330
- - Natural progression of learning/research
331
-
332
- Examples from your domain:
333
- - [Domain-specific few-shot examples]
334
-
335
- Your follow-up variants:
336
-
337
- 2. SPECIFICATION QUERIES (3-5 variants)
338
- Definition: More specific/detailed versions with added qualifiers
339
- Quality Criteria:
340
- - Add brands, models, use cases, or technical details
341
- - Must be answerable with specific information
342
- - Drill down into particular aspects
343
-
344
- Examples:
345
- - [Domain-specific few-shot examples]
346
-
347
- Your specification variants:
348
-
349
- [Continue for all 8 types...]
350
-
351
- QUALITY REQUIREMENTS:
352
- ✅ Realistic (users would actually type these)
353
- ✅ Semantically related to "{keyword}"
354
- ✅ Answerable by content (when checking coverage)
355
- ✅ Diverse (different angles, not repetitive)
356
- ✅ No hallucinated brands/products
357
- ❌ No marketing jargon
358
- ❌ No overly complex queries
359
- ❌ No irrelevant tangents
360
-
361
- OUTPUT FORMAT:
362
- Return ONLY valid JSON (no markdown, no explanation):
363
- {
364
- "followUp": ["query1", "query2", "query3"],
365
- "specification": ["query1", "query2", "query3"],
366
- "generalization": ["query1", "query2"],
367
- "equivalent": ["query1", "query2", "query3"],
368
- "comparison": ["query1", "query2", "query3"],
369
- "clarification": ["query1", "query2"],
370
- "relatedAspects": ["query1", "query2", "query3"],
371
- "temporal": ["query1", "query2"] // Only if temporal context provided
372
- }
373
- </thinking>
374
-
375
- Generate the variants now:
376
- `;
377
- ```
378
-
379
- ### Key Prompt Engineering Principles
380
-
381
- **1. Few-Shot Examples:**
382
- - Provide 2-3 examples per variant type
383
- - Use domain-relevant examples (sim racing, in our case)
384
- - Show both good and bad examples with explanation
385
-
386
- **2. Explicit Quality Criteria:**
387
- - List requirements inline for each type
388
- - Reference Google's methodology where relevant
389
- - Use checkmarks/crosses for visual clarity
390
-
391
- **3. Output Structure Enforcement:**
392
- - Request specific JSON format
393
- - Use `<thinking>` tags to prevent JSON pollution
394
- - Specify "no markdown, no explanation"
395
-
396
- **4. Context Incorporation:**
397
- - Inject temporal/intent context naturally
398
- - Make context optional (fallback to generic if missing)
399
- - Show how context influences generation
400
-
401
- **5. Semantic Constraints:**
402
- - Emphasize "realistic user queries"
403
- - Require semantic relationship to source
404
- - Prohibit hallucinations and jargon
405
-
406
- ---
407
-
408
- ## Implementation Decisions
409
-
410
- ### Why Claude Sonnet 4.5?
411
-
412
- **Advantages:**
413
- 1. **Strong reasoning:** Can understand complex variant type definitions
414
- 2. **Few-shot learning:** Excellent at following examples
415
- 3. **JSON reliability:** With `<thinking>` tags, produces clean JSON
416
- 4. **Context window:** Can handle extensive prompt with examples
417
- 5. **Speed:** Fast enough for real-time analysis
418
-
419
- **Comparison to Google's Approach:**
420
- - Google: Custom-trained neural networks
421
- - Us: General-purpose LLM with prompting
422
- - Trade-off: We sacrifice some precision for flexibility and speed
423
-
424
- ### Why Skip Neural Network Training?
425
-
426
- **Reasons:**
427
- 1. **No Training Data:** Don't have Google's query logs
428
- 2. **Flexibility:** LLM can adapt to new domains instantly
429
- 3. **Iteration Speed:** Prompt changes vs model retraining
430
- 4. **Resource Constraints:** Training expensive and time-consuming
431
- 5. **Generalization:** LLM handles diverse topics without domain-specific training
432
-
433
- **Validation:**
434
- - Google's patent shows LLMs work for this task
435
- - We can match quality through prompt engineering
436
- - If needed, future fine-tuning possible with collected data
437
-
438
- ### Why These 8 Variant Types?
439
-
440
- **Selection Criteria:**
441
- 1. **Coverage:** These types cover all major search intents
442
- 2. **Google's Validation:** All types mentioned in their research
443
- 3. **Content Analysis Fit:** Relevant to gap identification
444
- 4. **Implementability:** Can be prompted effectively
445
-
446
- **Types We Considered But Excluded:**
447
- - **Language Translation:** Not relevant for English content analysis
448
- - **Latent Topics (as separate type):** Covered by "Related Aspects"
449
- - **Canonicalization (as separate):** Merged into "Equivalent"
450
-
451
- ### Why Single-Pass Generation?
452
-
453
- **Decision:** Start with single-pass, add iteration if needed
454
-
455
- **Reasoning:**
456
- 1. **Speed Matters:** Content analysis should be fast
457
- 2. **Cost Control:** Each LLM call has API cost
458
- 3. **YAGNI Principle:** Don't build iteration until we prove we need it
459
- 4. **Quality First Pass:** Good prompting can produce quality without iteration
460
-
461
- **Success Criteria for Single-Pass:**
462
- - 80%+ variants are relevant
463
- - 60%+ variants are answerable by content
464
- - <10% duplicate/near-duplicate variants
465
-
466
- **Trigger for Adding Iteration:**
467
- - Relevance drops below 70%
468
- - Duplication exceeds 20%
469
- - User feedback indicates quality issues
470
-
471
- ---
472
-
473
- ## Integration Strategy
474
-
475
- ### Hybrid Mode Philosophy
476
-
477
- **Three Analysis Modes:**
478
-
479
- 1. **Content-Only (Original):**
480
- - Infer queries from content structure
481
- - No external keyword input
482
- - Pure content-based decomposition
483
-
484
- 2. **Hybrid (New Primary):**
485
- - Content inference + keyword fan-out
486
- - Merge both query sets intelligently
487
- - Show clear attribution in report
488
-
489
- 3. **Keyword-Only (New Optional):**
490
- - Skip content inference entirely
491
- - Focus purely on keyword variants
492
- - Faster for targeted analysis
493
-
494
- ### Merging Strategy
495
-
496
- **Challenge:** Avoid duplicates between content-inferred and keyword variants
497
-
498
- **Solution:**
499
- ```typescript
500
- mergeQueryGraphs(
501
- contentQueries: QueryGraph,
502
- fanOutQueries: FanOutQuery[]
503
- ): EnhancedQueryGraph {
504
- // 1. Normalize all queries (lowercase, remove punctuation)
505
- // 2. Calculate semantic similarity (cosine similarity on embeddings)
506
- // 3. Deduplicate if similarity > 0.85
507
- // 4. Distribute fan-out variants into tiers based on specificity
508
- // 5. Mark source clearly for reporting
509
- }
510
- ```
511
**Distribution Logic:**
- **Tier 1 (Prerequisite):** Clarification, Generalization variants
- **Tier 2 (Core):** Equivalent, Specification variants
- **Tier 3 (Follow-up):** Follow-up, Comparison, Related Aspects variants
- **Temporal:** Distributed based on specificity
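A minimal sketch of the normalize-and-dedupe steps, with hypothetical helper names and token-set Jaccard similarity standing in for the embedding cosine similarity the design calls for (a 0.85 Jaccard threshold is far stricter than 0.85 cosine, so the number is illustrative only):

```typescript
// Lowercase, strip punctuation, collapse whitespace.
function normalizeQuery(q: string): string {
  return q.toLowerCase().replace(/[^\w\s]/g, "").trim().replace(/\s+/g, " ");
}

// Token-set Jaccard similarity: stand-in for embedding cosine similarity.
function querySimilarity(a: string, b: string): number {
  const ta = new Set(normalizeQuery(a).split(" "));
  const tb = new Set(normalizeQuery(b).split(" "));
  const inter = [...ta].filter((t) => tb.has(t)).length;
  const union = new Set([...ta, ...tb]).size;
  return union === 0 ? 0 : inter / union;
}

// Keep every content-inferred query; add fan-out variants only if they
// aren't near-duplicates of something already kept.
function mergeQueries(
  contentQueries: string[],
  fanOutQueries: string[],
  threshold = 0.85
): string[] {
  const merged = [...contentQueries];
  for (const q of fanOutQueries) {
    if (!merged.some((m) => querySimilarity(m, q) >= threshold)) merged.push(q);
  }
  return merged;
}
```

Tier distribution and source attribution would then tag each surviving query, as steps 4-5 of the solution describe.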
### Report Presentation

**Two-Section Layout:**

**Section 1: Content-Inferred Queries**
- Standard 3-tier layout
- Queries generated from content analysis
- Shows how well content addresses natural progression

**Section 2: Keyword Fan-Out Analysis**
- Grouped by variant type
- Color-coded differently (teal/cyan vs blue/purple/orange)
- Includes variant type descriptions
- Shows coverage by type

**Synthesis Section:**
- Combined coverage score
- Recommendations prioritized by source
- Gap analysis considering both methods

---
## What We're NOT Implementing (And Why)

### 1. User Profiling System

**Google Has:** Rich user profiles (location, demographics, history)

**We Don't Need:**
- **Privacy Concerns:** Don't want to track user data
- **Use Case Difference:** Analyzing content, not personalizing search
- **Complexity:** Adds significant overhead for minimal benefit

**What We Do Instead:** Optional basic context (intent, temporal)

---

### 2. Reinforcement Learning / Neural Network Training

**Google Has:** Trained models with RL optimization

**We Don't Need:**
- **No Training Data:** We don't have query logs at scale
- **Flexibility:** The LLM adapts instantly without retraining
- **Maintenance:** No model versioning/deployment overhead

**What We Do Instead:** Prompt engineering with Claude Sonnet 4.5

---

### 3. Multi-Iteration Refinement (Initially)

**Google Has:** Up to 20 iterations with an actor-critic architecture

**We Don't Need (Yet):**
- **Speed:** Single-pass is faster
- **Cost:** Multiple LLM calls are expensive
- **YAGNI:** Build it only if quality isn't good enough

**What We Do Instead:** High-quality single-pass with thorough prompting

---

### 4. Search System Integration

**Google Has:** A feedback loop with actual search results

**We Don't Need:**
- **Different Use Case:** Analyzing content, not searching the web
- **Complexity:** Would require search API integration
- **Self-RAG Sufficient:** Coverage assessment handles validation

**What We Do Instead:** Self-RAG against content for validation

---

### 5. Real-Time Query Log Analysis

**Google Has:** Massive query logs for pattern learning

**We Don't Need:**
- **Scale:** We're not building a search engine
- **Privacy:** We don't collect user queries
- **LLM Handles It:** A general model understands query patterns

**What We Do Instead:** Rely on the LLM's training on web-scale text

---
## Future Enhancement Opportunities

### Phase 2 Additions (If Needed)

**1. Iterative Refinement:**
```typescript
// Add if single-pass quality insufficient
async function generateVariantsIterative(...) {
  // Actor-critic loop with feedback
}
```

**2. Semantic Deduplication:**
```typescript
// Use embeddings for better duplicate detection
async function deduplicateWithEmbeddings(variants: string[]) {
  // Calculate cosine similarity
  // Cluster near-duplicates
}
```

**3. Context Prediction:**
```typescript
// Auto-infer intent from content
async function inferContext(content: ContentData): Promise<AnalysisContext> {
  // Classify content type
  // Detect temporal signals
  // Predict user intent
}
```

**4. Cross-Variant Verification:**
```typescript
// Check variants against each other for coherence
async function verifyVariantCoherence(variants: FanOutQuery[]) {
  // Ensure no contradictions
  // Validate logical relationships
}
```
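To make the semantic deduplication idea concrete, here is a small runnable sketch. The embedding lookup is passed in as a function because any real implementation would call an embeddings API; everything else (names, 0.85 threshold) is illustrative.

```typescript
// Cosine similarity between two equal-length vectors.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, na = 0, nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
}

// Greedy dedupe: keep a variant only if it isn't too similar to one already kept.
function dedupeByEmbedding(
  variants: string[],
  embed: (s: string) => number[],
  threshold = 0.85
): string[] {
  const kept: { text: string; vec: number[] }[] = [];
  for (const v of variants) {
    const vec = embed(v);
    if (!kept.some((k) => cosineSimilarity(k.vec, vec) >= threshold)) {
      kept.push({ text: v, vec });
    }
  }
  return kept.map((k) => k.text);
}
```

Greedy thresholding is the simplest form of the "cluster near-duplicates" step; a production version might cluster properly rather than keep first-seen winners.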
### Research Opportunities

**1. LLM vs Trained Model Comparison:**
- Collect quality metrics from our system
- Compare to benchmarks from Google's research
- Publish findings if novel insights emerge

**2. Optimal Variant Count Study:**
- Test 3 vs 5 vs 10 variants per type
- Measure coverage improvement per additional variant
- Find diminishing returns point

**3. Context Signal Effectiveness:**
- A/B test with/without context
- Measure quality improvement per signal type
- Identify most valuable signals for content analysis

---
## Success Criteria

### MVP Success (Phase 1)

**Functional Requirements:**
- ✅ Generates variants for all 8 types
- ✅ Variants are realistic and user-typed
- ✅ Coverage assessment works for both content and keyword queries
- ✅ Reports clearly distinguish query sources
- ✅ No performance regression from current system

**Quality Metrics:**
- **Relevance:** 80%+ variants semantically related to keyword
- **Answerability:** 60%+ variants answerable by content
- **Diversity:** <10% duplicate/near-duplicate variants
- **Speed:** Complete analysis in <60 seconds for standard depth

### Long-Term Success (Phase 2+)

**Enhancement Triggers:**
- User feedback requests iteration
- Quality metrics consistently below targets
- New use cases require advanced features

---
## Implementation Checklist

**Design Phase (This Document):**
- [x] Read and analyze Google's research
- [x] Document variant type taxonomy
- [x] Define context signal structure
- [x] Design prompt engineering approach
- [x] Identify what we're NOT implementing
- [x] Establish success criteria

**Implementation Phase (Next Steps):**
- [ ] Extend TypeScript types (src/types.ts)
- [ ] Create KeywordFanOut service (src/services/keyword-fanout.ts)
- [ ] Update analyze-content-gap tool (src/tools/analyze-content-gap.ts)
- [ ] Enhance report formatter (src/services/report-formatter.ts)
- [ ] Update artifact instructions
- [ ] Write comprehensive tests
- [ ] Update README with examples
- [ ] Validate against quality criteria

**Documentation Phase:**
- [ ] Update TESTING-HANDOVER.md with new scenarios
- [ ] Add "Based on Research" section to README
- [ ] Create example outputs for all three modes
- [ ] Document prompt engineering decisions

---
## Conclusion

We're adapting Google's sophisticated query fan-out methodology for a focused content analysis use case. By using LLM prompting instead of neural network training, we achieve flexibility and fast iteration whilst maintaining the core principles of Google's approach:

1. **Multiple variant types** for comprehensive coverage
2. **Context signals** to improve relevance
3. **Quality criteria** to filter poor variants
4. **Systematic methodology** grounded in research

The design prioritizes pragmatism: implementing what's necessary for our use case whilst leaving room for future enhancement based on real-world usage.

**Next Step:** Begin Phase 1 implementation, starting with type system extensions.

---

**Document Version:** 1.0
**Last Updated:** December 15, 2024
**Author:** Richard Baxter (with Claude)
**Review Status:** ✅ Ready for Implementation
# Understanding Keyword Fan-Out: From Google Research to Production

**I've been building content optimization tools for about three months now, and the gap between traditional keyword research and what AI search engines actually need is... significant.** Not because keywords don't matter anymore – they absolutely do – but because AI search engines like ChatGPT and Perplexity don't just match keywords, they understand query intent and retrieve content based on semantic variants you've probably never considered.

This is where Google's query fan-out methodology becomes genuinely useful (to their detriment, I might add – this research should be more widely known).

---

## The Problem with Traditional Keyword Research

Here's what I noticed whilst building the Fan-Out MCP: traditional keyword tools give you exact-match variants and maybe some synonyms. "Sim racing wheel" becomes "racing wheel" or "steering wheel for racing games". Useful, but incomplete.

AI search engines work differently. When someone asks ChatGPT "what's the best wheel for Gran Turismo 7", the AI isn't just matching those exact words. It's understanding that this query relates to:
- PS5 compatibility questions
- Direct drive vs belt-driven comparisons
- Budget ranges for different wheel types
- Force feedback technology explanations
- Setup complexity for beginners

That's five different content angles from one query, and you'd miss most of them with traditional keyword research.

---
## Google's Approach: Query Fan-Out

Google's research (documented in [arXiv:2210.12084](https://arxiv.org/pdf/2210.12084) and Patent US 11663201 B2) uses neural networks to expand queries into multiple variant types. The system generates what they call "multitask" variants – different types of query transformations happening simultaneously.

### The Eight Variant Types

I've implemented all eight types in the Fan-Out MCP, and here's what each one actually does:

**1. Equivalent Variants**
Alternative phrasings with identical intent. "PS5 racing wheel" → "PlayStation 5 steering wheel", "Sony PS5 racing controller".

Test results: 100% coverage on these. Content that mentions one phrasing almost always covers the equivalents naturally.

**2. Specification Variants**
More detailed versions with qualifiers. "racing wheel" → "Fanatec GT DD Pro review", "budget racing wheel under £300".

Test results: 80-90% coverage. These expose real gaps – my sim racing guide covered general wheels but missed specific budget breakdowns.

**3. Follow-Up Variants**
Logical next questions. "racing wheel" → "how to calibrate racing wheel", "best games for racing wheel".

Test results: 60-70% coverage. Major gap area. Content answers the primary question but misses the obvious follow-ups users will search for next.

**4. Comparison Variants**
"Vs" queries and alternatives. "racing wheel" → "Thrustmaster vs Logitech wheels", "direct drive vs belt driven".

Test results: 85% coverage. Buyer's guides naturally include comparisons, so content performs well here.

**5. Clarification Variants**
Understanding questions. "direct drive wheel" → "what is direct drive technology", "how does force feedback work".

Test results: 50-60% coverage. Content assumes knowledge. Big opportunity for SEO + AI search optimization.

**6. Generalization Variants**
Broader encompassing queries. "Fanatec DD Pro" → "direct drive wheels", "force feedback racing wheels".

Test results: 70% coverage. Content written for specific products usually mentions the broader category.

**7. Related Aspects Variants**
Connected implicit needs. "racing wheel" → "racing wheel desk mount", "wheel compatibility with PC".

Test results: 40-60% coverage. These are the "I didn't even think to include this" gaps.

**8. Temporal Variants**
Time-specific versions. "racing wheels" → "best racing wheels 2024", "new racing wheels December 2024".

Test results: Variable. Depends entirely on whether content includes dates and temporal qualifiers.

---
## Our Adaptation: Why Prompts Instead of Neural Networks

Google's approach uses trained generative models. Ours uses Claude Sonnet 4.5 with carefully structured prompts. Here's why:

**Flexibility:** I can adjust variant types, add new ones, or tune generation behaviour in minutes. Training a neural network takes days and requires GPU infrastructure.

**Quality:** Claude Sonnet 4.5 generates remarkably realistic queries. Average realism score from testing: 0.75/1.0. That's better than I expected, honestly.

**Cost:** Running inference on a 70B parameter model is expensive. Claude's API is predictable and scalable.

**Iteration speed:** When I discovered that the default should be 5 types instead of 8 (generalization, relatedAspects, and temporal are less actionable), I changed one line of code. With a trained model, that's a complete retraining cycle.

The tradeoff? We can't do iterative refinement like Google's system (where the model uses search results to improve variants). But for content gap analysis, we don't need that – we're assessing content, not retrieving it.

---
## What Testing Revealed

I ran seven comprehensive tests on a 6,491-word sim racing buyer's guide. Here's what actually happened:

### Test 1: Content-Only Baseline
14 queries generated purely from content structure. Coverage: 79/100. Processing time: 90 seconds.

This validated the query decomposition approach. The three-tier system (prerequisite/core/follow-up) produces natural, realistic queries.

### Test 2: Hybrid Mode (The Critical Test)
14 content queries + 21 fan-out variants = 35 total. Coverage: 80/100. Processing time: 174 seconds.

**Key finding:** Fan-out variants exposed gaps content analysis missed. The article covered "best PS5 racing wheel" but missed "wireless PS5 racing wheel options" and "how to calibrate PS5 racing wheel". Both are obvious user needs that content-only analysis didn't surface.

This is the value proposition, basically. Content analysis tells you what queries your content answers. Fan-out tells you what query variants users will actually search for.

### Test 3: Keyword-Only Mode
19 variants, no content queries. Coverage: 76/100. Processing time: 86 seconds (50% faster).

Validated that keyword variants alone provide actionable insights. Users who know their target keyword can skip content inference entirely.

### Test 7: Single-Word Keyword Edge Case
Keyword: "PS5". Generated 20 variants, ALL contextually relevant to sim racing.

**Critical finding:** Single-word keywords work brilliantly when content context is strong. No generic drift ("what is PS5") because the content understanding guides variant generation. I wasn't expecting this level of context awareness, genuinely.

---
## The Default Five vs All Eight Types

Look, this is an important distinction that took testing to clarify.

**Default (5 types):** equivalent, specification, followUp, comparison, clarification
- 15-25 variants per keyword
- Most actionable for content optimization
- Covers immediate user needs

**Opt-in (3 additional types):** generalization, relatedAspects, temporal
- Adds 9-15 more variants
- Broader coverage but less immediately actionable
- Useful for comprehensive content audits

The research shows all eight types, but in practice, the default five hit the sweet spot between coverage and actionability. The other three are available via the `fan_out_types` parameter when you need them.

---
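In TypeScript terms, the split looks like this. The camelCase values match the `fan_out_types` names above, but the type and constant names are a sketch, not the MCP's real definitions:

```typescript
// The eight variant types from Google's taxonomy, split into the default-on
// set and the opt-in set.
type FanOutVariantType =
  | "equivalent"
  | "specification"
  | "followUp"
  | "comparison"
  | "clarification"
  | "generalization"
  | "relatedAspects"
  | "temporal";

const DEFAULT_FAN_OUT_TYPES: FanOutVariantType[] = [
  "equivalent",
  "specification",
  "followUp",
  "comparison",
  "clarification",
];

const OPT_IN_FAN_OUT_TYPES: FanOutVariantType[] = [
  "generalization",
  "relatedAspects",
  "temporal",
];
```

Modelling the types as a string union means a typo in a requested type fails at compile time rather than producing an empty variant group.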
## Quality Metrics That Actually Matter

From testing across multiple scenarios:

**Realism: 0.75/1.0**
Variants sound like natural user queries. No AI slop like "leverage your sim racing setup" or "unlock direct drive potential".

**Specificity: 0.44/1.0**
Appropriate detail level. Not too generic ("racing" alone), not overly specific ("Fanatec Clubsport V2.5 with BMW GT2 wheel and CSL Elite pedals load cell brake mod version 1.2").

**Generic Query Count: 0**
Zero "what is PS5" style variants despite using "PS5" as a single-word keyword. Context guides generation effectively.

**Domain Term Usage: 0.55**
Good use of technical vocabulary. Variants include terms like "direct drive", "force feedback", "load cell" appropriately.

**Coverage Accuracy: 85%**
Low hallucination rate. When the system says content covers a query, it actually does.

---
## Performance Characteristics

Assessment time dominates everything. Query generation is fast (~20 seconds for 15-35 queries), but assessing each query takes 4-5 seconds. This scales linearly.

**Processing times:**
- Content-Only: ~90s (14 queries)
- Hybrid: ~174s (35 queries)
- Keyword-Only: ~86s (19 queries)

The math checks out: assessment time per query is consistent regardless of mode, and keyword complexity has no impact (single-word and multi-word keywords take the same time).

---
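The linear scaling above fits a simple back-of-envelope model. This is a hypothetical helper, and the constants are rough averages from these tests, not measured API behaviour:

```typescript
// Fixed generation cost plus per-query assessment cost.
// ~20s generation and ~4.5s per query are approximate averages from testing.
function estimateAnalysisSeconds(
  queryCount: number,
  genSeconds = 20,
  assessSecondsPerQuery = 4.5
): number {
  return genSeconds + queryCount * assessSecondsPerQuery;
}
```

For the hybrid run this predicts 20 + 35 × 4.5 = 177.5 seconds, close to the observed 174.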
## What This Means for Content Optimization

Traditional SEO workflow:
1. Research keywords
2. Write content targeting those keywords
3. Hope you rank

AI search optimization workflow:
1. Research primary keywords
2. **Generate query variants** (fan-out)
3. Write content addressing primary + variants
4. **Validate coverage** (Self-RAG assessment)
5. Fill identified gaps

The fan-out step is what's missing from most content strategies. You're optimizing for the exact query but missing the 15-20 variants AI search engines will use to evaluate whether your content is authoritative.

This is why some perfectly good articles get cited by ChatGPT whilst others don't – it's not keyword density, it's coverage of semantic variants.

---
## The Technical Implementation

The system uses Claude Sonnet 4.5 with structured prompts in `<thinking>` tags to prevent JSON parsing errors. Each variant type gets specific instructions:

```typescript
"Generate 3-5 EQUIVALENT variants that have identical search intent
but use different phrasings. Must be natural user queries, not
marketing speak. Examples: 'sim racing wheel' → 'racing simulator wheel',
'steering wheel for racing games'"
```
Context signals (temporal, intent, specificity_preference) influence generation via prompt injection. Shopping intent produces "where to buy" and "best budget" variants. Research intent produces "how to" and comparison variants.

The quality validation step filters unrealistic queries (over 15 words, marketing jargon, overly complex) before returning results.

---
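The validation step can be sketched as a simple predicate. The 15-word cap comes from the text above; the jargon list and function names are illustrative stand-ins for the real marketing-speak check:

```typescript
// Illustrative jargon list; the actual filter is more nuanced.
const MARKETING_JARGON = ["leverage", "unlock", "supercharge", "game-changing"];

// Reject variants over 15 words or containing marketing jargon.
function isRealisticQuery(variant: string): boolean {
  const words = variant.trim().split(/\s+/);
  if (words.length > 15) return false;
  const lower = variant.toLowerCase();
  return !MARKETING_JARGON.some((term) => lower.includes(term));
}

function filterVariants(variants: string[]): string[] {
  return variants.filter(isRealisticQuery);
}
```

The "overly complex" check is harder to express as a rule and is better left to the LLM's own judgement during generation.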
## Limitations Worth Knowing

**Assessment time scales linearly.** 50+ queries will take 4-5 minutes. This is fine for single-page analysis but would be slow for site-wide audits. Batch processing helps but doesn't eliminate the constraint.

**Content length matters.** The system works best with 2,000-10,000 word articles. Very short content generates few queries. Very long content may exceed context windows.

**English only.** Non-English content will be analyzed, but query generation quality suffers. This is a Claude limitation, not a fan-out limitation.

**Context understanding required.** Works brilliantly with focused topical content (like a sim racing guide). Less effective with highly diverse content (like general news sites).

---
## Where This Fits in the Toolchain

I built this MCP server to fill a specific gap in my content optimization workflow. Traditional tools tell you which keywords to target. This tool tells you whether your content actually addresses those keywords *and their variants*.

It sits between keyword research and content creation:

```
Keyword Research (Ahrefs, SEMrush)
        ↓
Fan-Out Analysis (this MCP)
        ↓
Content Creation (with variant coverage)
        ↓
Coverage Validation (Self-RAG assessment)
```

The research explainer might make it sound complicated, but in practice it's straightforward: give it a URL and a keyword, get back a list of query variants your content should address.
---

## Future Directions

The patent mentions iterative refinement – using search results to improve variants. That's interesting for search engines but probably overkill for content analysis. What would be useful:

**Batch URL analysis:** Process 10-20 URLs simultaneously, identify which pages cover which variants, surface coverage gaps across the entire site.

**Competitive analysis:** Compare variant coverage between your content and competitors. Show specifically which variants they're covering that you're missing.

**Historical tracking:** Monitor variant coverage over time. See if content updates actually improved coverage or just added words.

These are all feasible extensions of the current architecture (and, honestly, I'm already thinking about implementation).
---

## The Bottom Line

Google's query fan-out research provides a legitimate framework for understanding how search engines expand user queries. Our adaptation using Claude Sonnet 4.5 proves the approach works without neural network training overhead.

Testing validates the core hypothesis: content that addresses query variants performs better in AI search results. The system generates realistic variants (0.75 realism score), maintains appropriate specificity (0.44), and achieves high coverage accuracy (85%).

The default five variant types provide the most actionable insights for content optimization. The additional three types are available when you need comprehensive coverage.

If you're optimizing content for AI search engines, understanding query variants isn't optional anymore. It's the difference between getting cited by ChatGPT and getting ignored.

---
**Research Sources:**
- [Google Query Fan-Out Research (arXiv:2210.12084)](https://arxiv.org/pdf/2210.12084)
- Google Patent US 11663201 B2: Query variant generation
- Our implementation: `google-fanout-adaptation.md`
- Test results: `TESTING-REPORT.md`

**Version:** 1.0
**Date:** December 15, 2024
**Author:** Richard Baxter
**Testing Corpus:** 6,491-word sim racing buyer's guide across 7 test scenarios