seo-intel 1.5.2 → 1.5.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/crawler/index.js CHANGED
@@ -263,6 +263,10 @@ export async function* crawlDomain(startUrl, opts = {}) {
263
263
  // ── Sitemap-first: seed queue from sitemap.xml (section-aware) ──
264
264
  try {
265
265
  const sitemapUrls = await fetchSitemap(startUrl);
266
+ // Report full sitemap inventory to caller (for DB persistence / audit diff)
267
+ if (sitemapUrls.length > 0 && typeof opts.onSitemapDiscovered === 'function') {
268
+ try { await opts.onSitemapDiscovered(sitemapUrls); } catch { /* ignore */ }
269
+ }
266
270
  if (sitemapUrls.length > 0) {
267
271
  // Apply section budgets if tiered crawling is enabled
268
272
  const budgeted = tiered ? applySectionBudgets(sitemapUrls, maxPages) : sitemapUrls;
@@ -452,9 +456,36 @@ async function processPage(page, url, base, depth, queue, maxDepth) {
452
456
  status = res?.status() || 0;
453
457
  const loadMs = Date.now() - t0;
454
458
 
459
+ // ── Final URL after redirects ──
460
+ let finalUrl = null;
461
+ try { finalUrl = page.url() || null; } catch { /* ignore */ }
462
+
463
+ // ── Redirect chain (walk request.redirectedFrom() backwards) ──
464
+ const redirectChain = [];
465
+ try {
466
+ let req = res?.request();
467
+ const chain = [];
468
+ while (req) {
469
+ const prev = req.redirectedFrom?.();
470
+ if (!prev) break;
471
+ const prevRes = await prev.response().catch(() => null);
472
+ chain.push({ url: prev.url(), status: prevRes?.status() ?? null });
473
+ req = prev;
474
+ }
475
+ // chain is built most-recent-hop first (walking redirectedFrom backwards); reverse so it reads first hop → last hop
476
+ redirectChain.push(...chain.reverse());
477
+ } catch { /* ignore */ }
478
+
479
+ // ── X-Robots-Tag header ──
480
+ let xRobotsTag = null;
481
+ try {
482
+ const headers = res?.headers?.() || {};
483
+ xRobotsTag = headers['x-robots-tag'] || null;
484
+ } catch { /* ignore */ }
485
+
455
486
  // ── Return status for backoff logic (don't silently drop 4xx) ──
456
487
  if (status === 429 || status === 503 || status === 403) {
457
- return { url, depth, status, loadMs, wordCount: 0, isIndexable: false, title: '', metaDesc: '', headings: [], links: [], bodyText: '', schemaTypes: [], vitals: {}, publishedDate: null, modifiedDate: null, contentHash: null };
488
+ return { url, depth, status, loadMs, wordCount: 0, isIndexable: false, title: '', metaDesc: '', headings: [], links: [], bodyText: '', schemaTypes: [], vitals: {}, publishedDate: null, modifiedDate: null, contentHash: null, finalUrl, redirectChain, xRobotsTag };
458
489
  }
459
490
  if (status >= 400) return null;
460
491
 
@@ -507,7 +538,9 @@ async function processPage(page, url, base, depth, queue, maxDepth) {
507
538
  const wordCount = await page.$eval('body', el => el.innerText.split(/\s+/).filter(Boolean).length).catch(() => 0);
508
539
 
509
540
  const robotsMeta = await page.$eval('meta[name="robots"]', el => el.content).catch(() => '');
510
- const isIndexable = !robotsMeta.toLowerCase().includes('noindex');
541
+ const metaNoindex = robotsMeta.toLowerCase().includes('noindex');
542
+ const headerNoindex = (xRobotsTag || '').toLowerCase().includes('noindex');
543
+ const isIndexable = !(metaNoindex || headerNoindex);
511
544
  const hasCanonical = await page.$('link[rel="canonical"]').then(el => !!el).catch(() => false);
512
545
  const hasOgTags = await page.$('meta[property^="og:"]').then(el => !!el).catch(() => false);
513
546
 
@@ -576,6 +609,7 @@ async function processPage(page, url, base, depth, queue, maxDepth) {
576
609
  hasCanonical, hasOgTags,
577
610
  hasRobots: !!robotsMeta,
578
611
  hasSchema: schemaTypes.length > 0,
612
+ finalUrl, redirectChain, xRobotsTag,
579
613
  };
580
614
  }
581
615
 
@@ -101,3 +101,47 @@ function extractTagContent(xml, tagName) {
101
101
  }
102
102
  return results;
103
103
  }
104
+
105
+ /**
106
+ * HEAD-check a single URL without following redirects.
107
+ * Returns { status, location } — location is the Location response header when present (typically on 3XX), else null.
108
+ * Never throws — errors return { status: 0, error: msg }.
109
+ */
110
+ export async function headCheck(url, { timeoutMs = 8000 } = {}) {
111
+ try {
112
+ const ctrl = new AbortController();
113
+ const t = setTimeout(() => ctrl.abort(), timeoutMs);
114
+ const res = await fetch(url, {
115
+ method: 'HEAD',
116
+ redirect: 'manual',
117
+ signal: ctrl.signal,
118
+ headers: { 'User-Agent': 'SEOIntelBot/1.0' },
119
+ }).finally(() => clearTimeout(t));
120
+ return {
121
+ status: res.status,
122
+ location: res.headers.get('location') || null,
123
+ };
124
+ } catch (err) {
125
+ return { status: 0, error: err.message };
126
+ }
127
+ }
128
+
129
+ /**
130
+ * Run HEAD checks against an array of sitemap URL rows in parallel (capped).
131
+ * Accepts [{ id, url }]. Invokes onResult(row, result) per check.
132
+ */
133
+ export async function headCheckAll(rows, { concurrency = 6, onResult } = {}) {
134
+ const queue = [...rows];
135
+ const worker = async () => {
136
+ while (queue.length) {
137
+ const row = queue.shift();
138
+ if (!row) break;
139
+ const result = await headCheck(row.url);
140
+ if (onResult) {
141
+ try { await onResult(row, result); } catch { /* swallow */ }
142
+ }
143
+ }
144
+ };
145
+ const workers = Array.from({ length: Math.min(concurrency, rows.length) }, () => worker());
146
+ await Promise.all(workers);
147
+ }
package/db/db.js CHANGED
@@ -24,7 +24,11 @@ export function getDb(dbPath = './seo-intel.db') {
24
24
  try { _db.exec('ALTER TABLE pages ADD COLUMN title TEXT'); } catch { /* already exists */ }
25
25
  try { _db.exec('ALTER TABLE pages ADD COLUMN meta_desc TEXT'); } catch { /* already exists */ }
26
26
  try { _db.exec('ALTER TABLE pages ADD COLUMN body_text TEXT'); } catch { /* already exists */ }
27
+ try { _db.exec('ALTER TABLE pages ADD COLUMN final_url TEXT'); } catch { /* already exists */ }
28
+ try { _db.exec('ALTER TABLE pages ADD COLUMN redirect_chain TEXT'); } catch { /* already exists */ }
29
+ try { _db.exec('ALTER TABLE pages ADD COLUMN x_robots_tag TEXT'); } catch { /* already exists */ }
27
30
  try { _db.exec('ALTER TABLE analyses ADD COLUMN technical_gaps TEXT'); } catch { /* already exists */ }
31
+ try { _db.exec('ALTER TABLE extractions ADD COLUMN intent_scores TEXT'); } catch { /* already exists */ }
28
32
 
29
33
  // Backfill first_seen_at from crawled_at for existing rows
30
34
  _db.exec('UPDATE pages SET first_seen_at = crawled_at WHERE first_seen_at IS NULL');
@@ -279,12 +283,13 @@ function normalizePageUrl(rawUrl) {
279
283
  } catch { return rawUrl; }
280
284
  }
281
285
 
282
- export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null, title = null, metaDesc = null, bodyText = null }) {
286
+ export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null, title = null, metaDesc = null, bodyText = null, finalUrl = null, redirectChain = null, xRobotsTag = null }) {
283
287
  url = normalizePageUrl(url);
284
288
  const now = Date.now();
289
+ const redirectChainJson = redirectChain ? JSON.stringify(redirectChain) : null;
285
290
  db.prepare(`
286
- INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash, title, meta_desc, body_text)
287
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
291
+ INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash, title, meta_desc, body_text, final_url, redirect_chain, x_robots_tag)
292
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
288
293
  ON CONFLICT(url) DO UPDATE SET
289
294
  crawled_at = excluded.crawled_at,
290
295
  status_code = excluded.status_code,
@@ -296,8 +301,11 @@ export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, i
296
301
  content_hash = excluded.content_hash,
297
302
  title = excluded.title,
298
303
  meta_desc = excluded.meta_desc,
299
- body_text = excluded.body_text
300
- `).run(domainId, url, now, now, statusCode, wordCount, loadMs, isIndexable ? 1 : 0, clickDepth, publishedDate, modifiedDate, contentHash, title || null, metaDesc || null, bodyText || null);
304
+ body_text = excluded.body_text,
305
+ final_url = excluded.final_url,
306
+ redirect_chain = excluded.redirect_chain,
307
+ x_robots_tag = excluded.x_robots_tag
308
+ `).run(domainId, url, now, now, statusCode, wordCount, loadMs, isIndexable ? 1 : 0, clickDepth, publishedDate, modifiedDate, contentHash, title || null, metaDesc || null, bodyText || null, finalUrl || null, redirectChainJson, xRobotsTag || null);
301
309
  // first_seen_at is NOT in the ON CONFLICT UPDATE — it stays from original INSERT
302
310
  return db.prepare('SELECT id FROM pages WHERE url = ?').get(url);
303
311
  }
@@ -327,14 +335,15 @@ export function insertExtraction(db, { pageId, data }) {
327
335
  return db.prepare(`
328
336
  INSERT OR REPLACE INTO extractions
329
337
  (page_id, title, meta_desc, h1, product_type, pricing_tier, cta_primary,
330
- tech_stack, schema_types, search_intent, primary_entities, extracted_at)
331
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
338
+ tech_stack, schema_types, search_intent, intent_scores, primary_entities, extracted_at)
339
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
332
340
  `).run(
333
341
  pageId, data.title, data.meta_desc, data.h1,
334
342
  data.product_type, data.pricing_tier, data.cta_primary,
335
343
  JSON.stringify(data.tech_stack || []),
336
344
  JSON.stringify(data.schema_types || []),
337
345
  data.search_intent || 'Informational',
346
+ JSON.stringify(data.intent_scores || {}),
338
347
  JSON.stringify(data.primary_entities || []),
339
348
  Date.now()
340
349
  );
@@ -421,10 +430,14 @@ export function getSchemasByProject(db, project) {
421
430
  }
422
431
 
423
432
  export function getCompetitorSummary(db, project) {
433
+ // target + owned rows are merged into a single 'target' row.
434
+ // This handles the common case where the target domain (e.g. dgents.ai) redirects
435
+ // to www.dgents.ai, which gets crawled as an owned subdomain — the parallel crawl
436
+ // race means pages end up under 'owned', leaving the target with 0 pages.
424
437
  return db.prepare(`
425
438
  SELECT
426
439
  d.domain,
427
- d.role,
440
+ CASE WHEN d.role IN ('target', 'owned') THEN 'target' ELSE d.role END AS role,
428
441
  COUNT(DISTINCT p.id) as page_count,
429
442
  AVG(p.word_count) as avg_word_count,
430
443
  GROUP_CONCAT(DISTINCT e.product_type) as product_types,
@@ -434,7 +447,9 @@ export function getCompetitorSummary(db, project) {
434
447
  JOIN pages p ON p.domain_id = d.id
435
448
  LEFT JOIN extractions e ON e.page_id = p.id
436
449
  WHERE d.project = ?
437
- GROUP BY d.domain, d.role
450
+ GROUP BY
451
+ CASE WHEN d.role IN ('target', 'owned') THEN 'target-group' ELSE d.domain END,
452
+ CASE WHEN d.role IN ('target', 'owned') THEN 'target' ELSE d.role END
438
453
  `).all(project);
439
454
  }
440
455
 
@@ -538,6 +553,41 @@ export function getTemplateSamples(db, groupId) {
538
553
  ).all(groupId);
539
554
  }
540
555
 
556
+ // ── Sitemap URL inventory ─────────────────────────────────────────────────
557
+
558
+ export function upsertSitemapUrls(db, domainId, urls, sitemapSource = null) {
559
+ if (!urls || !urls.length) return 0;
560
+ const now = Date.now();
561
+ const stmt = db.prepare(`
562
+ INSERT INTO sitemap_urls (domain_id, url, sitemap_source, discovered_at)
563
+ VALUES (?, ?, ?, ?)
564
+ ON CONFLICT(domain_id, url) DO UPDATE SET
565
+ sitemap_source = COALESCE(excluded.sitemap_source, sitemap_urls.sitemap_source),
566
+ discovered_at = excluded.discovered_at
567
+ `);
568
+ db.exec('BEGIN');
569
+ try {
570
+ for (const u of urls) {
571
+ const normalized = normalizePageUrl(u);
572
+ stmt.run(domainId, normalized, sitemapSource, now);
573
+ }
574
+ db.exec('COMMIT');
575
+ } catch (e) { db.exec('ROLLBACK'); throw e; }
576
+ return urls.length;
577
+ }
578
+
579
+ export function getSitemapUrlsForDomain(db, domainId) {
580
+ return db.prepare(
581
+ 'SELECT * FROM sitemap_urls WHERE domain_id = ?'
582
+ ).all(domainId);
583
+ }
584
+
585
+ export function updateSitemapHeadResult(db, id, { status, location }) {
586
+ db.prepare(
587
+ 'UPDATE sitemap_urls SET head_status = ?, head_location = ?, head_checked_at = ? WHERE id = ?'
588
+ ).run(status ?? null, location ?? null, Date.now(), id);
589
+ }
590
+
541
591
  // ── Domain sync / prune ───────────────────────────────────────────────────
542
592
 
543
593
  /**
@@ -576,6 +626,9 @@ export function pruneStaleDomains(db, project, configDomains) {
576
626
  db.prepare(`DELETE FROM pages WHERE domain_id = ?`).run(id);
577
627
  }
578
628
 
629
+ // Sitemap URLs for this domain
630
+ try { db.prepare('DELETE FROM sitemap_urls WHERE domain_id = ?').run(id); } catch { /* table may not exist */ }
631
+
579
632
  // Template groups for this domain
580
633
  db.prepare(
581
634
  'DELETE FROM template_samples WHERE group_id IN (SELECT id FROM template_groups WHERE project = ? AND domain = ?)'
package/db/schema.sql CHANGED
@@ -26,6 +26,9 @@ CREATE TABLE IF NOT EXISTS pages (
26
26
  title TEXT, -- page <title>
27
27
  meta_desc TEXT, -- meta description
28
28
  body_text TEXT, -- cleaned body text for extraction (stored at crawl time)
29
+ final_url TEXT, -- URL after redirects (page.url() post-nav)
30
+ redirect_chain TEXT, -- JSON array of {url, status} hops; "[]" when crawled with no redirects, NULL for rows written before this column existed
31
+ x_robots_tag TEXT, -- X-Robots-Tag response header value (raw)
29
32
  FOREIGN KEY (domain_id) REFERENCES domains(id)
30
33
  );
31
34
 
@@ -41,6 +44,7 @@ CREATE TABLE IF NOT EXISTS extractions (
41
44
  tech_stack TEXT, -- JSON array
42
45
  schema_types TEXT, -- JSON array (Article, Product, FAQ, etc.)
43
46
  search_intent TEXT, -- 'Informational' | 'Navigational' | 'Commercial' | 'Transactional'
47
+ intent_scores TEXT, -- JSON object: {"commercial":70,"informational":20,"comparison":10}
44
48
  primary_entities TEXT, -- JSON array of 3-7 core concept strings
45
49
  extracted_at INTEGER NOT NULL
46
50
  );
@@ -194,6 +198,21 @@ CREATE TABLE IF NOT EXISTS citability_scores (
194
198
 
195
199
  CREATE INDEX IF NOT EXISTS idx_citability_page ON citability_scores(page_id);
196
200
 
201
+ -- Sitemap URL inventory (one row per URL declared in a sitemap)
202
+ CREATE TABLE IF NOT EXISTS sitemap_urls (
203
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
204
+ domain_id INTEGER NOT NULL REFERENCES domains(id),
205
+ url TEXT NOT NULL,
206
+ sitemap_source TEXT, -- which sitemap file this came from
207
+ discovered_at INTEGER NOT NULL,
208
+ head_status INTEGER, -- HTTP status from HEAD check (null until audit runs)
209
+ head_location TEXT, -- Location header when redirected
210
+ head_checked_at INTEGER,
211
+ UNIQUE(domain_id, url)
212
+ );
213
+
214
+ CREATE INDEX IF NOT EXISTS idx_sitemap_urls_domain ON sitemap_urls(domain_id);
215
+
197
216
  -- Indexes
198
217
  CREATE INDEX IF NOT EXISTS idx_pages_domain ON pages(domain_id);
199
218
  CREATE INDEX IF NOT EXISTS idx_keywords_page ON keywords(page_id);
@@ -76,6 +76,9 @@ export function getTechnicalDataset(db, project) {
76
76
  p.word_count,
77
77
  p.click_depth,
78
78
  p.is_indexable,
79
+ p.title,
80
+ p.published_date,
81
+ p.modified_date,
79
82
  d.domain,
80
83
  d.role,
81
84
  COALESCE(e.meta_desc, '') AS meta_desc,
@@ -86,6 +89,17 @@ export function getTechnicalDataset(db, project) {
86
89
  COALESCE((SELECT COUNT(*) FROM page_schemas ps WHERE ps.page_id = p.id), 0) AS schema_count,
87
90
  COALESCE((SELECT COUNT(*) FROM page_schemas ps WHERE ps.page_id = p.id AND LOWER(ps.schema_type) = 'breadcrumblist'), 0) AS breadcrumb_count,
88
91
  COALESCE((SELECT COUNT(*) FROM headings h WHERE h.page_id = p.id AND h.level = 1), 0) AS h1_count,
92
+ COALESCE((SELECT COUNT(*) FROM headings h WHERE h.page_id = p.id AND h.level = 1), 0) > 1 AS has_multiple_h1,
93
+ COALESCE((SELECT COUNT(*) FROM page_schemas ps WHERE ps.page_id = p.id AND LOWER(ps.schema_type) IN ('faqpage', 'faq')), 0) AS faq_schema_count,
94
+ COALESCE((SELECT COUNT(*) FROM page_schemas ps WHERE ps.page_id = p.id AND LOWER(ps.schema_type) = 'howto'), 0) AS howto_schema_count,
95
+ COALESCE((
96
+ SELECT COUNT(*) FROM headings h
97
+ WHERE h.page_id = p.id AND h.level IN (2, 3)
98
+ AND (h.text LIKE 'what %' OR h.text LIKE 'how %' OR h.text LIKE 'why %'
99
+ OR h.text LIKE 'when %' OR h.text LIKE 'which %' OR h.text LIKE 'can %'
100
+ OR h.text LIKE 'does %' OR h.text LIKE 'is %' OR h.text LIKE 'are %'
101
+ OR h.text LIKE '%?')
102
+ ), 0) AS question_heading_count,
89
103
  COALESCE((
90
104
  SELECT COUNT(*)
91
105
  FROM links l
@@ -115,6 +129,24 @@ export function getTechnicalDataset(db, project) {
115
129
  `).all(project);
116
130
  }
117
131
 
132
+ /**
133
+ * Get keywords associated with pages missing a specific schema type.
134
+ * Used to show "Missing FAQ Schema → Low PAA chance for query X".
135
+ */
136
+ export function getKeywordsForSchemaDeficientPages(db, project, pageIds) {
137
+ if (!pageIds.length) return [];
138
+ const placeholders = pageIds.map(() => '?').join(',');
139
+ return db.prepare(`
140
+ SELECT k.keyword, k.location, k.page_id, p.url,
141
+ e.search_intent
142
+ FROM keywords k
143
+ JOIN pages p ON p.id = k.page_id
144
+ LEFT JOIN extractions e ON e.page_id = p.id
145
+ WHERE k.page_id IN (${placeholders})
146
+ ORDER BY k.page_id, k.location
147
+ `).all(...pageIds);
148
+ }
149
+
118
150
  export function getSchemaCoverage(db, project, vsDomain = null) {
119
151
  const params = [project];
120
152
  let competitorFilter = '';
@@ -1,5 +1,5 @@
1
1
  import { collectTop, inferPriorityFromCount, makeAction, sortActions } from './heuristics.js';
2
- import { getTechnicalDataset } from './queries.js';
2
+ import { getTechnicalDataset, getKeywordsForSchemaDeficientPages } from './queries.js';
3
3
 
4
4
  export function buildTechnicalActions(db, project) {
5
5
  const rows = getTechnicalDataset(db, project);
@@ -176,5 +176,185 @@ export function buildTechnicalActions(db, project) {
176
176
  }));
177
177
  }
178
178
 
179
+ // ── Title length issues ──────────────────────────────────────────────────
180
+ const titleTooLong = rows.filter(r =>
181
+ r.title && r.title.length > 65 && Number(r.status_code) < 400 && r.is_indexable
182
+ );
183
+ if (titleTooLong.length) {
184
+ actions.push(makeAction({
185
+ id: 'technical-title-too-long',
186
+ type: 'improve',
187
+ priority: inferPriorityFromCount(titleTooLong.length, { critical: 20, high: 8, medium: 3 }),
188
+ area: 'content',
189
+ title: `Shorten page titles on ${titleTooLong.length} pages exceeding 65 characters`,
190
+ why: 'Titles over 65 characters are truncated in SERPs, hiding your key message and reducing CTR.',
191
+ evidence: collectTop(titleTooLong.map(r => `${r.url} (${r.title.length} chars)`), 8),
192
+ implementationHints: [
193
+ 'Keep titles under 60–65 characters to avoid SERP truncation.',
194
+ 'Lead with the primary keyword and brand separator at the end.',
195
+ ],
196
+ }));
197
+ }
198
+
199
+ const titleTooShort = rows.filter(r =>
200
+ r.title && r.title.length < 30 && Number(r.status_code) < 400 && r.is_indexable
201
+ );
202
+ if (titleTooShort.length) {
203
+ actions.push(makeAction({
204
+ id: 'technical-title-too-short',
205
+ type: 'improve',
206
+ priority: inferPriorityFromCount(titleTooShort.length, { critical: 15, high: 6, medium: 2 }),
207
+ area: 'content',
208
+ title: `Expand thin page titles on ${titleTooShort.length} pages under 30 characters`,
209
+ why: 'Very short titles waste valuable SERP real estate and under-signal page relevance to search engines.',
210
+ evidence: collectTop(titleTooShort.map(r => `${r.url} ("${r.title}")`), 8),
211
+ implementationHints: [
212
+ 'Include the primary keyword, secondary modifier, and brand in the title.',
213
+ 'Target 50–60 characters for maximum SERP visibility.',
214
+ ],
215
+ }));
216
+ }
217
+
218
+ // ── Missing date metadata ────────────────────────────────────────────────
219
+ const missingDates = rows.filter(r =>
220
+ !r.published_date && !r.modified_date &&
221
+ (r.word_count || 0) >= 500 &&
222
+ Number(r.status_code) < 400 && r.is_indexable
223
+ );
224
+ if (missingDates.length) {
225
+ actions.push(makeAction({
226
+ id: 'technical-missing-dates',
227
+ type: 'improve',
228
+ priority: inferPriorityFromCount(missingDates.length, { critical: 20, high: 8, medium: 3 }),
229
+ area: 'schema',
230
+ title: `Add publish/modified dates to ${missingDates.length} content pages`,
231
+ why: 'Date metadata in schema and HTML signals freshness to AI models and search engines, boosting citability and freshness scoring.',
232
+ evidence: collectTop(missingDates.map(r => `${r.url} (${r.word_count} words)`), 8),
233
+ implementationHints: [
234
+ 'Add datePublished and dateModified in Article/BlogPosting/NewsArticle schema JSON-LD.',
235
+ 'Include <time datetime="..."> or meta date tags in the HTML head.',
236
+ 'Keep dateModified updated on meaningful content revisions.',
237
+ ],
238
+ }));
239
+ }
240
+
241
+ // ── FAQ content without FAQPage schema ──────────────────────────────────
242
+ const faqContentNoSchema = rows.filter(r =>
243
+ r.question_heading_count >= 3 && !r.faq_schema_count &&
244
+ Number(r.status_code) < 400 && r.is_indexable
245
+ );
246
+ if (faqContentNoSchema.length) {
247
+ // Enrich with affected keywords to show SERP impact
248
+ const faqPageIds = faqContentNoSchema.map(r => r.id);
249
+ const faqKeywords = getKeywordsForSchemaDeficientPages(db, project, faqPageIds);
250
+ const faqImpact = faqKeywords
251
+ .filter(k => k.location === 'h2' || k.location === 'h1')
252
+ .slice(0, 5)
253
+ .map(k => `"${k.keyword}" on ${k.url.replace(/^https?:\/\/[^/]+/, '')} → low People Also Ask chance without FAQ schema`);
254
+
255
+ actions.push(makeAction({
256
+ id: 'technical-faq-content-no-schema',
257
+ type: 'add_schema',
258
+ priority: inferPriorityFromCount(faqContentNoSchema.length, { critical: 10, high: 4, medium: 2 }),
259
+ area: 'schema',
260
+ title: `Add FAQPage schema to ${faqContentNoSchema.length} pages with Q&A content`,
261
+ why: 'Pages with multiple question headings but no FAQPage schema miss FAQ rich results and lose AI citability score.',
262
+ evidence: collectTop(faqContentNoSchema.map(r => `${r.url} (${r.question_heading_count} question headings)`), 8),
263
+ impact: faqImpact.length ? faqImpact : undefined,
264
+ implementationHints: [
265
+ 'Wrap each question heading + answer paragraph in FAQPage JSON-LD with Question/Answer entities.',
266
+ 'Keep answers under 300 words each — Google truncates longer ones in rich results.',
267
+ ],
268
+ }));
269
+ }
270
+
271
+ // ── HowTo content without HowTo schema ──────────────────────────────────
272
+ const howtoContentNoSchema = rows.filter(r => {
273
+ const title = String(r.title || '').toLowerCase();
274
+ const h1 = String(r.h1 || '').toLowerCase();
275
+ const hasHowToSignal = /\bhow to\b|\bstep[- ]by[- ]step\b|\bsetup guide\b|\binstall guide\b/.test(title) ||
276
+ /\bhow to\b|\bstep[- ]by[- ]step\b|\bsetup guide\b|\binstall guide\b/.test(h1);
277
+ return hasHowToSignal && !r.howto_schema_count &&
278
+ Number(r.status_code) < 400 && r.is_indexable;
279
+ });
280
+ if (howtoContentNoSchema.length) {
281
+ const howtoPageIds = howtoContentNoSchema.map(r => r.id);
282
+ const howtoKeywords = getKeywordsForSchemaDeficientPages(db, project, howtoPageIds);
283
+ const howtoImpact = howtoKeywords
284
+ .filter(k => k.location === 'title' || k.location === 'h1')
285
+ .slice(0, 5)
286
+ .map(k => `"${k.keyword}" → missing HowTo rich result (step-by-step carousel)`);
287
+
288
+ actions.push(makeAction({
289
+ id: 'technical-howto-content-no-schema',
290
+ type: 'add_schema',
291
+ priority: inferPriorityFromCount(howtoContentNoSchema.length, { critical: 8, high: 3, medium: 1 }),
292
+ area: 'schema',
293
+ title: `Add HowTo schema to ${howtoContentNoSchema.length} step-by-step guide pages`,
294
+ why: 'How-to guides without HowTo schema miss rich results and rank lower for procedural queries.',
295
+ evidence: collectTop(howtoContentNoSchema.map(r => `${r.url}`), 8),
296
+ impact: howtoImpact.length ? howtoImpact : undefined,
297
+ implementationHints: [
298
+ 'Wrap numbered steps in HowTo JSON-LD with HowToStep entities.',
299
+ 'Include tool, supply, and time/cost fields where applicable.',
300
+ ],
301
+ }));
302
+ }
303
+
304
+ // ── Multiple H1 headings ─────────────────────────────────────────────────
305
+ const multipleH1 = rows.filter(r =>
306
+ r.has_multiple_h1 && Number(r.status_code) < 400 && r.is_indexable
307
+ );
308
+ if (multipleH1.length) {
309
+ actions.push(makeAction({
310
+ id: 'technical-multiple-h1',
311
+ type: 'fix',
312
+ priority: inferPriorityFromCount(multipleH1.length, { critical: 15, high: 6, medium: 2 }),
313
+ area: 'content',
314
+ title: `Fix multiple H1 headings on ${multipleH1.length} pages`,
315
+ why: 'Multiple H1s dilute topical focus and create ambiguity about the primary page topic for search engines.',
316
+ evidence: collectTop(multipleH1.map(r => r.url), 10),
317
+ implementationHints: [
318
+ 'Keep exactly one H1 that matches the page\'s primary keyword intent.',
319
+ 'Demote secondary H1s to H2 or H3 as appropriate.',
320
+ ],
321
+ }));
322
+ }
323
+
324
+ // ── Homepage links to external sites (nav leak) ──────────────────────
325
+ // Flag when homepage has external links in nav-like positions (anchor text
326
+ // suggests navigation: short text like "Deck", "Docs", "Blog" etc.)
327
+ const homepage = rows.find(r => {
328
+ const path = new URL(r.url).pathname;
329
+ return (path === '/' || path === '') && Number(r.status_code) < 400;
330
+ });
331
+ if (homepage) {
332
+ const navAnchors = ['deck', 'docs', 'blog', 'about', 'home', 'pricing', 'features', 'faq', 'team', 'contact', 'app', 'dashboard', 'whitepaper', 'roadmap', 'litepaper'];
333
+ const externalNavLinks = db.prepare(`
334
+ SELECT l.target_url, l.anchor_text
335
+ FROM links l
336
+ WHERE l.source_id = ? AND l.is_internal = 0
337
+ AND LENGTH(l.anchor_text) > 0 AND LENGTH(l.anchor_text) < 20
338
+ `).all(homepage.id)
339
+ .filter(l => navAnchors.some(n => l.anchor_text.toLowerCase().includes(n)));
340
+
341
+ if (externalNavLinks.length) {
342
+ actions.push(makeAction({
343
+ id: 'technical-nav-links-external',
344
+ type: 'fix',
345
+ priority: 'high',
346
+ area: 'structure',
347
+ title: `${externalNavLinks.length} navigation link(s) on homepage point to external sites`,
348
+ why: 'Nav-level links to external domains leak PageRank and confuse users expecting to stay on-site. Use internal landing pages or relative paths instead.',
349
+ evidence: externalNavLinks.map(l => `"${l.anchor_text}" → ${l.target_url}`),
350
+ implementationHints: [
351
+ 'Replace external nav links with internal pages (e.g. /deck instead of Google Docs link).',
352
+ 'If the content must be external, use a landing page wrapper with canonical.',
353
+ 'Ensure the logo/brand link always points to the homepage.',
354
+ ],
355
+ }));
356
+ }
357
+ }
358
+
179
359
  return sortActions(actions);
180
360
  }