npm - @conduction/docusaurus-preset - Versions diffs - 3.7.0 → 3.8.0 - Mend

@conduction/docusaurus-preset 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +36 -4
package/bin/validate-ai-baseline.mjs +15 -8
package/package.json +1 -1
package/src/components/DetailHero/DetailHero.jsx +37 -0
package/src/index.js +13 -1
package/src/plugins/indexnow.js +158 -0
package/src/theme/DocItem/Content/index.jsx +104 -0

package/README.md CHANGED Viewed

@@ -236,11 +236,43 @@ createConfig({
 });
 ```
-**Known follow-ups (not yet automatic)**
+**`BreadcrumbList` JSON-LD**
-- `BreadcrumbList` JSON-LD on every page. The DocBreadcrumbs DOM already renders; the schema needs a theme swizzle. Tracked as a 3.7+ candidate.
-- `TechArticle` JSON-LD on docs pages with `dateModified` from git mtime. Same swizzle scope.
-- Per-page title format. Docusaurus defaults to `{Page} | {Site}` which produces `OpenRegister | OpenRegister` on per-app homepages. Override per page via frontmatter `title:` for now; a `titleFormat` option may land later.
+- Docs pages: emitted automatically by Docusaurus 3.10+ via the bundled `DocBreadcrumbs/StructuredData` component. Older Docusaurus versions render the same data as inline microdata (`itemscope`/`itemprop`), which Google still reads.
+- Marketing / landing pages: `<DetailHero>` emits a `BreadcrumbList` JSON-LD block from its existing `crumb` prop. Pages that pass `crumb={[...]}` to `<DetailHero>` get the schema for free; no additional component needed.
+**`TechArticle` JSON-LD on docs pages**
+The preset's `DocItem/Content` swizzle prepends a `TechArticle` JSON-LD block to every documentation page. Fields derived from the page's frontmatter and Docusaurus metadata:
+- `headline` and `description` from frontmatter title + description
+- `datePublished` from frontmatter `date:` (falling back to `metadata.lastUpdatedAt`); `dateModified` from `metadata.lastUpdatedAt` (git mtime by default)
+- `author` from frontmatter `author:` or `authors:` (string, object, or array). Defaults to "Conduction" as the team author
+- `publisher` references the shared Conduction `Organization` via `@id`
+- `mainEntityOfPage` resolves to the doc's canonical URL
+Sites can opt out per-page by setting `techArticle: false` in the doc's frontmatter.
+**IndexNow integration for Bing + AI surfaces**
+`@conduction/docusaurus-preset/plugins/indexnow` is auto-loaded by `createConfig`. Sites enable it by passing a key:
+```js
+createConfig({
+  // ...
+  indexnow: {
+    key: 'abc123...', // 64-char key from bing.com/indexnow/getstarted
+  },
+});
+```
+The plugin writes `<key>.txt` to the build output (for IndexNow's ownership handshake) and POSTs the full sitemap URL list to `api.indexnow.org` after a successful build. Bing recrawls within minutes; Yandex consumes the same payload. DuckDuckGo, Copilot, and ChatGPT Search all read Bing's index, so a single ping covers most non-Google surfaces.
+Failure-tolerant: timeouts, network errors, and non-2xx responses log a warning and let the deploy continue. Disable via `indexnow: { disable: true }`.
+**Per-page title format**
+Docusaurus defaults to `{Page} | {Site}`, which produces `OpenRegister | OpenRegister` on per-app homepages. Override per page via frontmatter `title:` for now; a `titleFormat` option may land in a future release.
 ## Releasing

package/bin/validate-ai-baseline.mjs CHANGED Viewed

@@ -87,12 +87,19 @@ check('sitemap.xml exists and has at least 1 URL', () => {
   return {ok: true, msg: `${n} URLs`};
 });
-/* sitemap.xml should ship <lastmod> on every URL. Google treats lastmod
-   as the only sitemap-level signal that actually informs recrawl
-   priority, and only when it's trustworthy. Preset 3.7+ wraps user-
-   supplied opts.presets to inject DEFAULT_SITEMAP_OPTIONS (lastmod:
-   'date') into any classic preset entry, so every site that bumps
-   should see lastmod automatically. Hard-fail blocks regression. */
+/* sitemap.xml should ship <lastmod> on the majority of URLs. Google
+   treats lastmod as the only sitemap-level signal that actually informs
+   recrawl priority, and only when it's trustworthy. Preset 3.7+ wraps
+   user-supplied opts.presets to inject DEFAULT_SITEMAP_OPTIONS
+   (lastmod: 'date') into any classic preset entry, so every site that
+   bumps should see lastmod automatically.
+   Hard-fail when lastmod is missing entirely (means the preset wrap
+   didn't kick in). Pass when at least half of URLs have lastmod —
+   Docusaurus' auto-generated routes (/docs/category/X/, the root path
+   without a source file, redirects, etc.) legitimately don't have a
+   git mtime to use, so 100% coverage is unrealistic. ~80% is typical
+   for a content-heavy docs site. */
 check('sitemap.xml emits <lastmod> on URLs', () => {
   const body = readBuild('sitemap.xml');
   const locCount = (body.match(/<loc>/g) || []).length;
@@ -102,8 +109,8 @@ check('sitemap.xml emits <lastmod> on URLs', () => {
     return {ok: false, msg: `0 / ${locCount} URLs have <lastmod>. Upgrade to @conduction/docusaurus-preset ^3.7.0 or set sitemap.lastmod in docusaurus.config.`};
   }
   const ratio = lastmodCount / locCount;
-  if (ratio < 0.9) {
-    return {ok: false, msg: `only ${lastmodCount} / ${locCount} URLs have <lastmod>`};
+  if (ratio < 0.5) {
+    return {ok: false, msg: `only ${lastmodCount} / ${locCount} URLs have <lastmod> (under 50%); investigate which routes are missing source files`};
   }
   return {ok: true, msg: `${lastmodCount} / ${locCount} URLs (${Math.round(ratio * 100)}%)`};
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@conduction/docusaurus-preset",
-  "version": "3.7.0",
+  "version": "3.8.0",
   "scripts": {
     "prepack": "node scripts/prepack-bundle-css.js"
   },

package/src/components/DetailHero/DetailHero.jsx CHANGED Viewed

@@ -155,6 +155,36 @@ export default function DetailHero({
     return schema;
   })() : null;
+  /* BreadcrumbList JSON-LD from the existing `crumb` prop. The hero
+     already renders a visible breadcrumb chain; this just emits the
+     schema.org/BreadcrumbList equivalent so Google can render SERP
+     breadcrumbs. Items with an href become navigable list entries;
+     bare strings (typically the last "you are here" position) get a
+     name + position with no item URL. The list mirrors `crumb`
+     one-to-one; no extra entry is appended for the current page. */
+  const breadcrumbListJsonLd = (crumb && Array.isArray(crumb) && crumb.length > 0) ? (() => {
+    const baseUrl = (siteConfig?.url || '').replace(/\/$/, '');
+    const items = crumb.map((c, i) => {
+      const name = typeof c === 'string' ? c : c.label;
+      const href = (typeof c === 'object' && c.href) ? c.href : undefined;
+      const url = href
+        ? (href.startsWith('http') ? href : `${baseUrl}${href}`)
+        : undefined;
+      const entry = {
+        '@type': 'ListItem',
+        position: i + 1,
+        name,
+      };
+      if (url) entry.item = url;
+      return entry;
+    });
+    return {
+      '@context': 'https://schema.org',
+      '@type': 'BreadcrumbList',
+      itemListElement: items,
+    };
+  })() : null;
   return (
     <section className={[styles.head, hasIllustration && styles.withIllustration, bgClass, className].filter(Boolean).join(' ')}>
       {softwareApplicationJsonLd && (
@@ -164,6 +194,13 @@ export default function DetailHero({
           </script>
         </Head>
       )}
+      {breadcrumbListJsonLd && (
+        <Head>
+          <script type="application/ld+json">
+            {JSON.stringify(breadcrumbListJsonLd)}
+          </script>
+        </Head>
+      )}
       {crumb && Array.isArray(crumb) && (
         <div className={styles.crumb}>
           {crumb.map((c, i) => {

package/src/index.js CHANGED Viewed

@@ -665,12 +665,24 @@ function createConfig(opts) {
        It no-ops when the file already exists in outDir, so a site's own
        static/robots.txt or static/llms.txt always wins. Sites disable
        per-file or wholesale via opts.aiCrawling.disable. Hand-rolled
-       plugins in opts.plugins are appended after this default. */
+       plugins in opts.plugins are appended after these defaults.
+       The IndexNow plugin pings api.indexnow.org with the sitemap URLs
+       after a successful build so Bing (and the AI surfaces it feeds,
+       Copilot / ChatGPT Search / DuckDuckGo) recrawl within minutes
+       instead of the usual 1-4 weeks. No-ops without opts.indexnow.key
+       (the per-site IndexNow key, generated once at bing.com/indexnow).
+       Sites that prefer the long-tail crawl path opt out by passing
+       indexnow: { disable: true } or just leaving the key unset. */
     plugins: [
       [
         require.resolve('./plugins/ai-crawling.js'),
         opts.aiCrawling || {},
       ],
+      [
+        require.resolve('./plugins/indexnow.js'),
+        opts.indexnow || {},
+      ],
       ...(opts.plugins || []),
     ],
   };

package/src/plugins/indexnow.js ADDED Viewed

@@ -0,0 +1,158 @@
+/**
+ * @conduction/docusaurus-preset/plugins/indexnow
+ *
+ * Docusaurus plugin that pings the IndexNow API after a successful
+ * build so Bing (which feeds Copilot, ChatGPT Search, DuckDuckGo)
+ * recrawls every URL in the site's sitemap within minutes instead
+ * of the usual 1-4 weeks. Yandex also accepts the same payload.
+ *
+ * Reference: https://www.indexnow.org/documentation
+ *
+ * How it works
+ *   1. Every consuming site exposes a unique key at /<key>.txt at
+ *      build time (the key file's body must contain the key, that's
+ *      the verification handshake IndexNow requires).
+ *   2. After build, the plugin POSTs the full URL list to
+ *      api.indexnow.org with that key.
+ *   3. Bing fetches /<key>.txt to verify ownership, then schedules
+ *      recrawl of every URL in the payload.
+ *
+ * Options:
+ *   key       string  64-char IndexNow key. Required. Sites generate
+ *                     once at https://www.bing.com/indexnow/getstarted
+ *                     and reuse forever.
+ *   keyLocation  string  optional; if the key file lives at a
+ *                     non-default path, override here. Default:
+ *                     <siteUrl>/<key>.txt
+ *   disable   boolean  opt out without removing the plugin entry.
+ *   host      string   IndexNow API host. Default api.indexnow.org;
+ *                     Bing and Yandex both forward to each other,
+ *                     so one POST notifies both.
+ *
+ * Why postBuild + not in CI: postBuild runs once per successful
+ * build, so the ping fires only when content actually changed. If
+ * we wired it as a separate workflow we'd need to detect changes
+ * ourselves; the Docusaurus build is the natural trigger.
+ *
+ * If the IndexNow endpoint is unreachable (network blip, rate limit)
+ * the plugin logs and continues; we never want a transient external
+ * service to fail a deploy.
+ */
+const fs = require('fs');
+const path = require('path');
+const https = require('https');
/** IndexNow accepts at most this many URLs in a single POST body. */
const INDEXNOW_MAX_URLS_PER_POST = 10000;

/**
 * Decode the five predefined XML entities. Sitemap `<loc>` values are
 * XML-escaped (e.g. `&amp;`), but the IndexNow JSON payload needs the
 * literal URL. `&amp;` is decoded last so `&amp;lt;` is not decoded twice.
 *
 * @param {string} text XML text content.
 * @returns {string} The text with entities replaced by literal characters.
 */
function decodeXmlEntities(text) {
  return text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&');
}

/**
 * Pull every non-empty `<loc>` value out of a sitemap document.
 *
 * @param {string} xml Raw sitemap.xml contents.
 * @returns {string[]} Decoded, trimmed URLs in document order.
 */
function extractSitemapUrls(xml) {
  const urls = [];
  for (const match of xml.matchAll(/<loc>([^<]+)<\/loc>/g)) {
    const url = decodeXmlEntities(match[1].trim());
    if (url) urls.push(url);
  }
  return urls;
}

/**
 * POST one JSON payload to the IndexNow endpoint. Never rejects: every
 * outcome (success, non-2xx, network error, timeout) is logged and the
 * promise resolves, so an external outage cannot fail the build.
 *
 * @param {string} host IndexNow API hostname.
 * @param {string} payload Serialized JSON request body.
 * @param {number} urlCount Number of URLs in the payload (for logging).
 * @returns {Promise<void>} Resolves once the request has settled.
 */
function postIndexNowBatch(host, payload, urlCount) {
  return new Promise(resolve => {
    /* Guards against double logging: destroying the request on timeout
       also triggers the 'error' event. */
    let settled = false;
    const finish = () => {
      if (settled) return;
      settled = true;
      resolve();
    };
    const req = https.request(
      {
        hostname: host,
        port: 443,
        path: '/indexnow',
        method: 'POST',
        headers: {
          'Content-Type': 'application/json; charset=utf-8',
          'Content-Length': Buffer.byteLength(payload),
        },
        timeout: 10000,
      },
      res => {
        const ok = res.statusCode >= 200 && res.statusCode < 300;
        if (ok) {
          console.log(`[indexnow] submitted ${urlCount} URLs to ${host} (${res.statusCode})`);
        } else {
          console.warn(`[indexnow] ${host} returned ${res.statusCode}; deploy continues. URLs to retry next time will sync via the normal crawl.`);
        }
        /* Drain the body so the socket is released. */
        res.resume();
        finish();
      }
    );
    req.on('error', err => {
      if (settled) return;
      console.warn(`[indexnow] request failed: ${err.message}; deploy continues.`);
      finish();
    });
    req.on('timeout', () => {
      if (settled) return;
      console.warn('[indexnow] request timed out after 10s; deploy continues.');
      finish();
      req.destroy();
    });
    req.write(payload);
    req.end();
  });
}

/**
 * Docusaurus plugin factory: after a build, writes the IndexNow key file
 * into the build output and submits the sitemap's URLs to IndexNow.
 *
 * @param {object} context Docusaurus load context (unused).
 * @param {object} [options]
 * @param {string} [options.key] IndexNow key; without it postBuild only warns.
 * @param {string} [options.keyLocation] Public URL of the key file. Defaults
 *   to `<siteUrl><baseUrl><key>.txt`, i.e. where the file written into
 *   `outDir` is actually served.
 * @param {boolean} [options.disable] When truthy, postBuild is a no-op.
 * @param {string} [options.host] IndexNow API host (default api.indexnow.org).
 * @returns {{name: string, postBuild: Function}} The plugin object.
 */
function indexNowPlugin(context, options = {}) {
  if (!options || options.disable) {
    return {name: 'conduction-indexnow', postBuild() {}};
  }
  const key = options.key;
  if (!key) {
    return {
      name: 'conduction-indexnow',
      postBuild() {
        console.warn(
          '[indexnow] no `key` option provided; skipping. Set ' +
          'opts.indexnow.key (or pass through createConfig) to enable.'
        );
      },
    };
  }
  const host = options.host || 'api.indexnow.org';
  return {
    name: 'conduction-indexnow',
    async postBuild({outDir, siteConfig}) {
      const siteUrl = (siteConfig.url || '').replace(/\/$/, '');
      if (!siteUrl) {
        console.warn('[indexnow] siteConfig.url missing; skipping.');
        return;
      }
      /* Ensure the verification key file exists at <baseUrl><key>.txt so
         IndexNow can confirm we own the host. Body is just the key
         per the IndexNow handshake protocol. */
      const keyFile = path.join(outDir, `${key}.txt`);
      try {
        fs.writeFileSync(keyFile, key, 'utf8');
      } catch (e) {
        console.warn(`[indexnow] failed to write ${keyFile}: ${e.message}`);
        return;
      }
      /* Build the URL list from the rendered sitemap.xml rather than a
         directory walk, so the submitted list matches what the site
         already publishes to crawlers. */
      const sitemapPath = path.join(outDir, 'sitemap.xml');
      let urls = [];
      try {
        urls = extractSitemapUrls(fs.readFileSync(sitemapPath, 'utf8'));
      } catch (e) {
        console.warn(`[indexnow] could not read sitemap.xml: ${e.message}`);
        return;
      }
      if (urls.length === 0) {
        console.warn('[indexnow] sitemap.xml has no <loc> entries; skipping.');
        return;
      }
      const hostWithoutProtocol = siteUrl.replace(/^https?:\/\//, '');
      /* outDir is served under baseUrl, so the key file's public URL must
         include it; for the default baseUrl '/' this is <siteUrl>/<key>.txt. */
      const basePath = `/${siteConfig.baseUrl || '/'}/`.replace(/\/{2,}/g, '/');
      const keyLocation = options.keyLocation || `${siteUrl}${basePath}${key}.txt`;
      /* IndexNow caps each POST at 10000 URLs; larger sitemaps are sent
         as consecutive batches instead of being truncated. */
      for (let start = 0; start < urls.length; start += INDEXNOW_MAX_URLS_PER_POST) {
        const batch = urls.slice(start, start + INDEXNOW_MAX_URLS_PER_POST);
        const payload = JSON.stringify({
          host: hostWithoutProtocol,
          key,
          keyLocation,
          urlList: batch,
        });
        await postIndexNowBatch(host, payload, batch.length);
      }
    },
  };
}
+module.exports = indexNowPlugin;

package/src/theme/DocItem/Content/index.jsx ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * Brand DocItem/Content swizzle.
+ *
+ * Wraps Docusaurus's default DocItem/Content (which renders the
+ * markdown body) and prepends a `TechArticle` JSON-LD block built
+ * from the page's metadata + frontmatter. Every documentation page
+ * across the Conduction fleet ships this schema automatically, which
+ * is the single biggest remaining SEO-rich-result gap (the audit
+ * found no Article/TechArticle anywhere on the fleet docs sites).
+ *
+ * What we emit:
+ *   - headline:       page title
+ *   - description:    frontmatter description (if any)
+ *   - datePublished:  frontmatter date OR metadata.lastUpdatedAt
+ *   - dateModified:   metadata.lastUpdatedAt (git mtime by default)
+ *   - author:         frontmatter author (string or object) OR
+ *                     "Conduction" as fallback
+ *   - publisher:      reference to the shared Conduction Organization
+ *   - mainEntityOfPage: canonical doc URL
+ *
+ * Why TechArticle (not plain Article): docs are technical content,
+ * and TechArticle is the schema.org subtype Google + Bing reward for
+ * developer documentation. Article would also work but TechArticle
+ * is more specific.
+ *
+ * Sites that don't want the schema on a particular page set
+ * `frontMatter.techArticle: false` in the doc's frontmatter.
+ */
+import React from 'react';
+import Head from '@docusaurus/Head';
+import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
+import {useDoc} from '@docusaurus/plugin-content-docs/client';
+import DocItemContent from '@theme-init/DocItem/Content';
/**
 * Convert a frontmatter/metadata date value to an ISO-8601 string.
 *
 * Numbers are treated as Unix timestamps. The unit is detected by
 * magnitude: values below 1e11 are taken as seconds, anything larger as
 * milliseconds (1e11 ms is March 1973; 1e11 s is the year 5138).
 * NOTE(review): this assumes `metadata.lastUpdatedAt` may be in either
 * unit depending on the Docusaurus release; confirm against the installed
 * version.
 *
 * @param {number|string|Date|null|undefined} value Raw date value.
 * @returns {string|undefined} ISO string, or undefined when the value is
 *   empty or cannot be parsed (instead of throwing a RangeError).
 */
function toIsoDate(value) {
  if (value == null || value === '') return undefined;
  const date = typeof value === 'number'
    ? new Date(value < 1e11 ? value * 1000 : value)
    : new Date(value);
  return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
}

/**
 * Build the schema.org `TechArticle` JSON-LD object for a docs page.
 *
 * @param {string|undefined} siteUrl Site origin (trailing slash tolerated);
 *   when empty the permalink alone is used as the URL.
 * @param {object} metadata Doc metadata; reads `permalink`, `title`,
 *   `description` and `lastUpdatedAt`.
 * @param {object} frontMatter Doc frontmatter; reads `title`,
 *   `description`, `date`, `author` and `authors`.
 * @returns {object} Plain object ready for `JSON.stringify`.
 */
function buildTechArticleJsonLd(siteUrl, metadata, frontMatter) {
  const url = siteUrl
    ? `${siteUrl.replace(/\/$/, '')}${metadata.permalink}`
    : metadata.permalink;
  const schema = {
    '@context': 'https://schema.org',
    '@type': 'TechArticle',
    '@id': `${url}#article`,
    mainEntityOfPage: url,
    headline: frontMatter.title || metadata.title,
    inLanguage: 'en',
    publisher: {'@id': 'https://www.conduction.nl/#org'},
  };
  const description = frontMatter.description || metadata.description;
  if (description) {
    schema.description = description;
  }
  /* dateModified is the last-update time; datePublished prefers an
     explicit frontmatter date and falls back to the last-update time
     when that date is absent or unparsable. */
  const dateModified = toIsoDate(metadata.lastUpdatedAt);
  const datePublished = toIsoDate(frontMatter.date) ?? dateModified;
  if (datePublished) {
    schema.datePublished = datePublished;
  }
  if (dateModified) {
    schema.dateModified = dateModified;
  }
  /* Author: accept frontmatter string ("Ruben"), object ({name, url}),
     or list of authors. Default to Conduction as the team author. */
  const fmAuthor = frontMatter.author || frontMatter.authors;
  if (fmAuthor) {
    if (typeof fmAuthor === 'string') {
      schema.author = {'@type': 'Person', name: fmAuthor};
    } else if (Array.isArray(fmAuthor)) {
      schema.author = fmAuthor.map(a =>
        typeof a === 'string'
          ? {'@type': 'Person', name: a}
          : {'@type': 'Person', name: a.name, url: a.url});
    } else if (typeof fmAuthor === 'object') {
      schema.author = {'@type': 'Person', name: fmAuthor.name, url: fmAuthor.url};
    }
  } else {
    schema.author = {
      '@type': 'Organization',
      name: 'Conduction',
      '@id': 'https://www.conduction.nl/#org',
    };
  }
  return schema;
}
+export default function DocItemContentWithSchema(props) {
+  const {siteConfig} = useDocusaurusContext();
+  const {metadata, frontMatter} = useDoc();
+  const emitSchema = frontMatter.techArticle !== false;
+  const schema = emitSchema
+    ? buildTechArticleJsonLd(siteConfig.url, metadata, frontMatter)
+    : null;
+  return (
+    <>
+      {schema && (
+        <Head>
+          <script type="application/ld+json">
+            {JSON.stringify(schema)}
+          </script>
+        </Head>
+      )}
+      <DocItemContent {...props} />
+    </>
+  );
+}