@conduction/docusaurus-preset 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -236,11 +236,43 @@ createConfig({
236
236
  });
237
237
  ```
238
238
 
239
- **Known follow-ups (not yet automatic)**
239
+ **`BreadcrumbList` JSON-LD**
240
240
 
241
- - `BreadcrumbList` JSON-LD on every page. The DocBreadcrumbs DOM already renders; the schema needs a theme swizzle. Tracked as a 3.7+ candidate.
242
- - `TechArticle` JSON-LD on docs pages with `dateModified` from git mtime. Same swizzle scope.
243
- - Per-page title format. Docusaurus defaults to `{Page} | {Site}` which produces `OpenRegister | OpenRegister` on per-app homepages. Override per page via frontmatter `title:` for now; a `titleFormat` option may land later.
241
+ - Docs pages: emitted automatically by Docusaurus 3.10+ via the bundled `DocBreadcrumbs/StructuredData` component. Older Docusaurus versions render the same data as inline microdata (`itemscope`/`itemprop`), which Google still reads.
242
+ - Marketing / landing pages: `<DetailHero>` emits a `BreadcrumbList` JSON-LD block from its existing `crumb` prop. Pages that pass `crumb={[...]}` to `<DetailHero>` get the schema for free; no additional component needed.
243
+
244
+ **`TechArticle` JSON-LD on docs pages**
245
+
246
+ The preset's `DocItem/Content` swizzle prepends a `TechArticle` JSON-LD block to every documentation page. Fields derived from the page's frontmatter and Docusaurus metadata:
247
+
248
+ - `headline` and `description` from frontmatter title + description
249
+ - `datePublished` from frontmatter `date:` when set, otherwise from `metadata.lastUpdatedAt`; `dateModified` always from `metadata.lastUpdatedAt` (git mtime by default)
250
+ - `author` from frontmatter `author:` or `authors:` (string, object, or array). Defaults to "Conduction" as the team author
251
+ - `publisher` references the shared Conduction `Organization` via `@id`
252
+ - `mainEntityOfPage` resolves to the doc's canonical URL
253
+
254
+ Sites can opt out per-page by setting `techArticle: false` in the doc's frontmatter.
255
+
256
+ **IndexNow integration for Bing + AI surfaces**
257
+
258
+ `@conduction/docusaurus-preset/plugins/indexnow` is auto-loaded by `createConfig`. Sites enable it by passing a key:
259
+
260
+ ```js
261
+ createConfig({
262
+ // ...
263
+ indexnow: {
264
+ key: 'abc123...', // 64-char key from bing.com/indexnow/getstarted
265
+ },
266
+ });
267
+ ```
268
+
269
+ The plugin writes `<key>.txt` to the build output (for IndexNow's ownership handshake) and POSTs the full sitemap URL list to `api.indexnow.org` after a successful build. Bing recrawls within minutes; Yandex consumes the same payload. DuckDuckGo, Copilot, and ChatGPT Search all read Bing's index, so a single ping covers most non-Google surfaces.
270
+
271
+ Failure-tolerant: timeouts or 5xx responses log a warning and let the deploy continue. Disable via `indexnow: { disable: true }`.
272
+
273
+ **Per-page title format**
274
+
275
+ Docusaurus defaults to `{Page} | {Site}`, which produces `OpenRegister | OpenRegister` on per-app homepages. Override per page via frontmatter `title:` for now; a `titleFormat` option may land in a future release.
244
276
 
245
277
  ## Releasing
246
278
 
@@ -87,12 +87,19 @@ check('sitemap.xml exists and has at least 1 URL', () => {
87
87
  return {ok: true, msg: `${n} URLs`};
88
88
  });
89
89
 
90
- /* sitemap.xml should ship <lastmod> on every URL. Google treats lastmod
91
- as the only sitemap-level signal that actually informs recrawl
92
- priority, and only when it's trustworthy. Preset 3.7+ wraps user-
93
- supplied opts.presets to inject DEFAULT_SITEMAP_OPTIONS (lastmod:
94
- 'date') into any classic preset entry, so every site that bumps
95
- should see lastmod automatically. Hard-fail blocks regression. */
90
+ /* sitemap.xml should ship <lastmod> on the majority of URLs. Google
91
+ treats lastmod as the only sitemap-level signal that actually informs
92
+ recrawl priority, and only when it's trustworthy. Preset 3.7+ wraps
93
+ user-supplied opts.presets to inject DEFAULT_SITEMAP_OPTIONS
94
+ (lastmod: 'date') into any classic preset entry, so every site that
95
+ bumps should see lastmod automatically.
96
+
97
+ Hard-fail when lastmod is missing entirely (means the preset wrap
98
+ didn't kick in). Pass when at least half of URLs have lastmod —
99
+ Docusaurus' auto-generated routes (/docs/category/X/, the root path
100
+ without a source file, redirects, etc.) legitimately don't have a
101
+ git mtime to use, so 100% coverage is unrealistic. ~80% is typical
102
+ for a content-heavy docs site. */
96
103
  check('sitemap.xml emits <lastmod> on URLs', () => {
97
104
  const body = readBuild('sitemap.xml');
98
105
  const locCount = (body.match(/<loc>/g) || []).length;
@@ -102,8 +109,8 @@ check('sitemap.xml emits <lastmod> on URLs', () => {
102
109
  return {ok: false, msg: `0 / ${locCount} URLs have <lastmod>. Upgrade to @conduction/docusaurus-preset ^3.7.0 or set sitemap.lastmod in docusaurus.config.`};
103
110
  }
104
111
  const ratio = lastmodCount / locCount;
105
- if (ratio < 0.9) {
106
- return {ok: false, msg: `only ${lastmodCount} / ${locCount} URLs have <lastmod>`};
112
+ if (ratio < 0.5) {
113
+ return {ok: false, msg: `only ${lastmodCount} / ${locCount} URLs have <lastmod> (under 50%); investigate which routes are missing source files`};
107
114
  }
108
115
  return {ok: true, msg: `${lastmodCount} / ${locCount} URLs (${Math.round(ratio * 100)}%)`};
109
116
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@conduction/docusaurus-preset",
3
- "version": "3.7.0",
3
+ "version": "3.8.0",
4
4
  "scripts": {
5
5
  "prepack": "node scripts/prepack-bundle-css.js"
6
6
  },
@@ -155,6 +155,36 @@ export default function DetailHero({
155
155
  return schema;
156
156
  })() : null;
157
157
 
158
+ /* BreadcrumbList JSON-LD from the existing `crumb` prop. The hero
159
+ already renders a visible breadcrumb chain; this just emits the
160
+ schema.org/BreadcrumbList equivalent so Google can render SERP
161
+ breadcrumbs. Items with an href become navigable list entries;
162
+ bare strings (typically the last "you are here" position) get a
163
+ name + position with no item URL. No extra entry is synthesized
164
+ for the current page; only the entries passed in `crumb` are emitted. */
165
+ const breadcrumbListJsonLd = (crumb && Array.isArray(crumb) && crumb.length > 0) ? (() => {
166
+ const baseUrl = (siteConfig?.url || '').replace(/\/$/, '');
167
+ const items = crumb.map((c, i) => {
168
+ const name = typeof c === 'string' ? c : c.label;
169
+ const href = (typeof c === 'object' && c.href) ? c.href : undefined;
170
+ const url = href
171
+ ? (href.startsWith('http') ? href : `${baseUrl}${href}`)
172
+ : undefined;
173
+ const entry = {
174
+ '@type': 'ListItem',
175
+ position: i + 1,
176
+ name,
177
+ };
178
+ if (url) entry.item = url;
179
+ return entry;
180
+ });
181
+ return {
182
+ '@context': 'https://schema.org',
183
+ '@type': 'BreadcrumbList',
184
+ itemListElement: items,
185
+ };
186
+ })() : null;
187
+
158
188
  return (
159
189
  <section className={[styles.head, hasIllustration && styles.withIllustration, bgClass, className].filter(Boolean).join(' ')}>
160
190
  {softwareApplicationJsonLd && (
@@ -164,6 +194,13 @@ export default function DetailHero({
164
194
  </script>
165
195
  </Head>
166
196
  )}
197
+ {breadcrumbListJsonLd && (
198
+ <Head>
199
+ <script type="application/ld+json">
200
+ {JSON.stringify(breadcrumbListJsonLd)}
201
+ </script>
202
+ </Head>
203
+ )}
167
204
  {crumb && Array.isArray(crumb) && (
168
205
  <div className={styles.crumb}>
169
206
  {crumb.map((c, i) => {
package/src/index.js CHANGED
@@ -665,12 +665,24 @@ function createConfig(opts) {
665
665
  It no-ops when the file already exists in outDir, so a site's own
666
666
  static/robots.txt or static/llms.txt always wins. Sites disable
667
667
  per-file or wholesale via opts.aiCrawling.disable. Hand-rolled
668
- plugins in opts.plugins are appended after this default. */
668
+ plugins in opts.plugins are appended after these defaults.
669
+
670
+ The IndexNow plugin pings api.indexnow.org with the sitemap URLs
671
+ after a successful build so Bing (and the AI surfaces it feeds,
672
+ Copilot / ChatGPT Search / DuckDuckGo) recrawl within minutes
673
+ instead of the usual 1-4 weeks. No-ops without opts.indexnow.key
674
+ (the per-site IndexNow key, generated once at bing.com/indexnow).
675
+ Sites that prefer the long-tail crawl path opt out by passing
676
+ indexnow: { disable: true } or just leaving the key unset. */
669
677
  plugins: [
670
678
  [
671
679
  require.resolve('./plugins/ai-crawling.js'),
672
680
  opts.aiCrawling || {},
673
681
  ],
682
+ [
683
+ require.resolve('./plugins/indexnow.js'),
684
+ opts.indexnow || {},
685
+ ],
674
686
  ...(opts.plugins || []),
675
687
  ],
676
688
  };
@@ -0,0 +1,158 @@
1
+ /**
2
+ * @conduction/docusaurus-preset/plugins/indexnow
3
+ *
4
+ * Docusaurus plugin that pings the IndexNow API after a successful
5
+ * build so Bing (which feeds Copilot, ChatGPT Search, DuckDuckGo)
6
+ * recrawls every URL in the site's sitemap within minutes instead
7
+ * of the usual 1-4 weeks. Yandex also accepts the same payload.
8
+ *
9
+ * Reference: https://www.indexnow.org/documentation
10
+ *
11
+ * How it works
12
+ * 1. Every consuming site exposes a unique key at /<key>.txt at
13
+ * build time (the key file's body must contain the key, that's
14
+ * the verification handshake IndexNow requires).
15
+ * 2. After build, the plugin POSTs the full URL list to
16
+ * api.indexnow.org with that key.
17
+ * 3. Bing fetches /<key>.txt to verify ownership, then schedules
18
+ * recrawl of every URL in the payload.
19
+ *
20
+ * Options:
21
+ * key string 64-char IndexNow key. Required. Sites generate
22
+ * once at https://www.bing.com/indexnow/getstarted
23
+ * and reuse forever.
24
+ * keyLocation string optional; if the key file lives at a
25
+ * non-default path, override here. Default:
26
+ * <siteUrl>/<key>.txt
27
+ * disable boolean opt out without removing the plugin entry.
28
+ * host string IndexNow API host. Default api.indexnow.org;
29
+ * Bing and Yandex both forward to each other,
30
+ * so one POST notifies both.
31
+ *
32
+ * Why postBuild + not in CI: postBuild runs once per successful
33
+ * build, so the ping fires only when content actually changed. If
34
+ * we wired it as a separate workflow we'd need to detect changes
35
+ * ourselves; the Docusaurus build is the natural trigger.
36
+ *
37
+ * If the IndexNow endpoint is unreachable (network blip, rate limit)
38
+ * the plugin logs and continues; we never want a transient external
39
+ * service to fail a deploy.
40
+ */
41
+
42
+ const fs = require('fs');
43
+ const path = require('path');
44
+ const https = require('https');
45
+
46
/**
 * Docusaurus plugin factory: after a successful build, writes the IndexNow
 * key file into the build output and POSTs every sitemap URL to the
 * IndexNow API.
 *
 * The returned `postBuild` hook is best-effort: every failure path (missing
 * config, unreadable sitemap, network error, timeout, non-2xx response)
 * logs a `[indexnow]` message and resolves, so an external service can
 * never fail a deploy.
 *
 * @param {object} context Docusaurus load context (unused).
 * @param {object} [options]
 * @param {string} [options.key] IndexNow key. Without it the plugin only
 *   logs a warning at postBuild time.
 * @param {string} [options.keyLocation] Public URL of the key file.
 *   Defaults to `<siteConfig.url><siteConfig.baseUrl><key>.txt`, which is
 *   where the file written into `outDir` ends up being served.
 * @param {boolean} [options.disable] Turn the plugin into a no-op.
 * @param {string} [options.host] IndexNow API host. Default
 *   `api.indexnow.org`.
 * @returns {{name: string, postBuild: Function}} Docusaurus plugin object.
 */
function indexNowPlugin(context, options = {}) {
  const pluginName = 'conduction-indexnow';
  /* IndexNow caps each POST at 10000 URLs; anything beyond is dropped so
     a freak large sitemap doesn't 400 the request. */
  const maxUrlsPerPost = 10000;
  const requestTimeoutMs = 10000;
  /* The five predefined XML entities; sitemap <loc> values are
     XML-escaped and must be decoded before being sent as JSON. */
  const xmlEntities = {amp: '&', lt: '<', gt: '>', quot: '"', apos: "'"};

  if (!options || options.disable) {
    return {name: pluginName, postBuild() {}};
  }
  const key = options.key;
  if (!key) {
    return {
      name: pluginName,
      postBuild() {
        console.warn(
          '[indexnow] no `key` option provided; skipping. Set ' +
          'opts.indexnow.key (or pass through createConfig) to enable.'
        );
      },
    };
  }
  /* The key doubles as a filename inside outDir, so reject anything that
     is not a plain IndexNow key (8-128 chars of a-z, A-Z, 0-9, dash).
     This also rules out path separators and `..` segments. */
  if (typeof key !== 'string' || !/^[A-Za-z0-9-]{8,128}$/.test(key)) {
    return {
      name: pluginName,
      postBuild() {
        console.warn(
          '[indexnow] `key` must be 8-128 characters from a-z, A-Z, 0-9 ' +
          'and "-"; skipping.'
        );
      },
    };
  }
  const host = options.host || 'api.indexnow.org';

  return {
    name: pluginName,

    async postBuild({outDir, siteConfig}) {
      const siteUrl = (siteConfig.url || '').replace(/\/$/, '');
      if (!siteUrl) {
        console.warn('[indexnow] siteConfig.url missing; skipping.');
        return;
      }

      /* IndexNow's `host` field is the bare authority (no scheme, no
         path). Parsing with URL keeps a stray sub-path out of it. */
      let siteHost;
      try {
        siteHost = new URL(siteUrl).host;
      } catch (e) {
        console.warn(`[indexnow] siteConfig.url is not a valid URL (${e.message}); skipping.`);
        return;
      }

      /* Ensure the verification key file exists at <outDir>/<key>.txt so
         IndexNow can confirm we own the host. Body is just the key per
         the IndexNow handshake protocol. */
      const keyFile = path.join(outDir, `${key}.txt`);
      try {
        fs.writeFileSync(keyFile, key, 'utf8');
      } catch (e) {
        console.warn(`[indexnow] failed to write ${keyFile}: ${e.message}`);
        return;
      }

      /* Build the URL list from the rendered sitemap.xml. Sitemap-backed
         instead of a directory walk because the sitemap is the canonical
         list search engines already trust, and it respects the site's
         ignorePatterns + i18n routes automatically. */
      const sitemapPath = path.join(outDir, 'sitemap.xml');
      let urls = [];
      try {
        const xml = fs.readFileSync(sitemapPath, 'utf8');
        urls = [...xml.matchAll(/<loc>([^<]+)<\/loc>/g)]
          .map(match =>
            match[1]
              .trim()
              /* Single pass so `&amp;lt;` decodes to `&lt;`, not `<`. */
              .replace(/&(amp|lt|gt|quot|apos);/g, (_, name) => xmlEntities[name]))
          .filter(url => url.length > 0);
      } catch (e) {
        console.warn(`[indexnow] could not read sitemap.xml: ${e.message}`);
        return;
      }
      if (urls.length === 0) {
        console.warn('[indexnow] sitemap.xml has no <loc> entries; skipping.');
        return;
      }
      const capped = urls.slice(0, maxUrlsPerPost);

      /* outDir is served under siteConfig.baseUrl, so the key file's
         public URL must include it; otherwise sites deployed under a
         sub-path advertise a 404 and fail the ownership check.
         Normalised to exactly one leading and one trailing slash. */
      const basePath = `/${String(siteConfig.baseUrl || '/').replace(/^\/+|\/+$/g, '')}/`
        .replace(/\/{2,}/g, '/');
      const keyLocation = options.keyLocation || `${siteUrl}${basePath}${key}.txt`;
      const payload = JSON.stringify({
        host: siteHost,
        key,
        keyLocation,
        urlList: capped,
      });

      await new Promise(resolve => {
        /* Log and resolve at most once. Destroying the request on
           timeout also fires 'error'; without this guard the build log
           would get two warnings for one failure. */
        let settled = false;
        const settle = (level, message) => {
          if (settled) {
            return;
          }
          settled = true;
          console[level](message);
          resolve();
        };

        let req;
        try {
          req = https.request(
            {
              hostname: host,
              port: 443,
              path: '/indexnow',
              method: 'POST',
              headers: {
                'Content-Type': 'application/json; charset=utf-8',
                'Content-Length': Buffer.byteLength(payload),
              },
              timeout: requestTimeoutMs,
            },
            res => {
              /* Drain the body so the socket is released. */
              res.resume();
              const ok = res.statusCode >= 200 && res.statusCode < 300;
              if (ok) {
                settle('log', `[indexnow] submitted ${capped.length} URLs to ${host} (${res.statusCode})`);
              } else {
                settle('warn', `[indexnow] ${host} returned ${res.statusCode}; deploy continues. URLs to retry next time will sync via the normal crawl.`);
              }
            }
          );
        } catch (err) {
          /* https.request can throw synchronously on invalid options;
             keep that from rejecting and failing the build. */
          settle('warn', `[indexnow] request failed: ${err.message}; deploy continues.`);
          return;
        }
        req.on('error', err => {
          settle('warn', `[indexnow] request failed: ${err.message}; deploy continues.`);
        });
        req.on('timeout', () => {
          settle('warn', '[indexnow] request timed out after 10s; deploy continues.');
          req.destroy();
        });
        req.write(payload);
        req.end();
      });
    },
  };
}
157
+
158
+ module.exports = indexNowPlugin;
@@ -0,0 +1,104 @@
1
+ /**
2
+ * Brand DocItem/Content swizzle.
3
+ *
4
+ * Wraps Docusaurus's default DocItem/Content (which renders the
5
+ * markdown body) and prepends a `TechArticle` JSON-LD block built
6
+ * from the page's metadata + frontmatter. Every documentation page
7
+ * across the Conduction fleet ships this schema automatically, which
8
+ * is the single biggest remaining SEO-rich-result gap (the audit
9
+ * found no Article/TechArticle anywhere on the fleet docs sites).
10
+ *
11
+ * What we emit:
12
+ * - headline: page title
13
+ * - description: frontmatter description (if any)
14
+ * - datePublished: frontmatter date OR metadata.lastUpdatedAt
15
+ * - dateModified: metadata.lastUpdatedAt (git mtime by default)
16
+ * - author: frontmatter author (string or object) OR
17
+ * "Conduction" as fallback
18
+ * - publisher: reference to the shared Conduction Organization
19
+ * - mainEntityOfPage: canonical doc URL
20
+ *
21
+ * Why TechArticle (not plain Article): docs are technical content,
22
+ * and TechArticle is the schema.org subtype Google + Bing reward for
23
+ * developer documentation. Article would also work but TechArticle
24
+ * is more specific.
25
+ *
26
+ * Sites that don't want the schema on a particular page set
27
+ * `techArticle: false` in the doc's frontmatter.
28
+ */
29
+
30
+ import React from 'react';
31
+ import Head from '@docusaurus/Head';
32
+ import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
33
+ import {useDoc} from '@docusaurus/plugin-content-docs/client';
34
+ import DocItemContent from '@theme-init/DocItem/Content';
35
+
36
/**
 * Build the schema.org `TechArticle` JSON-LD object for one docs page.
 *
 * @param {string} siteUrl Site origin (`siteConfig.url`); a single
 *   trailing slash is stripped. When empty, the bare permalink is used.
 * @param {object} metadata Doc metadata; reads `permalink`, `title`,
 *   `description` and `lastUpdatedAt`.
 * @param {object} frontMatter Doc frontmatter; reads `title`,
 *   `description`, `date`, `author` and `authors`.
 * @returns {object} Plain JSON-serialisable schema object. Date fields are
 *   omitted (never thrown on) when the source value is missing or
 *   unparseable.
 */
function buildTechArticleJsonLd(siteUrl, metadata, frontMatter) {
  /* Convert a timestamp / date string / Date to ISO-8601, or undefined
     when absent or invalid. Numeric input may be epoch seconds or epoch
     milliseconds depending on the Docusaurus version that produced
     `lastUpdatedAt`; anything below 1e11 is treated as seconds (1e11 ms
     is 1973, 1e11 s is the year 5138, so real values never collide). */
  const toIsoDate = value => {
    if (!value) {
      return undefined;
    }
    const date = typeof value === 'number'
      ? new Date(value < 1e11 ? value * 1000 : value)
      : new Date(value);
    /* toISOString() throws RangeError on an invalid Date; a typo in a
       frontmatter date must not crash the page render. */
    return Number.isNaN(date.getTime()) ? undefined : date.toISOString();
  };

  /* Map one frontmatter author entry (string or {name, url}) to a Person
     node; returns undefined for entries that carry no name. */
  const toPerson = author => {
    if (typeof author === 'string' && author) {
      return {'@type': 'Person', name: author};
    }
    if (author && typeof author === 'object' && author.name) {
      const person = {'@type': 'Person', name: author.name};
      if (author.url) {
        person.url = author.url;
      }
      return person;
    }
    return undefined;
  };

  const url = siteUrl
    ? `${siteUrl.replace(/\/$/, '')}${metadata.permalink}`
    : metadata.permalink;
  const schema = {
    '@context': 'https://schema.org',
    '@type': 'TechArticle',
    '@id': `${url}#article`,
    mainEntityOfPage: url,
    headline: frontMatter.title || metadata.title,
    inLanguage: 'en',
    publisher: {'@id': 'https://www.conduction.nl/#org'},
  };

  const description = frontMatter.description || metadata.description;
  if (description) {
    schema.description = description;
  }

  const dateModified = toIsoDate(metadata.lastUpdatedAt);
  /* Frontmatter date wins; an unusable one falls back to last-updated. */
  const datePublished = toIsoDate(frontMatter.date) || dateModified;
  if (datePublished) {
    schema.datePublished = datePublished;
  }
  if (dateModified) {
    schema.dateModified = dateModified;
  }

  /* Author: accept frontmatter string ("Ruben"), object ({name, url}),
     or list of authors. Default to Conduction as the team author when
     nothing usable is provided. */
  const fmAuthor = frontMatter.author || frontMatter.authors;
  let author;
  if (Array.isArray(fmAuthor)) {
    const people = fmAuthor.map(toPerson).filter(Boolean);
    author = people.length > 0 ? people : undefined;
  } else {
    author = toPerson(fmAuthor);
  }
  schema.author = author || {
    '@type': 'Organization',
    name: 'Conduction',
    '@id': 'https://www.conduction.nl/#org',
  };
  return schema;
}
84
+
85
+ export default function DocItemContentWithSchema(props) {
86
+ const {siteConfig} = useDocusaurusContext();
87
+ const {metadata, frontMatter} = useDoc();
88
+ const emitSchema = frontMatter.techArticle !== false;
89
+ const schema = emitSchema
90
+ ? buildTechArticleJsonLd(siteConfig.url, metadata, frontMatter)
91
+ : null;
92
+ return (
93
+ <>
94
+ {schema && (
95
+ <Head>
96
+ <script type="application/ld+json">
97
+ {JSON.stringify(schema)}
98
+ </script>
99
+ </Head>
100
+ )}
101
+ <DocItemContent {...props} />
102
+ </>
103
+ );
104
+ }