npm - @docsector/docsector-reader - Versions diffs - 4.5.3 → 4.5.4 - Mend

@docsector/docsector-reader 4.5.3 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +5 -5
package/bin/docsector.js +29 -26
package/package.json +1 -1
package/public/robots.txt +0 -1
package/src/ai-assistant/server.js +1 -1
package/src/index.js +2 -2
package/src/pages/manual/basic/ai-assistant.overview.en-US.md +1 -1
package/src/pages/manual/basic/ai-assistant.overview.pt-BR.md +1 -1
package/src/quasar.factory.js +11 -3
package/src/sitemap.js +25 -16

package/README.md CHANGED Viewed

@@ -27,7 +27,7 @@ Transform Markdown content into beautiful, navigable documentation sites — wit
 - 🤖 **Open in ChatGPT / Claude** — One-click links to open the current page directly in ChatGPT or Claude for Q&A
 - 🤖 **LLM Bot Detection** — Automatically serves raw Markdown to known AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, and others)
 - 🗺️ **Sitemap Generation** — Automatic `sitemap.xml` generation at build time with root-relative URLs by default and absolute URLs when `siteUrl` is configured
-- 🤖 **AI-Friendly robots.txt** — Scaffold includes a `robots.txt` explicitly allowing 24 AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, etc.) and advertises `Sitemap: /sitemap.xml`
+- 🤖 **AI-Friendly robots.txt** — Scaffold includes a `robots.txt` explicitly allowing 24 AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, etc.), and the build appends `Sitemap: /sitemap.xml` at the end for crawler discovery
 - 🧭 **Content Signals** — Optional `Content-Signal` directive for declaring AI usage policy (`ai-train`, `search`, `ai-input`) in `robots.txt`
 - 🧩 **Agent Skills Discovery Index** — Optional `/.well-known/agent-skills/index.json` with RFC v0.2.0 schema and SHA-256 digests
 - ✍️ **Docsector Authoring Skill** — Publishable `SKILL.md` that teaches agents Docsector blocks, page patterns, MCP lookup, and WebMCP tools
@@ -352,8 +352,8 @@ export default {
 Use Cloudflare AI Search as the first provider path:
 - Create an AI Search instance in Cloudflare.
-- Build and deploy the Docsector site first; build output always publishes `/sitemap.xml` and adds `Sitemap: /sitemap.xml` to `robots.txt` for crawler discovery.
-- Use a Website data source. For the cleanest retrieval, point its specific sitemap to `/ai-search-sitemap.xml`; otherwise the crawler can discover `/sitemap.xml` from `robots.txt`.
+- Build and deploy the Docsector site first; build output always publishes `/sitemap.xml` and appends `Sitemap: /sitemap.xml` to the end of `robots.txt` for crawler discovery.
+- Use a Website data source. For the cleanest retrieval, point its specific sitemap to `/ai-search-sitemap.xml`. Docsector keeps that Markdown-focused sitemap available for explicit AI Search configuration, but does not auto-announce it from `robots.txt` so Cloudflare does not index the same content twice alongside `/sitemap.xml`.
 - Add metadata fields such as title, path, locale, book, version, and subpage if you want filtering later.
 - Set `AI_SEARCH_INSTANCE_NAME` as a Cloudflare Pages environment variable or local `.dev.vars` entry.
 - Bind the instance to Pages as `AI_SEARCH` when available, or set encrypted Pages secrets for `CLOUDFLARE_ACCOUNT_ID` and `CLOUDFLARE_API_TOKEN` with AI Search run access.
@@ -368,7 +368,7 @@ When enabled, `docsector build` can generate:
 | `functions/assistant.js` | Cloudflare Pages Function for browser assistant requests |
 | `dist/spa/sitemap.xml` | Default crawler sitemap advertised from `robots.txt` |
 | `dist/spa/robots.txt` | Crawler policy with `Sitemap: /sitemap.xml` |
-| `dist/spa/ai-search-sitemap.xml` | Markdown-focused sitemap for AI Search crawling |
+| `dist/spa/ai-search-sitemap.xml` | Markdown-focused sitemap for explicit AI Search Website data source configuration |
 | `dist/spa/.well-known/ai-search/manifest.json` | Source metadata for indexed documentation pages |
 | `dist/spa/_routes.json` | Routes the internal assistant endpoint to the Pages Function |
@@ -619,7 +619,7 @@ Notes:
 - `aiTrain`, `search`, and `aiInput` accept `yes` / `no` (or booleans).
 - Default scope is only `User-agent: *`.
 - Build patch is idempotent: repeated builds do not duplicate `Content-Signal` lines.
-- Build also keeps `Sitemap: /sitemap.xml` discoverable in `robots.txt` so crawlers can find the generated sitemap automatically.
+- Build also keeps `Sitemap: /sitemap.xml` discoverable at the end of `robots.txt` so crawlers can find the generated sitemap automatically.
 ### Validate

package/bin/docsector.js CHANGED Viewed

@@ -24,7 +24,7 @@ const packageRoot = resolve(__dirname, '..')
 const args = process.argv.slice(2)
 const command = args[0]
-const VERSION = '4.5.3'
+const VERSION = '4.5.4'
 const AUTHORING_SKILL_NAME = 'docsector-documentation-authoring'
 const AUTHORING_SKILL_DESCRIPTION = 'Author Docsector documentation with Markdown, custom blocks, MCP, and WebMCP.'
 const AUTHORING_SKILL_PUBLIC_PATH = `/.well-known/agent-skills/${AUTHORING_SKILL_NAME}/SKILL.md`
@@ -157,6 +157,32 @@ export default {
   // sitemap.xml is still generated with root-relative URLs when omitted.
   // siteUrl: 'https://docs.example.com',
+  // @ Home page source (optional)
+  // Use a remote README.md as homepage content at build-time.
+  // Falls back to local src/pages/Homepage.{lang}.md on fetch failure by default.
+  // homePage: {
+  //   source: 'remote-readme', // 'local' | 'remote-readme'
+  //   remoteReadmeUrl: 'https://raw.githubusercontent.com/your-org/your-repo/main/README.md',
+  //   timeoutMs: 8000,
+  //   fallbackToLocal: true
+  // },
+  // --- Language configs ---
+  // @ Languages
+  languages: [
+    {
+      image: '/images/flags/united-states-of-america.png',
+      label: 'English (US)',
+      value: 'en-US'
+    }
+  ],
+  // @ Default language
+  defaultLanguage: 'en-US'
+  // --- AI configs ---
   // @ MCP (Model Context Protocol)
   // Uncomment to enable an MCP server at /mcp for AI assistant integration.
   // Requires Cloudflare Pages Functions (or compatible serverless platform).
@@ -197,16 +223,6 @@ export default {
   //   }
   // },
-  // @ Home page source (optional)
-  // Use a remote README.md as homepage content at build-time.
-  // Falls back to local src/pages/Homepage.{lang}.md on fetch failure by default.
-  // homePage: {
-  //   source: 'remote-readme', // 'local' | 'remote-readme'
-  //   remoteReadmeUrl: 'https://raw.githubusercontent.com/your-org/your-repo/main/README.md',
-  //   timeoutMs: 8000,
-  //   fallbackToLocal: true
-  // },
   // @ Homepage Link headers for agent discovery (optional)
   // linkHeaders: {
   //   enabled: true,
@@ -270,19 +286,7 @@ export default {
   //       url: '${AUTHORING_SKILL_PUBLIC_PATH}'
   //     }
   //   ]
-  // },
-  // @ Languages
-  languages: [
-    {
-      image: '/images/flags/united-states-of-america.png',
-      label: 'English (US)',
-      value: 'en-US'
-    }
-  ],
-  // @ Default language
-  defaultLanguage: 'en-US'
+  // }
 }
 `
@@ -638,7 +642,6 @@ const TEMPLATE_ROBOTS_TXT = `\
 User-agent: *
 Allow: /
 Content-Signal: ai-train=yes, search=yes, ai-input=yes
-Sitemap: /sitemap.xml
 # Explicitly allow AI crawlers
 # OpenAI
@@ -793,7 +796,7 @@ npm run build
 \`\`\`
 The optimized SPA output will be in \`dist/spa/\`.
-Docsector also generates \`dist/spa/sitemap.xml\` and keeps \`robots.txt\` discoverable with \`Sitemap: /sitemap.xml\`. Set \`siteUrl\` in \`docsector.config.js\` when you want absolute sitemap URLs.
+Docsector also generates \`dist/spa/sitemap.xml\` and appends \`Sitemap: /sitemap.xml\` to the end of \`dist/spa/robots.txt\` during build. Set \`siteUrl\` in \`docsector.config.js\` when you want absolute sitemap URLs.
 `
 // =============================================================================

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@docsector/docsector-reader",
-  "version": "4.5.3",
+  "version": "4.5.4",
   "description": "A documentation rendering engine built with Vue 3, Quasar v2 and Vite. Transform Markdown into beautiful, navigable documentation sites.",
   "productName": "Docsector Reader",
   "author": "Rodrigo de Araujo Vieira",

package/public/robots.txt CHANGED Viewed

@@ -1,6 +1,5 @@
 User-agent: *
 Allow: /
-Sitemap: /sitemap.xml
 User-agent: Cloudflare-AI-Search
 Allow: /

package/src/ai-assistant/server.js CHANGED Viewed

@@ -251,7 +251,7 @@ function buildSystemPrompt (body, currentPageMarkdown = '') {
   const lines = [
     'You are Docsector Assistant, a concise documentation assistant.',
     'Answer using the indexed documentation context. If the answer is not in the docs, say so clearly.',
-    'Prefer short, actionable answers and cite the relevant source chunks when available.'
+    'Prefer short, actionable answers.'
   ]
   if (locale) lines.push(`User locale: ${locale}.`)

package/src/index.js CHANGED Viewed

@@ -179,7 +179,7 @@ export function createDocsector (config = {}) {
     },
     contentSignals: {
-      enabled: false,
+      enabled: true,
       aiTrain: 'yes',
       search: 'yes',
       aiInput: 'yes',
@@ -189,7 +189,7 @@ export function createDocsector (config = {}) {
     },
     agentSkills: {
-      enabled: false,
+      enabled: true,
       path: '/.well-known/agent-skills/index.json',
       schema: 'https://schemas.agentskills.io/discovery/0.2.0/schema.json',
       skills: [],

package/src/pages/manual/basic/ai-assistant.overview.en-US.md CHANGED Viewed

@@ -47,7 +47,7 @@ For cleaner retrieval, point the specific sitemap setting to:
 https://docs.example.com/ai-search-sitemap.xml
 ```
-The AI Search sitemap points to Markdown URLs, which are cleaner for retrieval than rendered SPA HTML. The manifest at `/.well-known/ai-search/manifest.json` lists titles, routes, locales, books, versions, and subpages for the same source set.
+The AI Search sitemap points to Markdown URLs, which are cleaner for retrieval than rendered SPA HTML. Docsector keeps it available for explicit Cloudflare configuration, but does not auto-advertise it from `robots.txt` to avoid duplicate indexing alongside `/sitemap.xml`. The manifest at `/.well-known/ai-search/manifest.json` lists titles, routes, locales, books, versions, and subpages for the same source set.
 ## Runtime Endpoint

package/src/pages/manual/basic/ai-assistant.overview.pt-BR.md CHANGED Viewed

@@ -47,7 +47,7 @@ Para uma recuperação mais limpa, aponte a configuração de sitemap específic
 https://docs.example.com/ai-search-sitemap.xml
 ```
-O sitemap do AI Search aponta para URLs Markdown, que são mais limpas para recuperação do que HTML renderizado pela SPA. O manifest em `/.well-known/ai-search/manifest.json` lista títulos, rotas, locales, books, versões e subpáginas do mesmo conjunto de fontes.
+O sitemap do AI Search aponta para URLs Markdown, que são mais limpas para recuperação do que HTML renderizado pela SPA. O Docsector mantém esse arquivo disponível para configuração explícita no Cloudflare, mas não o anuncia automaticamente em `robots.txt`, para evitar indexação duplicada junto com `/sitemap.xml`. O manifest em `/.well-known/ai-search/manifest.json` lista títulos, rotas, locales, books, versões e subpáginas do mesmo conjunto de fontes.
 ## Endpoint Runtime

package/src/quasar.factory.js CHANGED Viewed

@@ -1958,6 +1958,16 @@ function collectStandardSitemapEntries ({ pagesDir, pageEntries = [], defaultLan
   return entries
 }
+export function getAdvertisedRobotsSitemapPaths ({ sitemapEnabled = true } = {}) {
+  const paths = []
+  if (sitemapEnabled) {
+    paths.push('/sitemap.xml')
+  }
+  return paths
+}
 /**
  * Create a Vite plugin that generates static `.md` files at build time.
  *
@@ -2461,9 +2471,7 @@ export async function onRequest (context) {
         }
       }
-      const robotsSitemapPaths = []
-      if (sitemapEnabled) robotsSitemapPaths.push('/sitemap.xml')
-      if (aiSearchSitemapGenerated) robotsSitemapPaths.push('/ai-search-sitemap.xml')
+      const robotsSitemapPaths = getAdvertisedRobotsSitemapPaths({ sitemapEnabled })
       if (robotsSitemapPaths.length > 0) {
         const robotsPath = resolve(distDir, 'robots.txt')

package/src/sitemap.js CHANGED Viewed

@@ -76,28 +76,37 @@ export function appendSitemapsToRobots (robotsContent, { sitemaps = [], siteUrl
     ? robotsContent
     : 'User-agent: *\nAllow: /\n'
-  const existingIdentities = new Set(
-    input
-      .replace(/\r\n/g, '\n')
-      .split('\n')
-      .map(line => line.match(/^\s*Sitemap\s*:\s*(.+?)\s*$/i)?.[1])
-      .filter(Boolean)
-      .map(normalizeSitemapIdentity)
-  )
-  const addedIdentities = new Set()
-  const sitemapLines = (Array.isArray(sitemaps) ? sitemaps : [sitemaps])
+  const bodyLines = []
+  const existingSitemaps = []
+  for (const line of input.replace(/\r\n/g, '\n').split('\n')) {
+    const sitemap = line.match(/^\s*Sitemap\s*:\s*(.+?)\s*$/i)?.[1]
+    if (sitemap) {
+      existingSitemaps.push(sitemap)
+      continue
+    }
+    bodyLines.push(line)
+  }
+  const seenIdentities = new Set()
+  const normalizedSitemaps = [
+    ...(Array.isArray(sitemaps) ? sitemaps : [sitemaps]),
+    ...existingSitemaps
+  ]
     .filter(Boolean)
     .map(sitemap => resolveSitemapUrl(sitemap, siteUrl))
     .filter(sitemap => {
       const identity = normalizeSitemapIdentity(sitemap)
-      if (existingIdentities.has(identity) || addedIdentities.has(identity)) return false
-      addedIdentities.add(identity)
+      if (seenIdentities.has(identity)) return false
+      seenIdentities.add(identity)
       return true
     })
-    .map(sitemap => `Sitemap: ${sitemap}`)
-  if (sitemapLines.length === 0) return input
+  if (normalizedSitemaps.length === 0) return input
+  const body = bodyLines.join('\n').replace(/\s+$/g, '')
+  const sitemapLines = normalizedSitemaps.map(sitemap => `Sitemap: ${sitemap}`)
-  return `${input.replace(/\s+$/g, '')}\n${sitemapLines.join('\n')}\n`
+  return `${body}\n\n${sitemapLines.join('\n')}\n`
 }