@docsector/docsector-reader 4.5.3 → 4.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,7 +27,7 @@ Transform Markdown content into beautiful, navigable documentation sites — wit
27
27
  - 🤖 **Open in ChatGPT / Claude** — One-click links to open the current page directly in ChatGPT or Claude for Q&A
28
28
  - 🤖 **LLM Bot Detection** — Automatically serves raw Markdown to known AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, and others)
29
29
  - 🗺️ **Sitemap Generation** — Automatic `sitemap.xml` generation at build time with root-relative URLs by default and absolute URLs when `siteUrl` is configured
30
- - 🤖 **AI-Friendly robots.txt** — Scaffold includes a `robots.txt` explicitly allowing 24 AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, etc.) and advertises `Sitemap: /sitemap.xml`
30
+ - 🤖 **AI-Friendly robots.txt** — Scaffold includes a `robots.txt` explicitly allowing 24 AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, etc.), and the build appends `Sitemap: /sitemap.xml` at the end for crawler discovery
31
31
  - 🧭 **Content Signals** — Optional `Content-Signal` directive for declaring AI usage policy (`ai-train`, `search`, `ai-input`) in `robots.txt`
32
32
  - 🧩 **Agent Skills Discovery Index** — Optional `/.well-known/agent-skills/index.json` with RFC v0.2.0 schema and SHA-256 digests
33
33
  - ✍️ **Docsector Authoring Skill** — Publishable `SKILL.md` that teaches agents Docsector blocks, page patterns, MCP lookup, and WebMCP tools
@@ -78,6 +78,7 @@ Transform Markdown content into beautiful, navigable documentation sites — wit
78
78
  - 🌍 **Remote README as Home** — Optional build-time remote README source for homepage with automatic local fallback and automatic primary-title handoff when the remote README already provides the project heading
79
79
  - 🔗 **GitHub-Compatible Heading Anchors** — Markdown headings use GitHub-style slugs so standard README Table of Contents links work inside Docsector
80
80
  - 🧬 **Scaffolded Homepage Override Wiring** — New consumer projects automatically wire `virtual:docsector-homepage-override` into i18n message building
81
+ - 🤖 **Scaffolded AI Assistant Config** — New consumer projects include a ready-to-enable `aiAssistant` example in `docsector.config.js` so the built-in assistant is visible in the default scaffold
81
82
  - 🧰 **Scaffolded Dev Reliability** — New consumer projects protect Docsector virtual registries and Markdown CommonJS plugins from Vite optimizer edge cases during dev and build
82
83
  - 📖 **Expandable Markdown Sections** — Use `<d-block-expandable title="...">...</d-block-expandable>` to collapse secondary content while keeping rich Markdown support inside the body
83
84
  - 1️⃣ **Stepper Guides** — Use `<d-block-stepper>` with nested `<d-block-step title="...">...</d-block-step>` items to render native Quasar vertical steppers with rich Markdown and optional per-step icon overrides
@@ -352,8 +353,8 @@ export default {
352
353
  Use Cloudflare AI Search as the first provider path:
353
354
 
354
355
  - Create an AI Search instance in Cloudflare.
355
- - Build and deploy the Docsector site first; build output always publishes `/sitemap.xml` and adds `Sitemap: /sitemap.xml` to `robots.txt` for crawler discovery.
356
- - Use a Website data source. For the cleanest retrieval, point its specific sitemap to `/ai-search-sitemap.xml`; otherwise the crawler can discover `/sitemap.xml` from `robots.txt`.
356
+ - Build and deploy the Docsector site first; build output always publishes `/sitemap.xml` and appends `Sitemap: /sitemap.xml` to the end of `robots.txt` for crawler discovery.
357
+ - Use a Website data source. For the cleanest retrieval, point its specific sitemap to `/ai-search-sitemap.xml`. Docsector keeps that Markdown-focused sitemap available for explicit AI Search configuration, but does not auto-announce it from `robots.txt` so Cloudflare does not index the same content twice alongside `/sitemap.xml`.
357
358
  - Add metadata fields such as title, path, locale, book, version, and subpage if you want filtering later.
358
359
  - Set `AI_SEARCH_INSTANCE_NAME` as a Cloudflare Pages environment variable or local `.dev.vars` entry.
359
360
  - Bind the instance to Pages as `AI_SEARCH` when available, or set encrypted Pages secrets for `CLOUDFLARE_ACCOUNT_ID` and `CLOUDFLARE_API_TOKEN` with AI Search run access.
@@ -368,7 +369,7 @@ When enabled, `docsector build` can generate:
368
369
  | `functions/assistant.js` | Cloudflare Pages Function for browser assistant requests |
369
370
  | `dist/spa/sitemap.xml` | Default crawler sitemap advertised from `robots.txt` |
370
371
  | `dist/spa/robots.txt` | Crawler policy with `Sitemap: /sitemap.xml` |
371
- | `dist/spa/ai-search-sitemap.xml` | Markdown-focused sitemap for AI Search crawling |
372
+ | `dist/spa/ai-search-sitemap.xml` | Markdown-focused sitemap for explicit AI Search Website data source configuration |
372
373
  | `dist/spa/.well-known/ai-search/manifest.json` | Source metadata for indexed documentation pages |
373
374
  | `dist/spa/_routes.json` | Routes the internal assistant endpoint to the Pages Function |
374
375
 
@@ -619,7 +620,7 @@ Notes:
619
620
  - `aiTrain`, `search`, and `aiInput` accept `yes` / `no` (or booleans).
620
621
  - Default scope is only `User-agent: *`.
621
622
  - Build patch is idempotent: repeated builds do not duplicate `Content-Signal` lines.
622
- - Build also keeps `Sitemap: /sitemap.xml` discoverable in `robots.txt` so crawlers can find the generated sitemap automatically.
623
+ - Build also ensures `Sitemap: /sitemap.xml` is listed at the end of `robots.txt` so crawlers can find the generated sitemap automatically.
623
624
 
624
625
  ### Validate
625
626
 
package/bin/docsector.js CHANGED
@@ -24,7 +24,7 @@ const packageRoot = resolve(__dirname, '..')
24
24
  const args = process.argv.slice(2)
25
25
  const command = args[0]
26
26
 
27
- const VERSION = '4.5.3'
27
+ const VERSION = '4.5.5'
28
28
  const AUTHORING_SKILL_NAME = 'docsector-documentation-authoring'
29
29
  const AUTHORING_SKILL_DESCRIPTION = 'Author Docsector documentation with Markdown, custom blocks, MCP, and WebMCP.'
30
30
  const AUTHORING_SKILL_PUBLIC_PATH = `/.well-known/agent-skills/${AUTHORING_SKILL_NAME}/SKILL.md`
@@ -157,6 +157,72 @@ export default {
157
157
  // sitemap.xml is still generated with root-relative URLs when omitted.
158
158
  // siteUrl: 'https://docs.example.com',
159
159
 
160
+ // @ Home page source (optional)
161
+ // Use a remote README.md as homepage content at build-time.
162
+ // Falls back to local src/pages/Homepage.{lang}.md on fetch failure by default.
163
+ // homePage: {
164
+ // source: 'remote-readme', // 'local' | 'remote-readme'
165
+ // remoteReadmeUrl: 'https://raw.githubusercontent.com/your-org/your-repo/main/README.md',
166
+ // timeoutMs: 8000,
167
+ // fallbackToLocal: true
168
+ // },
169
+
170
+ // --- Language configs ---
171
+
172
+ // @ Languages
173
+ languages: [
174
+ {
175
+ image: '/images/flags/united-states-of-america.png',
176
+ label: 'English (US)',
177
+ value: 'en-US'
178
+ }
179
+ ],
180
+
181
+ // @ Default language
182
+ defaultLanguage: 'en-US'
183
+
184
+ // --- AI configs ---
185
+ // @ AI Assistant (optional)
186
+ // Customize the built-in AI assistant or disable it if not needed.
187
+ // aiAssistant: {
188
+ // enabled: true,
189
+ // provider: 'aiSearch',
190
+ // endpoint: '/assistant',
191
+ // ui: {
192
+ // title: 'Docsector AI Assistant',
193
+ // subtitle: 'Ask, search, or explore the documentation.',
194
+ // drawerWidth: 380,
195
+ // wideBreakpoint: 1280,
196
+ // showCitations: true,
197
+ // suggestedPrompts: [
198
+ // 'How do I get started?',
199
+ // 'Summarize this page.',
200
+ // 'Where is the related API reference?'
201
+ // ]
202
+ // },
203
+ // aiSearch: {
204
+ // binding: 'AI_SEARCH',
205
+ // instanceNameEnv: 'AI_SEARCH_INSTANCE_NAME',
206
+ // namespace: '',
207
+ // accountIdEnv: 'CLOUDFLARE_ACCOUNT_ID',
208
+ // apiTokenEnv: 'CLOUDFLARE_API_TOKEN',
209
+ // model: '@cf/meta/llama-4-scout-17b-16e-instruct',
210
+ // retrievalType: 'vector',
211
+ // maxResults: 10,
212
+ // matchThreshold: 0.4,
213
+ // contextExpansion: 1,
214
+ // queryRewrite: {
215
+ // enabled: false
216
+ // },
217
+ // reranking: {
218
+ // enabled: false,
219
+ // model: '@cf/baai/bge-reranker-base',
220
+ // matchThreshold: 0.4
221
+ // },
222
+ // stream: true
223
+ // }
224
+ // },
225
+
160
226
  // @ MCP (Model Context Protocol)
161
227
  // Uncomment to enable an MCP server at /mcp for AI assistant integration.
162
228
  // Requires Cloudflare Pages Functions (or compatible serverless platform).
@@ -197,16 +263,6 @@ export default {
197
263
  // }
198
264
  // },
199
265
 
200
- // @ Home page source (optional)
201
- // Use a remote README.md as homepage content at build-time.
202
- // Falls back to local src/pages/Homepage.{lang}.md on fetch failure by default.
203
- // homePage: {
204
- // source: 'remote-readme', // 'local' | 'remote-readme'
205
- // remoteReadmeUrl: 'https://raw.githubusercontent.com/your-org/your-repo/main/README.md',
206
- // timeoutMs: 8000,
207
- // fallbackToLocal: true
208
- // },
209
-
210
266
  // @ Homepage Link headers for agent discovery (optional)
211
267
  // linkHeaders: {
212
268
  // enabled: true,
@@ -270,19 +326,7 @@ export default {
270
326
  // url: '${AUTHORING_SKILL_PUBLIC_PATH}'
271
327
  // }
272
328
  // ]
273
- // },
274
-
275
- // @ Languages
276
- languages: [
277
- {
278
- image: '/images/flags/united-states-of-america.png',
279
- label: 'English (US)',
280
- value: 'en-US'
281
- }
282
- ],
283
-
284
- // @ Default language
285
- defaultLanguage: 'en-US'
329
+ // }
286
330
  }
287
331
  `
288
332
 
@@ -638,7 +682,6 @@ const TEMPLATE_ROBOTS_TXT = `\
638
682
  User-agent: *
639
683
  Allow: /
640
684
  Content-Signal: ai-train=yes, search=yes, ai-input=yes
641
- Sitemap: /sitemap.xml
642
685
 
643
686
  # Explicitly allow AI crawlers
644
687
  # OpenAI
@@ -793,7 +836,7 @@ npm run build
793
836
  \`\`\`
794
837
 
795
838
  The optimized SPA output will be in \`dist/spa/\`.
796
- Docsector also generates \`dist/spa/sitemap.xml\` and keeps \`robots.txt\` discoverable with \`Sitemap: /sitemap.xml\`. Set \`siteUrl\` in \`docsector.config.js\` when you want absolute sitemap URLs.
839
+ Docsector also generates \`dist/spa/sitemap.xml\` and appends \`Sitemap: /sitemap.xml\` to the end of \`dist/spa/robots.txt\` during build. Set \`siteUrl\` in \`docsector.config.js\` when you want absolute sitemap URLs.
797
840
  `
798
841
 
799
842
  // =============================================================================
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@docsector/docsector-reader",
3
- "version": "4.5.3",
3
+ "version": "4.5.5",
4
4
  "description": "A documentation rendering engine built with Vue 3, Quasar v2 and Vite. Transform Markdown into beautiful, navigable documentation sites.",
5
5
  "productName": "Docsector Reader",
6
6
  "author": "Rodrigo de Araujo Vieira",
package/public/robots.txt CHANGED
@@ -1,6 +1,5 @@
1
1
  User-agent: *
2
2
  Allow: /
3
- Sitemap: /sitemap.xml
4
3
 
5
4
  User-agent: Cloudflare-AI-Search
6
5
  Allow: /
@@ -251,7 +251,7 @@ function buildSystemPrompt (body, currentPageMarkdown = '') {
251
251
  const lines = [
252
252
  'You are Docsector Assistant, a concise documentation assistant.',
253
253
  'Answer using the indexed documentation context. If the answer is not in the docs, say so clearly.',
254
- 'Prefer short, actionable answers and cite the relevant source chunks when available.'
254
+ 'Prefer short, actionable answers.'
255
255
  ]
256
256
 
257
257
  if (locale) lines.push(`User locale: ${locale}.`)
package/src/index.js CHANGED
@@ -179,7 +179,7 @@ export function createDocsector (config = {}) {
179
179
  },
180
180
 
181
181
  contentSignals: {
182
- enabled: false,
182
+ enabled: true,
183
183
  aiTrain: 'yes',
184
184
  search: 'yes',
185
185
  aiInput: 'yes',
@@ -189,7 +189,7 @@ export function createDocsector (config = {}) {
189
189
  },
190
190
 
191
191
  agentSkills: {
192
- enabled: false,
192
+ enabled: true,
193
193
  path: '/.well-known/agent-skills/index.json',
194
194
  schema: 'https://schemas.agentskills.io/discovery/0.2.0/schema.json',
195
195
  skills: [],
@@ -47,7 +47,7 @@ For cleaner retrieval, point the specific sitemap setting to:
47
47
  https://docs.example.com/ai-search-sitemap.xml
48
48
  ```
49
49
 
50
- The AI Search sitemap points to Markdown URLs, which are cleaner for retrieval than rendered SPA HTML. The manifest at `/.well-known/ai-search/manifest.json` lists titles, routes, locales, books, versions, and subpages for the same source set.
50
+ The AI Search sitemap points to Markdown URLs, which are cleaner for retrieval than rendered SPA HTML. Docsector keeps it available for explicit Cloudflare configuration, but does not auto-advertise it from `robots.txt` to avoid duplicate indexing alongside `/sitemap.xml`. The manifest at `/.well-known/ai-search/manifest.json` lists titles, routes, locales, books, versions, and subpages for the same source set.
51
51
 
52
52
  ## Runtime Endpoint
53
53
 
@@ -47,7 +47,7 @@ Para uma recuperação mais limpa, aponte a configuração de sitemap específic
47
47
  https://docs.example.com/ai-search-sitemap.xml
48
48
  ```
49
49
 
50
- O sitemap do AI Search aponta para URLs Markdown, que são mais limpas para recuperação do que HTML renderizado pela SPA. O manifest em `/.well-known/ai-search/manifest.json` lista títulos, rotas, locales, books, versões e subpáginas do mesmo conjunto de fontes.
50
+ O sitemap do AI Search aponta para URLs Markdown, que são mais limpas para recuperação do que HTML renderizado pela SPA. O Docsector mantém esse arquivo disponível para configuração explícita no Cloudflare, mas não o anuncia automaticamente em `robots.txt`, para evitar indexação duplicada junto com `/sitemap.xml`. O manifest em `/.well-known/ai-search/manifest.json` lista títulos, rotas, locales, books, versões e subpáginas do mesmo conjunto de fontes.
51
51
 
52
52
  ## Endpoint Runtime
53
53
 
@@ -1958,6 +1958,16 @@ function collectStandardSitemapEntries ({ pagesDir, pageEntries = [], defaultLan
1958
1958
  return entries
1959
1959
  }
1960
1960
 
1961
/**
 * Build the list of sitemap paths handed to the robots.txt patching step.
 *
 * Only the standard `/sitemap.xml` is ever included; the list is empty when
 * `sitemapEnabled` is falsy. The previous inline logic at the call site also
 * pushed `/ai-search-sitemap.xml`; this helper does not.
 *
 * @param {object} [options]
 * @param {boolean} [options.sitemapEnabled=true] - Whether `/sitemap.xml` should be listed.
 * @returns {string[]} Root-relative sitemap paths (a new array on every call).
 */
+ export function getAdvertisedRobotsSitemapPaths ({ sitemapEnabled = true } = {}) {
1962
+ const paths = []
1963
+
1964
+ if (sitemapEnabled) {
1965
+ paths.push('/sitemap.xml')
1966
+ }
1967
+
1968
+ return paths
1969
+ }
1970
+
1961
1971
  /**
1962
1972
  * Create a Vite plugin that generates static `.md` files at build time.
1963
1973
  *
@@ -2461,9 +2471,7 @@ export async function onRequest (context) {
2461
2471
  }
2462
2472
  }
2463
2473
 
2464
- const robotsSitemapPaths = []
2465
- if (sitemapEnabled) robotsSitemapPaths.push('/sitemap.xml')
2466
- if (aiSearchSitemapGenerated) robotsSitemapPaths.push('/ai-search-sitemap.xml')
2474
+ const robotsSitemapPaths = getAdvertisedRobotsSitemapPaths({ sitemapEnabled })
2467
2475
 
2468
2476
  if (robotsSitemapPaths.length > 0) {
2469
2477
  const robotsPath = resolve(distDir, 'robots.txt')
package/src/sitemap.js CHANGED
@@ -76,28 +76,37 @@ export function appendSitemapsToRobots (robotsContent, { sitemaps = [], siteUrl
76
76
  ? robotsContent
77
77
  : 'User-agent: *\nAllow: /\n'
78
78
 
79
- const existingIdentities = new Set(
80
- input
81
- .replace(/\r\n/g, '\n')
82
- .split('\n')
83
- .map(line => line.match(/^\s*Sitemap\s*:\s*(.+?)\s*$/i)?.[1])
84
- .filter(Boolean)
85
- .map(normalizeSitemapIdentity)
86
- )
87
-
88
- const addedIdentities = new Set()
89
- const sitemapLines = (Array.isArray(sitemaps) ? sitemaps : [sitemaps])
79
+ const bodyLines = []
80
+ const existingSitemaps = []
81
+
82
+ for (const line of input.replace(/\r\n/g, '\n').split('\n')) {
83
+ const sitemap = line.match(/^\s*Sitemap\s*:\s*(.+?)\s*$/i)?.[1]
84
+ if (sitemap) {
85
+ existingSitemaps.push(sitemap)
86
+ continue
87
+ }
88
+
89
+ bodyLines.push(line)
90
+ }
91
+
92
+ const seenIdentities = new Set()
93
+ const normalizedSitemaps = [
94
+ ...(Array.isArray(sitemaps) ? sitemaps : [sitemaps]),
95
+ ...existingSitemaps
96
+ ]
90
97
  .filter(Boolean)
91
98
  .map(sitemap => resolveSitemapUrl(sitemap, siteUrl))
92
99
  .filter(sitemap => {
93
100
  const identity = normalizeSitemapIdentity(sitemap)
94
- if (existingIdentities.has(identity) || addedIdentities.has(identity)) return false
95
- addedIdentities.add(identity)
101
+ if (seenIdentities.has(identity)) return false
102
+ seenIdentities.add(identity)
96
103
  return true
97
104
  })
98
- .map(sitemap => `Sitemap: ${sitemap}`)
99
105
 
100
- if (sitemapLines.length === 0) return input
106
+ if (normalizedSitemaps.length === 0) return input
107
+
108
+ const body = bodyLines.join('\n').replace(/\s+$/g, '')
109
+ const sitemapLines = normalizedSitemaps.map(sitemap => `Sitemap: ${sitemap}`)
101
110
 
102
- return `${input.replace(/\s+$/g, '')}\n${sitemapLines.join('\n')}\n`
111
+ return `${body}\n\n${sitemapLines.join('\n')}\n`
103
112
  }