@docsector/docsector-reader 4.5.3 → 4.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/bin/docsector.js +29 -26
- package/package.json +1 -1
- package/public/robots.txt +0 -1
- package/src/ai-assistant/server.js +1 -1
- package/src/index.js +2 -2
- package/src/pages/manual/basic/ai-assistant.overview.en-US.md +1 -1
- package/src/pages/manual/basic/ai-assistant.overview.pt-BR.md +1 -1
- package/src/quasar.factory.js +11 -3
- package/src/sitemap.js +25 -16
package/README.md
CHANGED
|
@@ -27,7 +27,7 @@ Transform Markdown content into beautiful, navigable documentation sites — wit
|
|
|
27
27
|
- 🤖 **Open in ChatGPT / Claude** — One-click links to open the current page directly in ChatGPT or Claude for Q&A
|
|
28
28
|
- 🤖 **LLM Bot Detection** — Automatically serves raw Markdown to known AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, and others)
|
|
29
29
|
- 🗺️ **Sitemap Generation** — Automatic `sitemap.xml` generation at build time with root-relative URLs by default and absolute URLs when `siteUrl` is configured
|
|
30
|
-
- 🤖 **AI-Friendly robots.txt** — Scaffold includes a `robots.txt` explicitly allowing 24 AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, etc.) and
|
|
30
|
+
- 🤖 **AI-Friendly robots.txt** — Scaffold includes a `robots.txt` explicitly allowing 24 AI crawlers (GPTBot, ClaudeBot, PerplexityBot, Cloudflare-AI-Search, GrokBot, etc.), and the build appends `Sitemap: /sitemap.xml` at the end for crawler discovery
|
|
31
31
|
- 🧭 **Content Signals** — Optional `Content-Signal` directive for declaring AI usage policy (`ai-train`, `search`, `ai-input`) in `robots.txt`
|
|
32
32
|
- 🧩 **Agent Skills Discovery Index** — Optional `/.well-known/agent-skills/index.json` with RFC v0.2.0 schema and SHA-256 digests
|
|
33
33
|
- ✍️ **Docsector Authoring Skill** — Publishable `SKILL.md` that teaches agents Docsector blocks, page patterns, MCP lookup, and WebMCP tools
|
|
@@ -352,8 +352,8 @@ export default {
|
|
|
352
352
|
Use Cloudflare AI Search as the first provider path:
|
|
353
353
|
|
|
354
354
|
- Create an AI Search instance in Cloudflare.
|
|
355
|
-
- Build and deploy the Docsector site first; build output always publishes `/sitemap.xml` and
|
|
356
|
-
- Use a Website data source. For the cleanest retrieval, point its specific sitemap to `/ai-search-sitemap.xml`
|
|
355
|
+
- Build and deploy the Docsector site first; build output always publishes `/sitemap.xml` and appends `Sitemap: /sitemap.xml` to the end of `robots.txt` for crawler discovery.
|
|
356
|
+
- Use a Website data source. For the cleanest retrieval, point its specific sitemap to `/ai-search-sitemap.xml`. Docsector keeps that Markdown-focused sitemap available for explicit AI Search configuration, but does not auto-announce it from `robots.txt` so Cloudflare does not index the same content twice alongside `/sitemap.xml`.
|
|
357
357
|
- Add metadata fields such as title, path, locale, book, version, and subpage if you want filtering later.
|
|
358
358
|
- Set `AI_SEARCH_INSTANCE_NAME` as a Cloudflare Pages environment variable or local `.dev.vars` entry.
|
|
359
359
|
- Bind the instance to Pages as `AI_SEARCH` when available, or set encrypted Pages secrets for `CLOUDFLARE_ACCOUNT_ID` and `CLOUDFLARE_API_TOKEN` with AI Search run access.
|
|
@@ -368,7 +368,7 @@ When enabled, `docsector build` can generate:
|
|
|
368
368
|
| `functions/assistant.js` | Cloudflare Pages Function for browser assistant requests |
|
|
369
369
|
| `dist/spa/sitemap.xml` | Default crawler sitemap advertised from `robots.txt` |
|
|
370
370
|
| `dist/spa/robots.txt` | Crawler policy with `Sitemap: /sitemap.xml` |
|
|
371
|
-
| `dist/spa/ai-search-sitemap.xml` | Markdown-focused sitemap for AI Search
|
|
371
|
+
| `dist/spa/ai-search-sitemap.xml` | Markdown-focused sitemap for explicit AI Search Website data source configuration |
|
|
372
372
|
| `dist/spa/.well-known/ai-search/manifest.json` | Source metadata for indexed documentation pages |
|
|
373
373
|
| `dist/spa/_routes.json` | Routes the internal assistant endpoint to the Pages Function |
|
|
374
374
|
|
|
@@ -619,7 +619,7 @@ Notes:
|
|
|
619
619
|
- `aiTrain`, `search`, and `aiInput` accept `yes` / `no` (or booleans).
|
|
620
620
|
- Default scope is only `User-agent: *`.
|
|
621
621
|
- Build patch is idempotent: repeated builds do not duplicate `Content-Signal` lines.
|
|
622
|
-
- Build also keeps `Sitemap: /sitemap.xml` discoverable
|
|
622
|
+
- Build also keeps `Sitemap: /sitemap.xml` discoverable at the end of `robots.txt` so crawlers can find the generated sitemap automatically.
|
|
623
623
|
|
|
624
624
|
### Validate
|
|
625
625
|
|
package/bin/docsector.js
CHANGED
|
@@ -24,7 +24,7 @@ const packageRoot = resolve(__dirname, '..')
|
|
|
24
24
|
const args = process.argv.slice(2)
|
|
25
25
|
const command = args[0]
|
|
26
26
|
|
|
27
|
-
const VERSION = '4.5.3'
|
|
27
|
+
const VERSION = '4.5.4'
|
|
28
28
|
const AUTHORING_SKILL_NAME = 'docsector-documentation-authoring'
|
|
29
29
|
const AUTHORING_SKILL_DESCRIPTION = 'Author Docsector documentation with Markdown, custom blocks, MCP, and WebMCP.'
|
|
30
30
|
const AUTHORING_SKILL_PUBLIC_PATH = `/.well-known/agent-skills/${AUTHORING_SKILL_NAME}/SKILL.md`
|
|
@@ -157,6 +157,32 @@ export default {
|
|
|
157
157
|
// sitemap.xml is still generated with root-relative URLs when omitted.
|
|
158
158
|
// siteUrl: 'https://docs.example.com',
|
|
159
159
|
|
|
160
|
+
// @ Home page source (optional)
|
|
161
|
+
// Use a remote README.md as homepage content at build-time.
|
|
162
|
+
// Falls back to local src/pages/Homepage.{lang}.md on fetch failure by default.
|
|
163
|
+
// homePage: {
|
|
164
|
+
// source: 'remote-readme', // 'local' | 'remote-readme'
|
|
165
|
+
// remoteReadmeUrl: 'https://raw.githubusercontent.com/your-org/your-repo/main/README.md',
|
|
166
|
+
// timeoutMs: 8000,
|
|
167
|
+
// fallbackToLocal: true
|
|
168
|
+
// },
|
|
169
|
+
|
|
170
|
+
// --- Language configs ---
|
|
171
|
+
|
|
172
|
+
// @ Languages
|
|
173
|
+
languages: [
|
|
174
|
+
{
|
|
175
|
+
image: '/images/flags/united-states-of-america.png',
|
|
176
|
+
label: 'English (US)',
|
|
177
|
+
value: 'en-US'
|
|
178
|
+
}
|
|
179
|
+
],
|
|
180
|
+
|
|
181
|
+
// @ Default language
|
|
182
|
+
defaultLanguage: 'en-US'
|
|
183
|
+
|
|
184
|
+
// --- AI configs ---
|
|
185
|
+
|
|
160
186
|
// @ MCP (Model Context Protocol)
|
|
161
187
|
// Uncomment to enable an MCP server at /mcp for AI assistant integration.
|
|
162
188
|
// Requires Cloudflare Pages Functions (or compatible serverless platform).
|
|
@@ -197,16 +223,6 @@ export default {
|
|
|
197
223
|
// }
|
|
198
224
|
// },
|
|
199
225
|
|
|
200
|
-
// @ Home page source (optional)
|
|
201
|
-
// Use a remote README.md as homepage content at build-time.
|
|
202
|
-
// Falls back to local src/pages/Homepage.{lang}.md on fetch failure by default.
|
|
203
|
-
// homePage: {
|
|
204
|
-
// source: 'remote-readme', // 'local' | 'remote-readme'
|
|
205
|
-
// remoteReadmeUrl: 'https://raw.githubusercontent.com/your-org/your-repo/main/README.md',
|
|
206
|
-
// timeoutMs: 8000,
|
|
207
|
-
// fallbackToLocal: true
|
|
208
|
-
// },
|
|
209
|
-
|
|
210
226
|
// @ Homepage Link headers for agent discovery (optional)
|
|
211
227
|
// linkHeaders: {
|
|
212
228
|
// enabled: true,
|
|
@@ -270,19 +286,7 @@ export default {
|
|
|
270
286
|
// url: '${AUTHORING_SKILL_PUBLIC_PATH}'
|
|
271
287
|
// }
|
|
272
288
|
// ]
|
|
273
|
-
// }
|
|
274
|
-
|
|
275
|
-
// @ Languages
|
|
276
|
-
languages: [
|
|
277
|
-
{
|
|
278
|
-
image: '/images/flags/united-states-of-america.png',
|
|
279
|
-
label: 'English (US)',
|
|
280
|
-
value: 'en-US'
|
|
281
|
-
}
|
|
282
|
-
],
|
|
283
|
-
|
|
284
|
-
// @ Default language
|
|
285
|
-
defaultLanguage: 'en-US'
|
|
289
|
+
// }
|
|
286
290
|
}
|
|
287
291
|
`
|
|
288
292
|
|
|
@@ -638,7 +642,6 @@ const TEMPLATE_ROBOTS_TXT = `\
|
|
|
638
642
|
User-agent: *
|
|
639
643
|
Allow: /
|
|
640
644
|
Content-Signal: ai-train=yes, search=yes, ai-input=yes
|
|
641
|
-
Sitemap: /sitemap.xml
|
|
642
645
|
|
|
643
646
|
# Explicitly allow AI crawlers
|
|
644
647
|
# OpenAI
|
|
@@ -793,7 +796,7 @@ npm run build
|
|
|
793
796
|
\`\`\`
|
|
794
797
|
|
|
795
798
|
The optimized SPA output will be in \`dist/spa/\`.
|
|
796
|
-
Docsector also generates \`dist/spa/sitemap.xml\` and
|
|
799
|
+
Docsector also generates \`dist/spa/sitemap.xml\` and appends \`Sitemap: /sitemap.xml\` to the end of \`dist/spa/robots.txt\` during build. Set \`siteUrl\` in \`docsector.config.js\` when you want absolute sitemap URLs.
|
|
797
800
|
`
|
|
798
801
|
|
|
799
802
|
// =============================================================================
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@docsector/docsector-reader",
|
|
3
|
-
"version": "4.5.3",
|
|
3
|
+
"version": "4.5.4",
|
|
4
4
|
"description": "A documentation rendering engine built with Vue 3, Quasar v2 and Vite. Transform Markdown into beautiful, navigable documentation sites.",
|
|
5
5
|
"productName": "Docsector Reader",
|
|
6
6
|
"author": "Rodrigo de Araujo Vieira",
|
package/public/robots.txt
CHANGED
|
@@ -251,7 +251,7 @@ function buildSystemPrompt (body, currentPageMarkdown = '') {
|
|
|
251
251
|
const lines = [
|
|
252
252
|
'You are Docsector Assistant, a concise documentation assistant.',
|
|
253
253
|
'Answer using the indexed documentation context. If the answer is not in the docs, say so clearly.',
|
|
254
|
-
'Prefer short, actionable answers
|
|
254
|
+
'Prefer short, actionable answers.'
|
|
255
255
|
]
|
|
256
256
|
|
|
257
257
|
if (locale) lines.push(`User locale: ${locale}.`)
|
package/src/index.js
CHANGED
|
@@ -179,7 +179,7 @@ export function createDocsector (config = {}) {
|
|
|
179
179
|
},
|
|
180
180
|
|
|
181
181
|
contentSignals: {
|
|
182
|
-
enabled:
|
|
182
|
+
enabled: true,
|
|
183
183
|
aiTrain: 'yes',
|
|
184
184
|
search: 'yes',
|
|
185
185
|
aiInput: 'yes',
|
|
@@ -189,7 +189,7 @@ export function createDocsector (config = {}) {
|
|
|
189
189
|
},
|
|
190
190
|
|
|
191
191
|
agentSkills: {
|
|
192
|
-
enabled:
|
|
192
|
+
enabled: true,
|
|
193
193
|
path: '/.well-known/agent-skills/index.json',
|
|
194
194
|
schema: 'https://schemas.agentskills.io/discovery/0.2.0/schema.json',
|
|
195
195
|
skills: [],
|
|
@@ -47,7 +47,7 @@ For cleaner retrieval, point the specific sitemap setting to:
|
|
|
47
47
|
https://docs.example.com/ai-search-sitemap.xml
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
-
The AI Search sitemap points to Markdown URLs, which are cleaner for retrieval than rendered SPA HTML. The manifest at `/.well-known/ai-search/manifest.json` lists titles, routes, locales, books, versions, and subpages for the same source set.
|
|
50
|
+
The AI Search sitemap points to Markdown URLs, which are cleaner for retrieval than rendered SPA HTML. Docsector keeps it available for explicit Cloudflare configuration, but does not auto-advertise it from `robots.txt` to avoid duplicate indexing alongside `/sitemap.xml`. The manifest at `/.well-known/ai-search/manifest.json` lists titles, routes, locales, books, versions, and subpages for the same source set.
|
|
51
51
|
|
|
52
52
|
## Runtime Endpoint
|
|
53
53
|
|
|
@@ -47,7 +47,7 @@ Para uma recuperação mais limpa, aponte a configuração de sitemap específic
|
|
|
47
47
|
https://docs.example.com/ai-search-sitemap.xml
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
-
O sitemap do AI Search aponta para URLs Markdown, que são mais limpas para recuperação do que HTML renderizado pela SPA. O manifest em `/.well-known/ai-search/manifest.json` lista títulos, rotas, locales, books, versões e subpáginas do mesmo conjunto de fontes.
|
|
50
|
+
O sitemap do AI Search aponta para URLs Markdown, que são mais limpas para recuperação do que HTML renderizado pela SPA. O Docsector mantém esse arquivo disponível para configuração explícita no Cloudflare, mas não o anuncia automaticamente em `robots.txt`, para evitar indexação duplicada junto com `/sitemap.xml`. O manifest em `/.well-known/ai-search/manifest.json` lista títulos, rotas, locales, books, versões e subpáginas do mesmo conjunto de fontes.
|
|
51
51
|
|
|
52
52
|
## Endpoint Runtime
|
|
53
53
|
|
package/src/quasar.factory.js
CHANGED
|
@@ -1958,6 +1958,16 @@ function collectStandardSitemapEntries ({ pagesDir, pageEntries = [], defaultLan
|
|
|
1958
1958
|
return entries
|
|
1959
1959
|
}
|
|
1960
1960
|
|
|
1961
|
+
export function getAdvertisedRobotsSitemapPaths ({ sitemapEnabled = true } = {}) {
|
|
1962
|
+
const paths = []
|
|
1963
|
+
|
|
1964
|
+
if (sitemapEnabled) {
|
|
1965
|
+
paths.push('/sitemap.xml')
|
|
1966
|
+
}
|
|
1967
|
+
|
|
1968
|
+
return paths
|
|
1969
|
+
}
|
|
1970
|
+
|
|
1961
1971
|
/**
|
|
1962
1972
|
* Create a Vite plugin that generates static `.md` files at build time.
|
|
1963
1973
|
*
|
|
@@ -2461,9 +2471,7 @@ export async function onRequest (context) {
|
|
|
2461
2471
|
}
|
|
2462
2472
|
}
|
|
2463
2473
|
|
|
2464
|
-
const robotsSitemapPaths =
|
|
2465
|
-
if (sitemapEnabled) robotsSitemapPaths.push('/sitemap.xml')
|
|
2466
|
-
if (aiSearchSitemapGenerated) robotsSitemapPaths.push('/ai-search-sitemap.xml')
|
|
2474
|
+
const robotsSitemapPaths = getAdvertisedRobotsSitemapPaths({ sitemapEnabled })
|
|
2467
2475
|
|
|
2468
2476
|
if (robotsSitemapPaths.length > 0) {
|
|
2469
2477
|
const robotsPath = resolve(distDir, 'robots.txt')
|
package/src/sitemap.js
CHANGED
|
@@ -76,28 +76,37 @@ export function appendSitemapsToRobots (robotsContent, { sitemaps = [], siteUrl
|
|
|
76
76
|
? robotsContent
|
|
77
77
|
: 'User-agent: *\nAllow: /\n'
|
|
78
78
|
|
|
79
|
-
const
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
.
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
79
|
+
const bodyLines = []
|
|
80
|
+
const existingSitemaps = []
|
|
81
|
+
|
|
82
|
+
for (const line of input.replace(/\r\n/g, '\n').split('\n')) {
|
|
83
|
+
const sitemap = line.match(/^\s*Sitemap\s*:\s*(.+?)\s*$/i)?.[1]
|
|
84
|
+
if (sitemap) {
|
|
85
|
+
existingSitemaps.push(sitemap)
|
|
86
|
+
continue
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
bodyLines.push(line)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const seenIdentities = new Set()
|
|
93
|
+
const normalizedSitemaps = [
|
|
94
|
+
...(Array.isArray(sitemaps) ? sitemaps : [sitemaps]),
|
|
95
|
+
...existingSitemaps
|
|
96
|
+
]
|
|
90
97
|
.filter(Boolean)
|
|
91
98
|
.map(sitemap => resolveSitemapUrl(sitemap, siteUrl))
|
|
92
99
|
.filter(sitemap => {
|
|
93
100
|
const identity = normalizeSitemapIdentity(sitemap)
|
|
94
|
-
if (
|
|
95
|
-
|
|
101
|
+
if (seenIdentities.has(identity)) return false
|
|
102
|
+
seenIdentities.add(identity)
|
|
96
103
|
return true
|
|
97
104
|
})
|
|
98
|
-
.map(sitemap => `Sitemap: ${sitemap}`)
|
|
99
105
|
|
|
100
|
-
if (
|
|
106
|
+
if (normalizedSitemaps.length === 0) return input
|
|
107
|
+
|
|
108
|
+
const body = bodyLines.join('\n').replace(/\s+$/g, '')
|
|
109
|
+
const sitemapLines = normalizedSitemaps.map(sitemap => `Sitemap: ${sitemap}`)
|
|
101
110
|
|
|
102
|
-
return `${
|
|
111
|
+
return `${body}\n\n${sitemapLines.join('\n')}\n`
|
|
103
112
|
}
|