doc-fetch-cli 2.0.4 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/bin/doc-fetch_darwin_amd64 +0 -0
- package/bin/doc-fetch_windows_amd64.exe +0 -0
- package/doc-fetch +0 -0
- package/doc-fetch_darwin_amd64 +0 -0
- package/doc-fetch_darwin_arm64 +0 -0
- package/doc-fetch_linux_amd64 +0 -0
- package/doc-fetch_windows_amd64.exe +0 -0
- package/package.json +1 -1
- package/website/BLOG-SETUP-SUMMARY.md +385 -0
- package/website/DEPLOYMENT.md +189 -0
- package/website/LAUNCH-CHECKLIST.md +134 -0
- package/website/README.md +75 -0
- package/website/SEO-STRATEGY.md +347 -0
- package/website/URL-STRUCTURE.md +334 -0
- package/website/WEBSITE-SUMMARY.md +246 -0
- package/website/package-lock.json +1628 -0
- package/website/package.json +39 -0
- package/website/pnpm-lock.yaml +1061 -0
- package/website/src/app.d.ts +13 -0
- package/website/src/app.html +11 -0
- package/website/src/lib/actions/addCopyButtons.ts +73 -0
- package/website/src/lib/assets/favicon.svg +1 -0
- package/website/src/lib/components/CopyCodeButton.svelte +97 -0
- package/website/src/lib/components/DarkModeToggle.svelte +140 -0
- package/website/src/lib/components/ReadingProgress.svelte +36 -0
- package/website/src/lib/components/RelatedPosts.svelte +151 -0
- package/website/src/lib/components/TableOfContents.svelte +184 -0
- package/website/src/lib/index.ts +1 -0
- package/website/src/lib/posts/convert-docs-to-markdown.md +506 -0
- package/website/src/routes/+layout.svelte +59 -0
- package/website/src/routes/+page.svelte +1033 -0
- package/website/src/routes/about/+page.svelte +607 -0
- package/website/src/routes/blog/+page.svelte +486 -0
- package/website/src/routes/blog/[slug]/+page.svelte +988 -0
- package/website/src/routes/blog/[slug]/+page.ts +53 -0
- package/website/src/routes/sitemap.xml/+server.ts +62 -0
- package/website/static/favicon.svg +10 -0
- package/website/static/og.png +2 -0
- package/website/static/og.svg +26 -0
- package/website/static/robots.txt +43 -0
- package/website/svelte.config.js +13 -0
- package/website/tsconfig.json +20 -0
- package/website/vite.config.ts +6 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// SSR Load function for proper SEO
|
|
2
|
+
import type { PageLoad } from './$types';
|
|
3
|
+
|
|
4
|
+
/**
 * SvelteKit `load` function for the blog post route.
 *
 * Looks up `params.slug` in a hard-coded table of post metadata and returns
 * the matching entry as `{ post }`.
 *
 * @param event - Load event; only `params.slug` is read.
 * @returns An object with a single `post` property holding the post metadata.
 * @throws Error with the message 'Post not found' when the slug has no entry.
 */
export const load: PageLoad = async ({ params }) => {
  // Post metadata keyed by slug.
  const posts = {
    'convert-docs-to-markdown-for-llm': {
      slug: 'convert-docs-to-markdown-for-llm',
      title: 'How to Convert Documentation to Markdown for LLMs (Complete Guide)',
      excerpt: 'Step-by-step guide: Transform entire documentation websites into clean, AI-ready markdown. Includes tools, techniques, and best practices for optimal LLM context.',
      date: '2026-02-21',
      author: 'AlphaTechini',
      readTime: '8 min read',
      tags: ['Tutorial', 'LLM', 'Markdown'],
      category: 'rag',
      subcategory: 'context-preparation',
      modifiedDate: '2026-02-21',
      relatedPosts: [
        'llm-txt-index-guide',
        'ai-agent-documentation-problem',
        'best-practices-rag-context-preparation'
      ],
      faqs: [
        {
          question: 'What is the best way to convert documentation to markdown?',
          answer: 'Automated tools like DocFetch are most efficient. They crawl entire documentation sites, extract clean content, and generate structured markdown with semantic indexing (llm.txt).'
        },
        {
          question: 'Why convert documentation to markdown for LLMs?',
          answer: 'LLMs need complete context in a single prompt. Markdown provides clean, structured text without HTML bloat, navigation, or ads that waste tokens.'
        },
        {
          question: 'What is llm.txt and how does it help?',
          answer: 'llm.txt is a semantic index file that categorizes documentation sections (GUIDE, API, TUTORIAL) with descriptions. It helps AI agents navigate large documentation efficiently.'
        },
        {
          question: 'Can I automate documentation conversion?',
          answer: 'Yes. Tools like DocFetch can automatically fetch, clean, and convert entire documentation sites with one command. Set up cron jobs for regular updates.'
        }
      ],
      content: '' // Will be loaded dynamically
    }
  };

  // Only own keys count as posts. A bare `posts[slug]` lookup also resolves
  // inherited members such as `constructor`, `toString` or `__proto__`, which
  // are truthy and would slip past a simple `!post` check.
  if (!Object.prototype.hasOwnProperty.call(posts, params.slug)) {
    // NOTE(review): a plain Error is reported by SvelteKit as an unexpected
    // error (HTTP 500). Consider `error(404, 'Post not found')` from
    // '@sveltejs/kit' so unknown slugs produce a real 404.
    throw new Error('Post not found');
  }

  // Safe after the own-property check above.
  const post = posts[params.slug as keyof typeof posts];

  return { post };
};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// Dynamic sitemap generation for SEO
|
|
2
|
+
import type { RequestHandler } from '@sveltejs/kit';
|
|
3
|
+
|
|
4
|
+
/** One `<url>` entry of the generated sitemap. */
interface SitemapEntry {
  /** Path appended to the site's base URL to form `<loc>`. */
  path: string;
  priority: string;
  changefreq: string;
  /** Date in YYYY-MM-DD form; entries without one are stamped with today's date. */
  lastmod?: string;
  /** Blog post slug; carried alongside the entry but not written to the XML. */
  slug?: string;
}

/**
 * Serves the sitemap as an XML document.
 *
 * The page list is hard-coded: a set of static pages plus a set of blog
 * posts. Each entry becomes one `<url>` element with `<loc>`, `<lastmod>`,
 * `<changefreq>` and `<priority>`.
 *
 * @returns A `Response` with the sitemap body, `Content-Type: application/xml`
 *          and `Cache-Control: max-age=0, must-revalidate`.
 */
export const GET: RequestHandler = async () => {
  // In production, fetch from CMS/markdown files
  const baseUrl = 'https://docfetch.dev';

  // NOTE(review): the `/#...` entries differ from `/` only by a URL fragment;
  // confirm whether they are wanted in the sitemap.
  const staticPages: SitemapEntry[] = [
    { path: '/', priority: '1.0', changefreq: 'weekly' },
    { path: '/#features', priority: '0.8', changefreq: 'monthly' },
    { path: '/#installation', priority: '0.8', changefreq: 'monthly' },
    { path: '/#usage', priority: '0.7', changefreq: 'monthly' },
    { path: '/blog', priority: '0.9', changefreq: 'daily' }
  ];

  const blogPosts: SitemapEntry[] = [
    {
      slug: 'convert-docs-to-markdown-for-llm',
      path: '/rag/context-preparation/convert-docs-to-markdown',
      lastmod: '2026-02-21',
      priority: '0.8',
      changefreq: 'weekly'
    },
    {
      slug: 'llm-txt-index-guide',
      path: '/rag/context-preparation/llm-txt-guide',
      lastmod: '2026-02-21',
      priority: '0.7',
      changefreq: 'weekly'
    },
    {
      slug: 'ai-agent-documentation-problem',
      path: '/llm-tools/documentation/ai-agents-cant-read-docs',
      lastmod: '2026-02-21',
      priority: '0.7',
      changefreq: 'weekly'
    }
  ];

  const allPages: SitemapEntry[] = [...staticPages, ...blogPosts];

  // Fallback <lastmod> (date part of the current ISO timestamp), computed
  // once instead of once per entry.
  const today = new Date().toISOString().split('T')[0];

  const urlEntries = allPages
    .map(
      // `||` (not `??`) on purpose: an empty-string lastmod also falls back to today.
      (page) => `  <url>
    <loc>${baseUrl}${page.path}</loc>
    <lastmod>${page.lastmod || today}</lastmod>
    <changefreq>${page.changefreq}</changefreq>
    <priority>${page.priority}</priority>
  </url>`
    )
    .join('\n');

  const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
${urlEntries}
</urlset>`;

  return new Response(sitemap, {
    headers: {
      'Content-Type': 'application/xml',
      'Cache-Control': 'max-age=0, must-revalidate'
    }
  });
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="grad" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
4
|
+
<stop offset="0%" style="stop-color:#0066cc;stop-opacity:1" />
|
|
5
|
+
<stop offset="100%" style="stop-color:#0052a3;stop-opacity:1" />
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
<rect width="100" height="100" rx="20" fill="url(#grad)"/>
|
|
9
|
+
<text x="50" y="70" font-family="Georgia, serif" font-size="60" font-weight="bold" fill="white" text-anchor="middle">D</text>
|
|
10
|
+
</svg>
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
<svg width="1200" height="630" xmlns="http://www.w3.org/2000/svg">
|
|
2
|
+
<defs>
|
|
3
|
+
<linearGradient id="bg" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
4
|
+
<stop offset="0%" stop-color="#0066cc"/>
|
|
5
|
+
<stop offset="100%" stop-color="#004080"/>
|
|
6
|
+
</linearGradient>
|
|
7
|
+
</defs>
|
|
8
|
+
|
|
9
|
+
<!-- Background -->
|
|
10
|
+
<rect width="1200" height="630" fill="url(#bg)"/>
|
|
11
|
+
|
|
12
|
+
<!-- Icon -->
|
|
13
|
+
<text x="150" y="400" font-family="Georgia, serif" font-size="280" fill="white">📚</text>
|
|
14
|
+
|
|
15
|
+
<!-- Title -->
|
|
16
|
+
<text x="450" y="280" font-family="-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif" font-size="72" font-weight="700" fill="white">DocFetch</text>
|
|
17
|
+
|
|
18
|
+
<!-- Tagline -->
|
|
19
|
+
<text x="450" y="360" font-family="-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif" font-size="36" fill="#e0e0e0">Transform documentation into AI-ready markdown</text>
|
|
20
|
+
|
|
21
|
+
<!-- URL -->
|
|
22
|
+
<text x="450" y="450" font-family="monospace" font-size="28" fill="#b3d9ff">docfetch.dev</text>
|
|
23
|
+
|
|
24
|
+
<!-- Features -->
|
|
25
|
+
<text x="450" y="520" font-family="-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif" font-size="24" fill="#cce5ff">Single-file markdown • LLM.txt indexing • One command</text>
|
|
26
|
+
</svg>
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Robots.txt for DocFetch
|
|
2
|
+
# Default group for all crawlers: everything is allowed except the paths disallowed below
|
|
3
|
+
|
|
4
|
+
User-agent: *
|
|
5
|
+
Allow: /
|
|
6
|
+
|
|
7
|
+
# Sitemap location
|
|
8
|
+
Sitemap: https://docfetch.dev/sitemap.xml
|
|
9
|
+
|
|
10
|
+
# Crawl-delay (optional, be nice to servers)
|
|
11
|
+
Crawl-delay: 1
|
|
12
|
+
|
|
13
|
+
# Block common bad paths
|
|
14
|
+
Disallow: /_app/
|
|
15
|
+
Disallow: /node_modules/
|
|
16
|
+
Disallow: /*.json$
|
|
17
|
+
|
|
18
|
+
# Allow all blog content
|
|
19
|
+
Allow: /blog/
|
|
20
|
+
Allow: /rag/
|
|
21
|
+
Allow: /llm-tools/
|
|
22
|
+
Allow: /web3/
|
|
23
|
+
Allow: /ai-infra/
|
|
24
|
+
|
|
25
|
+
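# Specific rules for Googlebot (priority crawler)
# NOTE: a crawler obeys only the most specific group that matches it, so the
# Disallow rules in the "*" group above do not apply to Googlebot. Googlebot
# also ignores Crawl-delay.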
|
|
26
|
+
User-agent: Googlebot
|
|
27
|
+
Allow: /
|
|
28
|
+
Crawl-delay: 0
|
|
29
|
+
|
|
30
|
+
# Bing
|
|
31
|
+
User-agent: Bingbot
|
|
32
|
+
Allow: /
|
|
33
|
+
Crawl-delay: 2
|
|
34
|
+
|
|
35
|
+
# Block AI scrapers if desired (controversial but option)
|
|
36
|
+
# User-agent: GPTBot
|
|
37
|
+
# Disallow: /
|
|
38
|
+
|
|
39
|
+
# User-agent: ChatGPT-User
|
|
40
|
+
# Disallow: /
|
|
41
|
+
|
|
42
|
+
# User-agent: CCBot
|
|
43
|
+
# Disallow: /
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import adapter from '@sveltejs/adapter-auto';
|
|
2
|
+
|
|
3
|
+
// Kit options. The adapter comes from '@sveltejs/adapter-auto', which only
// supports some environments (list: https://svelte.dev/docs/kit/adapter-auto).
// If yours is not supported, or you have settled on a specific environment,
// swap in a different adapter; see https://svelte.dev/docs/kit/adapters.
const kit = { adapter: adapter() };

/** @type {import('@sveltejs/kit').Config} */
const config = { kit };
|
|
12
|
+
|
|
13
|
+
export default config;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "./.svelte-kit/tsconfig.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"rewriteRelativeImportExtensions": true,
|
|
5
|
+
"allowJs": true,
|
|
6
|
+
"checkJs": true,
|
|
7
|
+
"esModuleInterop": true,
|
|
8
|
+
"forceConsistentCasingInFileNames": true,
|
|
9
|
+
"resolveJsonModule": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"sourceMap": true,
|
|
12
|
+
"strict": true,
|
|
13
|
+
"moduleResolution": "bundler"
|
|
14
|
+
}
|
|
15
|
+
// Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
|
|
16
|
+
// except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
|
|
17
|
+
//
|
|
18
|
+
// To make changes to top-level options such as include and exclude, we recommend extending
|
|
19
|
+
// the generated config; see https://svelte.dev/docs/kit/configuration#typescript
|
|
20
|
+
}
|