@nuasite/llm-enhancements 0.0.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +360 -0
- package/dist/types/build-processor.d.ts +11 -0
- package/dist/types/build-processor.d.ts.map +1 -0
- package/dist/types/cms-marker.d.ts +19 -0
- package/dist/types/cms-marker.d.ts.map +1 -0
- package/dist/types/dev-middleware.d.ts +7 -0
- package/dist/types/dev-middleware.d.ts.map +1 -0
- package/dist/types/html-to-markdown.d.ts +14 -0
- package/dist/types/html-to-markdown.d.ts.map +1 -0
- package/dist/types/index.d.ts +5 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/llm-endpoint.d.ts +15 -0
- package/dist/types/llm-endpoint.d.ts.map +1 -0
- package/dist/types/markdown-generator.d.ts +29 -0
- package/dist/types/markdown-generator.d.ts.map +1 -0
- package/dist/types/paths.d.ts +31 -0
- package/dist/types/paths.d.ts.map +1 -0
- package/dist/types/tests/tsconfig.tsbuildinfo +1 -0
- package/dist/types/tsconfig.tsbuildinfo +1 -0
- package/dist/types/types.d.ts +34 -0
- package/dist/types/types.d.ts.map +1 -0
- package/package.json +48 -0
- package/src/build-processor.ts +173 -0
- package/src/cms-marker.ts +56 -0
- package/src/dev-middleware.ts +240 -0
- package/src/html-to-markdown.ts +351 -0
- package/src/index.ts +29 -0
- package/src/llm-endpoint.ts +80 -0
- package/src/llms-txt-endpoint.ts +123 -0
- package/src/markdown-generator.ts +138 -0
- package/src/paths.ts +90 -0
- package/src/tsconfig.json +6 -0
- package/src/types.ts +67 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * User-facing options accepted by `resolveOptions`. Every field is optional;
 * the default applied for an omitted field is noted on that field.
 */
export interface PageMarkdownOptions {
|
|
2
|
+
/** Directory containing content collections (default: 'src/content') */
|
|
3
|
+
contentDir?: string;
|
|
4
|
+
/** Whether to include static (non-collection) pages (default: true) */
|
|
5
|
+
includeStaticPages?: boolean;
|
|
6
|
+
/** Whether to include frontmatter in output (default: true) */
|
|
7
|
+
includeFrontmatter?: boolean;
|
|
8
|
+
/**
 * Enable /.well-known/llm.md endpoint (default: true).
 * Accepts a boolean switch or an `LlmEndpointOptions` object.
 */
|
|
9
|
+
llmEndpoint?: boolean | LlmEndpointOptions;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Object form of `PageMarkdownOptions.llmEndpoint`. All fields are optional
 * string overrides/additions for the generated endpoint document.
 */
export interface LlmEndpointOptions {
|
|
12
|
+
/** Site name override */
|
|
13
|
+
siteName?: string;
|
|
14
|
+
/** Site description override */
|
|
15
|
+
description?: string;
|
|
16
|
+
/** Additional content to append */
|
|
17
|
+
additionalContent?: string;
|
|
18
|
+
}
|
|
19
|
+
/**
 * A markdown document split into its frontmatter fields and its body text,
 * optionally tagged with the source file it came from.
 */
export interface MarkdownOutput {
|
|
20
|
+
/** YAML frontmatter fields */
|
|
21
|
+
frontmatter: Record<string, unknown>;
|
|
22
|
+
/** Markdown body content */
|
|
23
|
+
body: string;
|
|
24
|
+
/** Path to the original source file (if from collection) */
|
|
25
|
+
sourcePath?: string;
|
|
26
|
+
}
|
|
27
|
+
/**
 * Result type of `resolveOptions`: the same fields as `PageMarkdownOptions`
 * but none of them optional. `llmEndpoint` is either `false` or an
 * `LlmEndpointOptions` object (never `true`).
 *
 * NOTE(review): other modules of this package read `options.llmsTxt` from a
 * `ResolvedOptions` value, but no such field is declared here — this
 * declaration may be stale relative to src/types.ts; confirm and regenerate.
 */
export interface ResolvedOptions {
|
|
28
|
+
contentDir: string;
|
|
29
|
+
includeStaticPages: boolean;
|
|
30
|
+
includeFrontmatter: boolean;
|
|
31
|
+
llmEndpoint: false | LlmEndpointOptions;
|
|
32
|
+
}
|
|
33
|
+
/**
 * Turn the optional user-facing `PageMarkdownOptions` into a `ResolvedOptions`
 * value. `options` may be omitted entirely. This is a declaration only; the
 * defaulting logic lives in the implementation, which is not part of this file.
 */
export declare function resolveOptions(options?: PageMarkdownOptions): ResolvedOptions;
|
|
34
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,mBAAmB;IACnC,wEAAwE;IACxE,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,uEAAuE;IACvE,kBAAkB,CAAC,EAAE,OAAO,CAAA;IAC5B,+DAA+D;IAC/D,kBAAkB,CAAC,EAAE,OAAO,CAAA;IAC5B,0DAA0D;IAC1D,WAAW,CAAC,EAAE,OAAO,GAAG,kBAAkB,CAAA;CAC1C;AAED,MAAM,WAAW,kBAAkB;IAClC,yBAAyB;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,gCAAgC;IAChC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,mCAAmC;IACnC,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC1B;AAED,MAAM,WAAW,cAAc;IAC9B,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACpC,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAA;IACZ,4DAA4D;IAC5D,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,eAAe;IAC/B,UAAU,EAAE,MAAM,CAAA;IAClB,kBAAkB,EAAE,OAAO,CAAA;IAC3B,kBAAkB,EAAE,OAAO,CAAA;IAC3B,WAAW,EAAE,KAAK,GAAG,kBAAkB,CAAA;CACvC;AAED,wBAAgB,cAAc,CAAC,OAAO,GAAE,mBAAwB,GAAG,eAAe,CAQjF"}
|
package/package.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@nuasite/llm-enhancements",
|
|
3
|
+
"description": "Expose pages as .md endpoints for Astro",
|
|
4
|
+
"files": [
|
|
5
|
+
"dist/**",
|
|
6
|
+
"src/**",
|
|
7
|
+
"README.md",
|
|
8
|
+
"package.json"
|
|
9
|
+
],
|
|
10
|
+
"homepage": "https://github.com/nuasite/nuasite/blob/main/packages/page-markdown/README.md",
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/nuasite/nuasite.git",
|
|
14
|
+
"directory": "packages/page-markdown"
|
|
15
|
+
},
|
|
16
|
+
"license": "Apache-2.0",
|
|
17
|
+
"version": "0.0.57",
|
|
18
|
+
"module": "src/index.ts",
|
|
19
|
+
"types": "src/index.ts",
|
|
20
|
+
"type": "module",
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"astro": "^5.16.6",
|
|
23
|
+
"node-html-parser": "^6.1.13"
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@types/bun": "latest"
|
|
27
|
+
},
|
|
28
|
+
"peerDependencies": {
|
|
29
|
+
"typescript": "^5",
|
|
30
|
+
"vite": "^6",
|
|
31
|
+
"@nuasite/cms-marker": "0.0.57"
|
|
32
|
+
},
|
|
33
|
+
"peerDependenciesMeta": {
|
|
34
|
+
"@nuasite/cms-marker": {
|
|
35
|
+
"optional": true
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"scripts": {
|
|
39
|
+
"prepack": "bun run ../../scripts/workspace-deps/resolve-deps.ts"
|
|
40
|
+
},
|
|
41
|
+
"keywords": [
|
|
42
|
+
"markdown",
|
|
43
|
+
"astro",
|
|
44
|
+
"nuasite",
|
|
45
|
+
"withastro",
|
|
46
|
+
"pages"
|
|
47
|
+
]
|
|
48
|
+
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import type { AstroConfig, AstroIntegrationLogger } from 'astro'
import fs from 'node:fs/promises'
import path from 'node:path'
import { fileURLToPath } from 'node:url'
import { getCollectionContent } from './cms-marker'
import { htmlToMarkdown } from './html-to-markdown'
import { generateLlmMarkdown, type PageEntry, type SiteMetadata } from './llm-endpoint'
import { generateLlmsTxt } from './llms-txt-endpoint'
import { createCollectionOutput, createStaticOutput, generateMarkdown } from './markdown-generator'
import { getHtmlPath, getLlmOutputPath, getLlmsTxtOutputPath, getMdOutputPath, injectMarkdownLink, normalizePath } from './paths'
import type { ResolvedOptions } from './types'
|
|
11
|
+
|
|
12
|
+
/**
 * Minimal description of a built page as consumed by `processBuildOutput`.
 */
interface PageInfo {
|
|
13
|
+
/**
 * Page path without a leading slash; an empty string denotes the site root
 * (`processBuildOutput` maps '' to '/' and prefixes everything else with '/').
 */
pathname: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
 * Return the `site` value from the Astro config with one trailing slash
 * removed, or an empty string when no `site` is configured.
 */
function getBaseUrl(config: AstroConfig): string {
	const { site } = config
	if (!site) {
		return ''
	}
	if (site.endsWith('/')) {
		return site.slice(0, -1)
	}
	return site
}
|
|
24
|
+
|
|
25
|
+
/**
 * Process build output and generate .md files for all pages.
 *
 * Each page is first looked up as a content-collection entry. When none is
 * found and `options.includeStaticPages` is set, the page's built HTML file is
 * converted to markdown instead. For every page that produced markdown, an
 * alternate link is injected into its HTML file (best effort). Afterwards the
 * optional `/.well-known/llm.md` and `/llms.txt` index files are written; both
 * are skipped with a warning when no `site` is configured.
 *
 * A failure while handling one page is logged as a warning and does not stop
 * the remaining pages from being processed.
 *
 * @param dir - URL of the build output directory.
 * @param pages - Built pages; `pathname` has no leading slash and '' is the root.
 * @param options - Resolved integration options.
 * @param logger - Logger used for the summary line and for warnings.
 * @param config - Astro config; only `site` is read (through `getBaseUrl`).
 */
export async function processBuildOutput(
	dir: URL,
	pages: PageInfo[],
	options: ResolvedOptions,
	logger: AstroIntegrationLogger,
	config: AstroConfig,
) {
	const baseUrl = getBaseUrl(config)
	// `URL#pathname` is percent-encoded (a space becomes `%20`) and on Windows
	// starts with `/C:/...`, so it is not a usable filesystem path. Convert file
	// URLs properly; keep the previous behavior for any other protocol.
	const distDir = dir.protocol === 'file:' ? fileURLToPath(dir) : dir.pathname
	let collectionCount = 0
	let staticCount = 0
	const pageEntries: PageEntry[] = []
	let siteMetadata: SiteMetadata = {}

	for (const page of pages) {
		const pagePath = normalizePath(page.pathname === '' ? '/' : `/${page.pathname}`)

		try {
			const mdPath = getMdOutputPath(distDir, pagePath)
			const htmlPath = getHtmlPath(distDir, pagePath)

			// Try collection page first
			const content = await getCollectionContent(pagePath, options.contentDir)
			if (content) {
				const output = createCollectionOutput(content.frontmatter, content.body, content.file)
				const markdown = generateMarkdown(output, {
					url: pagePath,
					type: 'collection',
					sourcePath: content.file,
				}, options.includeFrontmatter)

				await writeMarkdownFile(mdPath, markdown)
				await injectLinkIntoHtml(htmlPath, pagePath)
				pageEntries.push({
					pathname: pagePath,
					title: extractTitle(content.frontmatter.title),
					type: 'collection',
				})
				collectionCount++
				continue
			}

			// Fall back to static page handling
			if (!options.includeStaticPages) continue

			const htmlExists = await fileExists(htmlPath)
			if (!htmlExists) continue

			const html = await fs.readFile(htmlPath, 'utf-8')
			const { metadata, body } = htmlToMarkdown(html)
			const output = createStaticOutput(metadata, body)

			const markdown = generateMarkdown(output, {
				url: pagePath,
				type: 'static',
			}, options.includeFrontmatter)

			await writeMarkdownFile(mdPath, markdown)
			await injectLinkIntoHtml(htmlPath, pagePath)
			pageEntries.push({
				pathname: pagePath,
				title: metadata.title,
				type: 'static',
			})

			// Extract site metadata from homepage (only reached when the homepage
			// is handled as a static page)
			if (pagePath === '/') {
				siteMetadata = {
					title: metadata.title,
					description: metadata.description,
				}
			}

			staticCount++
		} catch (error) {
			logger.warn(`Failed to process ${pagePath}: ${error}`)
		}
	}

	const total = collectionCount + staticCount
	if (total > 0) {
		logger.info(`Generated ${total} .md files (${collectionCount} collection, ${staticCount} static)`)
	}

	// Generate llm.md if enabled
	if (options.llmEndpoint !== false) {
		if (!baseUrl) {
			logger.warn('Skipping /.well-known/llm.md generation: no `site` configured in astro.config')
		} else {
			try {
				const llmContent = generateLlmMarkdown(pageEntries, { ...siteMetadata, baseUrl }, options.llmEndpoint)
				const llmPath = getLlmOutputPath(distDir)
				await writeMarkdownFile(llmPath, llmContent)
				logger.info('Generated /.well-known/llm.md')
			} catch (error) {
				logger.warn(`Failed to generate llm.md: ${error}`)
			}
		}
	}

	// Generate llms.txt if enabled
	if (options.llmsTxt !== false) {
		if (!baseUrl) {
			logger.warn('Skipping /llms.txt generation: no `site` configured in astro.config')
		} else {
			try {
				const llmsTxtContent = generateLlmsTxt(pageEntries, { ...siteMetadata, baseUrl }, options.llmsTxt)
				const llmsTxtPath = getLlmsTxtOutputPath(distDir)
				await writeMarkdownFile(llmsTxtPath, llmsTxtContent)
				logger.info('Generated /llms.txt')
			} catch (error) {
				logger.warn(`Failed to generate llms.txt: ${error}`)
			}
		}
	}
}
|
|
144
|
+
|
|
145
|
+
/**
 * Rewrite the HTML file at `htmlPath` in place with the output of
 * `injectMarkdownLink` for `pagePath`. Best effort: any error (for example a
 * missing file) is deliberately swallowed.
 */
async function injectLinkIntoHtml(htmlPath: string, pagePath: string): Promise<void> {
	try {
		const currentHtml = await fs.readFile(htmlPath, 'utf-8')
		const linkedHtml = injectMarkdownLink(currentHtml, pagePath)
		await fs.writeFile(htmlPath, linkedHtml, 'utf-8')
	} catch {
		// File might not exist for some pages
	}
}
|
|
153
|
+
|
|
154
|
+
/**
 * Resolve to `true` when `fs.access` succeeds for `filePath`, and to `false`
 * when it rejects for any reason.
 */
async function fileExists(filePath: string): Promise<boolean> {
	return fs.access(filePath).then(
		() => true,
		() => false,
	)
}
|
|
162
|
+
|
|
163
|
+
/**
 * Write `content` to `filePath` as UTF-8, creating the parent directory
 * (recursively) first.
 */
async function writeMarkdownFile(filePath: string, content: string): Promise<void> {
	const parentDir = path.dirname(filePath)
	await fs.mkdir(parentDir, { recursive: true })
	await fs.writeFile(filePath, content, 'utf-8')
}
|
|
167
|
+
|
|
168
|
+
/**
 * Extract a page title from a frontmatter entry.
 *
 * Accepts either a plain string or an object carrying the string under a
 * `value` key — the shape declared for parsed collection frontmatter
 * (`{ value: string; line: number }`). Previously only plain strings were
 * recognized, so titles of collection pages were always dropped.
 *
 * @param title - Raw frontmatter entry of unknown shape.
 * @returns The title string, or `undefined` when none can be read.
 */
function extractTitle(title: unknown): string | undefined {
	if (typeof title === 'string') {
		return title
	}
	if (typeof title === 'object' && title !== null && 'value' in title) {
		const { value } = title as { value: unknown }
		if (typeof value === 'string') {
			return value
		}
	}
	return undefined
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
 * Value that cms-marker's `findCollectionSource` resolves to for a page path
 * and that is handed on to `parseMarkdownContent`.
 */
export interface CollectionInfo {
|
|
2
|
+
// presumably the name of the content collection — confirm against cms-marker
name: string
|
|
3
|
+
// presumably the entry's slug within the collection — confirm against cms-marker
slug: string
|
|
4
|
+
// presumably the path of the entry's source file — confirm against cms-marker
file: string
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
/**
 * Value that cms-marker's `parseMarkdownContent` resolves to and that
 * `getCollectionContent` returns to its callers.
 */
export interface ParsedContent {
|
|
8
|
+
// Each key maps to an object, not a bare string: read `.value` for the text.
// `line` is presumably the source line of the key — confirm against cms-marker.
frontmatter: Record<string, { value: string; line: number }>
|
|
9
|
+
// Content body passed on to the markdown generator by callers
body: string
|
|
10
|
+
// presumably the line at which the body starts in the source file — confirm
bodyStartLine: number
|
|
11
|
+
// Callers pass this on as the `sourcePath` of the generated markdown
file: string
|
|
12
|
+
collectionName: string
|
|
13
|
+
collectionSlug: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/** Signature of cms-marker's `findCollectionSource` as used by this module. */
type FindCollectionSource = (pagePath: string, contentDir?: string) => Promise<CollectionInfo | undefined>
/** Signature of cms-marker's `parseMarkdownContent` as used by this module. */
type ParseMarkdownContent = (collectionInfo: CollectionInfo) => Promise<ParsedContent | undefined>

// Both stay `undefined` when the optional `@nuasite/cms-marker` package cannot be imported.
let findCollectionSource: FindCollectionSource | undefined
let parseMarkdownContent: ParseMarkdownContent | undefined
// Cached so the import is attempted once and every caller waits for the same attempt.
let initPromise: Promise<void> | undefined

/**
 * Try to import the optional `@nuasite/cms-marker` package and store its two
 * functions. An import failure is swallowed on purpose: the package is optional.
 */
async function loadCmsMarker(): Promise<void> {
	try {
		const cmsMarker = await import('@nuasite/cms-marker')
		findCollectionSource = cmsMarker.findCollectionSource
		parseMarkdownContent = cmsMarker.parseMarkdownContent
	} catch {
		// cms-marker not available
	}
}

/**
 * Load cms-marker at most once.
 *
 * The in-flight promise is cached rather than a boolean flag: with a flag set
 * before the import finished, a second caller arriving during the import
 * returned immediately and wrongly concluded that cms-marker was unavailable.
 *
 * @returns A promise that settles once the single load attempt has finished.
 */
function init(): Promise<void> {
	if (!initPromise) {
		initPromise = loadCmsMarker()
	}
	return initPromise
}
|
|
35
|
+
|
|
36
|
+
/**
 * Look up the collection entry behind `pagePath` and return its parsed content.
 *
 * Resolves to `undefined` when cms-marker is not loaded or when no collection
 * source is found for the path.
 */
export async function getCollectionContent(
	pagePath: string,
	contentDir: string,
): Promise<ParsedContent | undefined> {
	await init()

	const locate = findCollectionSource
	const parse = parseMarkdownContent
	if (locate && parse) {
		const source = await locate(pagePath, contentDir)
		return source ? parse(source) : undefined
	}

	return undefined
}
|
|
53
|
+
|
|
54
|
+
/**
 * Report whether both cms-marker functions are currently loaded. Does not
 * trigger loading itself.
 */
export function hasCmsMarker(): boolean {
	const missing = findCollectionSource === undefined || parseMarkdownContent === undefined
	return !missing
}
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import type { AstroConfig } from 'astro'
|
|
2
|
+
import type { ViteDevServer } from 'vite'
|
|
3
|
+
import { getCollectionContent } from './cms-marker'
|
|
4
|
+
import { htmlToMarkdown } from './html-to-markdown'
|
|
5
|
+
import { generateLlmMarkdown, type PageEntry, type SiteMetadata } from './llm-endpoint'
|
|
6
|
+
import { generateLlmsTxt } from './llms-txt-endpoint'
|
|
7
|
+
import { createCollectionOutput, createStaticOutput, generateMarkdown } from './markdown-generator'
|
|
8
|
+
import { injectMarkdownLink, LLM_ENDPOINT_PATH, LLMS_TXT_PATH, mdUrlToPagePath, normalizePath } from './paths'
|
|
9
|
+
import type { ResolvedOptions } from './types'
|
|
10
|
+
|
|
11
|
+
// File extensions treated as static assets: requests matching this pattern are
// passed through without HTML link injection. The pattern is anchored at the
// end of the tested string.
const ASSET_PATTERN = /\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot|json)$/
|
|
12
|
+
|
|
13
|
+
/**
 * Build the markdown document for one page path.
 *
 * Collection content wins when available. Otherwise, if static pages are
 * enabled, the page is requested over HTTP from `host` and its HTML is
 * converted. Resolves to `null` when static pages are disabled or when the
 * response is not a successful `text/html` response.
 */
async function generateMarkdownForPath(
	pagePath: string,
	host: string,
	options: ResolvedOptions,
): Promise<string | null> {
	// Collection entries take precedence over rendered HTML
	const collectionEntry = await getCollectionContent(pagePath, options.contentDir)
	if (collectionEntry) {
		return generateMarkdown(
			createCollectionOutput(collectionEntry.frontmatter, collectionEntry.body, collectionEntry.file),
			{
				url: pagePath,
				type: 'collection',
				sourcePath: collectionEntry.file,
			},
			options.includeFrontmatter,
		)
	}

	// Static pages are optional
	if (!options.includeStaticPages) {
		return null
	}

	const pageResponse = await fetch(`http://${host}${pagePath}`, {
		headers: { Accept: 'text/html' },
	})

	const servedAsHtml = pageResponse.ok
		&& Boolean(pageResponse.headers.get('content-type')?.includes('text/html'))
	if (!servedAsHtml) {
		return null
	}

	const { metadata, body } = htmlToMarkdown(await pageResponse.text())

	return generateMarkdown(
		createStaticOutput(metadata, body),
		{
			url: pagePath,
			type: 'static',
		},
		options.includeFrontmatter,
	)
}
|
|
55
|
+
|
|
56
|
+
/**
 * Narrow a frontmatter `title` entry to a string. Accepts a plain string or
 * an object carrying the string under `value` (the shape declared for parsed
 * collection frontmatter); anything else yields `undefined`.
 */
function readFrontmatterTitle(raw: unknown): string | undefined {
	if (typeof raw === 'string') return raw
	if (typeof raw === 'object' && raw !== null && 'value' in raw) {
		const { value } = raw as { value: unknown }
		return typeof value === 'string' ? value : undefined
	}
	return undefined
}

/**
 * Discover all pages and their metadata for the LLM endpoint.
 *
 * In dev mode only the homepage is discovered: its HTML is requested once
 * from `host` to obtain the site title/description, and it is listed either
 * as a collection page (when collection content exists for '/') or, if static
 * pages are enabled and the request succeeded, as a static page.
 *
 * All errors are swallowed; the result then simply contains less information.
 *
 * @param host - Host (and port) of the running dev server.
 * @param options - Resolved integration options.
 * @returns The discovered pages plus site metadata taken from the homepage.
 */
async function discoverPages(host: string, options: ResolvedOptions): Promise<{ pages: PageEntry[]; siteMetadata: SiteMetadata }> {
	const pages: PageEntry[] = []
	let siteMetadata: SiteMetadata = {}
	let homepageReachable = false
	let homepageTitle: PageEntry['title']

	// Fetch the homepage once; the result feeds both the site metadata and the
	// static homepage entry below
	try {
		const homeResponse = await fetch(`http://${host}/`, {
			headers: { Accept: 'text/html' },
		})
		if (homeResponse.ok) {
			const { metadata } = htmlToMarkdown(await homeResponse.text())
			siteMetadata = {
				title: metadata.title,
				description: metadata.description,
			}
			homepageTitle = metadata.title
			homepageReachable = true
		}
	} catch {
		// Ignore errors
	}

	// Try to get pages from Astro's dev server manifest via __astro_dev_toolbar__
	// For now, we'll discover pages by checking common routes and the content directory
	// In dev mode, we just report what we can discover

	// Check if homepage exists
	try {
		const content = await getCollectionContent('/', options.contentDir)
		if (content) {
			// Frontmatter entries are objects, so the title must be narrowed rather than cast
			pages.push({ pathname: '/', title: readFrontmatterTitle(content.frontmatter.title), type: 'collection' })
		} else if (options.includeStaticPages && homepageReachable) {
			pages.push({ pathname: '/', title: homepageTitle, type: 'static' })
		}
	} catch {
		// Ignore
	}

	return { pages, siteMetadata }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Read `site` from the Astro config and drop one trailing slash if present.
 * Yields an empty string when `site` is not configured.
 */
function getBaseUrl(config: AstroConfig): string {
	const configuredSite = config.site
	if (!configuredSite) return ''

	const lastIndex = configuredSite.length - 1
	return configuredSite[lastIndex] === '/' ? configuredSite.slice(0, lastIndex) : configuredSite
}
|
|
113
|
+
|
|
114
|
+
/**
 * Return the path portion of a request URL, i.e. everything before the first
 * `?` or `#`. A missing URL yields an empty string.
 */
function getRequestPath(rawUrl: string | undefined): string {
	const url = rawUrl || ''
	const cut = url.search(/[?#]/)
	return cut === -1 ? url : url.slice(0, cut)
}

/**
 * Convert a chunk handed to `res.write`/`res.end` into a Buffer, honoring the
 * encoding argument for strings. Returns `undefined` for empty strings and for
 * values that are not data (for example a callback passed as first argument).
 */
function chunkToBuffer(chunk: unknown, encoding: unknown): Buffer | undefined {
	if (typeof chunk === 'string') {
		if (chunk.length === 0) return undefined
		const textEncoding = typeof encoding === 'string' && Buffer.isEncoding(encoding) ? encoding : 'utf8'
		return Buffer.from(chunk, textEncoding)
	}
	if (chunk instanceof Uint8Array) {
		return Buffer.from(chunk)
	}
	return undefined
}

/** Pick the callback (if any) out of the trailing arguments of `write`/`end`. */
function findCallback(args: unknown[]): (() => void) | undefined {
	return args.find((arg): arg is () => void => typeof arg === 'function')
}

/**
 * Create dev server middleware to handle markdown requests.
 *
 * Registers, in order:
 * 1. `/llms.txt` (only when enabled and `site` is configured),
 * 2. `/.well-known/llm.md` (only when enabled and `site` is configured),
 * 3. `*.md` page endpoints,
 * 4. a response interceptor that buffers non-asset responses and injects the
 *    markdown alternate link into `text/html` bodies.
 *
 * Request URLs are matched on their path only, so a query string or hash does
 * not prevent a match and does not leak into the injected link.
 *
 * @param server - Vite dev server whose connect stack receives the middleware.
 * @param options - Resolved integration options.
 * @param config - Astro config; only `site` is read (through `getBaseUrl`).
 */
export function createDevMiddleware(server: ViteDevServer, options: ResolvedOptions, config: AstroConfig) {
	const baseUrl = getBaseUrl(config)

	// Serve /llms.txt endpoint (only if site is configured)
	const llmsTxtOptions = options.llmsTxt
	if (llmsTxtOptions !== false && baseUrl) {
		server.middlewares.use(async (req, res, next) => {
			if (getRequestPath(req.url) !== LLMS_TXT_PATH) {
				return next()
			}

			try {
				const host = req.headers.host || 'localhost:4321'
				const { pages, siteMetadata } = await discoverPages(host, options)
				const content = generateLlmsTxt(pages, { ...siteMetadata, baseUrl }, llmsTxtOptions)

				res.setHeader('Content-Type', 'text/plain; charset=utf-8')
				res.setHeader('Access-Control-Allow-Origin', '*')
				res.end(content)
				return
			} catch (error) {
				console.error('[page-markdown] Error generating llms.txt:', error)
			}

			return next()
		})
	}

	// Serve /.well-known/llm.md endpoint (only if site is configured)
	const llmEndpointOptions = options.llmEndpoint
	if (llmEndpointOptions !== false && baseUrl) {
		server.middlewares.use(async (req, res, next) => {
			if (getRequestPath(req.url) !== LLM_ENDPOINT_PATH) {
				return next()
			}

			try {
				const host = req.headers.host || 'localhost:4321'
				const { pages, siteMetadata } = await discoverPages(host, options)
				const markdown = generateLlmMarkdown(pages, { ...siteMetadata, baseUrl }, llmEndpointOptions)

				res.setHeader('Content-Type', 'text/markdown; charset=utf-8')
				res.setHeader('Access-Control-Allow-Origin', '*')
				res.end(markdown)
				return
			} catch (error) {
				console.error('[page-markdown] Error generating llm.md:', error)
			}

			return next()
		})
	}

	// Serve .md endpoints
	server.middlewares.use(async (req, res, next) => {
		const requestPath = getRequestPath(req.url)

		if (!requestPath.endsWith('.md')) {
			return next()
		}

		const pagePath = mdUrlToPagePath(requestPath)

		try {
			const host = req.headers.host || 'localhost:4321'
			const markdown = await generateMarkdownForPath(pagePath, host, options)

			if (markdown) {
				res.setHeader('Content-Type', 'text/markdown; charset=utf-8')
				res.setHeader('Access-Control-Allow-Origin', '*')
				res.end(markdown)
				return
			}
		} catch (error) {
			console.error('[page-markdown] Error generating markdown:', error)
		}

		return next()
	})

	// Inject alternate link into HTML responses
	server.middlewares.use((req, res, next) => {
		const requestPath = getRequestPath(req.url)

		if (requestPath.endsWith('.md') || ASSET_PATTERN.test(requestPath)) {
			return next()
		}

		const originalWrite = res.write
		const originalEnd = res.end
		const chunks: Buffer[] = []

		// Buffer everything written so the complete body can be rewritten in `end`
		res.write = ((chunk: unknown, ...writeArgs: unknown[]) => {
			const buffered = chunkToBuffer(chunk, writeArgs[0])
			if (buffered) chunks.push(buffered)

			// The data is only buffered, but producers waiting on the write
			// callback must still be released
			const callback = findCallback(writeArgs)
			if (callback) queueMicrotask(() => callback())
			return true
		}) as typeof res.write

		// Accepts end(cb), end(chunk, cb) and end(chunk, encoding, cb)
		res.end = ((...endArgs: unknown[]) => {
			const finalChunk = chunkToBuffer(endArgs[0], endArgs[1])
			if (finalChunk) chunks.push(finalChunk)
			const callback = findCallback(endArgs)

			const contentType = res.getHeader('content-type')
			const isHtml = typeof contentType === 'string' && contentType.includes('text/html')

			res.write = originalWrite
			res.end = originalEnd

			if (chunks.length === 0) {
				return callback ? res.end(callback) : res.end()
			}

			let payload = Buffer.concat(chunks)
			if (isHtml) {
				const html = payload.toString('utf8')
				payload = Buffer.from(injectMarkdownLink(html, normalizePath(requestPath)), 'utf8')

				// The body length changed; a previously set Content-Length would
				// otherwise truncate or stall the response
				if (!res.headersSent && res.hasHeader('content-length')) {
					res.setHeader('content-length', payload.byteLength)
				}
			}

			return callback ? res.end(payload, callback) : res.end(payload)
		}) as typeof res.end

		next()
	})
}
|