@nuasite/llm-enhancements 0.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ export interface PageMarkdownOptions { // User-facing options; every field is optional
2
+ /** Directory containing content collections (default: 'src/content') */
3
+ contentDir?: string;
4
+ /** Whether to include static (non-collection) pages (default: true) */
5
+ includeStaticPages?: boolean;
6
+ /** Whether to include frontmatter in output (default: true) */
7
+ includeFrontmatter?: boolean;
8
+ /** Enable /.well-known/llm.md endpoint (default: true) */
9
+ llmEndpoint?: boolean | LlmEndpointOptions; // an options object is accepted in place of a plain on/off flag
10
+ }
11
+ export interface LlmEndpointOptions { // Object form of PageMarkdownOptions.llmEndpoint; every field is optional
12
+ /** Site name override */
13
+ siteName?: string;
14
+ /** Site description override */
15
+ description?: string;
16
+ /** Additional content to append */
17
+ additionalContent?: string;
18
+ }
19
+ export interface MarkdownOutput { // A page split into frontmatter fields and markdown body
20
+ /** YAML frontmatter fields */
21
+ frontmatter: Record<string, unknown>;
22
+ /** Markdown body content */
23
+ body: string;
24
+ /** Path to the original source file (if from collection) */
25
+ sourcePath?: string;
26
+ }
27
+ export interface ResolvedOptions { // Same fields as PageMarkdownOptions, but none are optional (return type of resolveOptions)
28
+ contentDir: string;
29
+ includeStaticPages: boolean;
30
+ includeFrontmatter: boolean;
31
+ llmEndpoint: false | LlmEndpointOptions; // `true` is not representable here: either disabled or an options object
32
+ } // NOTE(review): other modules in this package read `options.llmsTxt`, which is not declared in this declaration file; confirm it is in sync with src/types.ts
33
+ export declare function resolveOptions(options?: PageMarkdownOptions): ResolvedOptions; // Turns the optional user options into a ResolvedOptions value; the argument may be omitted
34
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,mBAAmB;IACnC,wEAAwE;IACxE,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,uEAAuE;IACvE,kBAAkB,CAAC,EAAE,OAAO,CAAA;IAC5B,+DAA+D;IAC/D,kBAAkB,CAAC,EAAE,OAAO,CAAA;IAC5B,0DAA0D;IAC1D,WAAW,CAAC,EAAE,OAAO,GAAG,kBAAkB,CAAA;CAC1C;AAED,MAAM,WAAW,kBAAkB;IAClC,yBAAyB;IACzB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,gCAAgC;IAChC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,mCAAmC;IACnC,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC1B;AAED,MAAM,WAAW,cAAc;IAC9B,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IACpC,4BAA4B;IAC5B,IAAI,EAAE,MAAM,CAAA;IACZ,4DAA4D;IAC5D,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,eAAe;IAC/B,UAAU,EAAE,MAAM,CAAA;IAClB,kBAAkB,EAAE,OAAO,CAAA;IAC3B,kBAAkB,EAAE,OAAO,CAAA;IAC3B,WAAW,EAAE,KAAK,GAAG,kBAAkB,CAAA;CACvC;AAED,wBAAgB,cAAc,CAAC,OAAO,GAAE,mBAAwB,GAAG,eAAe,CAQjF"}
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "@nuasite/llm-enhancements",
3
+ "description": "Expose pages as .md endpoints for Astro",
4
+ "files": [
5
+ "dist/**",
6
+ "src/**",
7
+ "README.md",
8
+ "package.json"
9
+ ],
10
+ "homepage": "https://github.com/nuasite/nuasite/blob/main/packages/page-markdown/README.md",
11
+ "repository": {
12
+ "type": "git",
13
+ "url": "git+https://github.com/nuasite/nuasite.git",
14
+ "directory": "packages/page-markdown"
15
+ },
16
+ "license": "Apache-2.0",
17
+ "version": "0.0.57",
18
+ "module": "src/index.ts",
19
+ "types": "src/index.ts",
20
+ "type": "module",
21
+ "dependencies": {
22
+ "astro": "^5.16.6",
23
+ "node-html-parser": "^6.1.13"
24
+ },
25
+ "devDependencies": {
26
+ "@types/bun": "latest"
27
+ },
28
+ "peerDependencies": {
29
+ "typescript": "^5",
30
+ "vite": "^6",
31
+ "@nuasite/cms-marker": "0.0.57"
32
+ },
33
+ "peerDependenciesMeta": {
34
+ "@nuasite/cms-marker": {
35
+ "optional": true
36
+ }
37
+ },
38
+ "scripts": {
39
+ "prepack": "bun run ../../scripts/workspace-deps/resolve-deps.ts"
40
+ },
41
+ "keywords": [
42
+ "markdown",
43
+ "astro",
44
+ "nuasite",
45
+ "withastro",
46
+ "pages"
47
+ ]
48
+ }
@@ -0,0 +1,173 @@
1
+ import type { AstroConfig, AstroIntegrationLogger } from 'astro'
2
+ import fs from 'node:fs/promises'
3
+ import path from 'node:path'
4
+ import { getCollectionContent } from './cms-marker'
5
+ import { htmlToMarkdown } from './html-to-markdown'
6
+ import { generateLlmMarkdown, type PageEntry, type SiteMetadata } from './llm-endpoint'
7
+ import { generateLlmsTxt } from './llms-txt-endpoint'
8
+ import { createCollectionOutput, createStaticOutput, generateMarkdown } from './markdown-generator'
9
+ import { getHtmlPath, getLlmOutputPath, getLlmsTxtOutputPath, getMdOutputPath, injectMarkdownLink, normalizePath } from './paths'
10
+ import type { ResolvedOptions } from './types'
11
+
12
+ interface PageInfo { // Minimal description of one built page, as consumed by processBuildOutput
13
+ pathname: string // '' is treated as the site root; any other value is prefixed with '/' before normalization
14
+ }
15
+
16
/**
 * Resolve the site's base URL from the Astro config.
 *
 * @param config - Astro configuration; only `site` is read.
 * @returns `''` when `site` is not configured, otherwise `site` with one
 *   trailing slash removed (if present).
 */
function getBaseUrl(config: AstroConfig): string {
	const { site } = config
	if (!site) {
		return ''
	}
	if (site.endsWith('/')) {
		return site.slice(0, -1)
	}
	return site
}
24
+
25
/**
 * Process build output and generate .md files for all pages.
 *
 * For every page, collection-backed content is tried first. If none is found
 * and `options.includeStaticPages` is set, the page's built HTML file is
 * converted to markdown instead. Each processed page gets its `.md` file
 * written and a markdown link injected into its HTML. Afterwards the
 * aggregate `/.well-known/llm.md` and `/llms.txt` files are written unless
 * they are disabled or no `site` is configured.
 *
 * A failure on one page is logged as a warning and does not stop the others.
 *
 * @param dir - Build output directory as a URL.
 * @param pages - Pages produced by the build.
 * @param options - Resolved integration options.
 * @param logger - Logger used for progress and warnings.
 * @param config - Astro config; `site` provides the base URL for the aggregate files.
 */
export async function processBuildOutput(
	dir: URL,
	pages: PageInfo[],
	options: ResolvedOptions,
	logger: AstroIntegrationLogger,
	config: AstroConfig,
) {
	const baseUrl = getBaseUrl(config)

	// `URL.pathname` stays percent-encoded (a space is `%20`) and keeps a leading
	// slash in front of Windows drive letters, so it is not a usable filesystem
	// path. Convert file URLs properly; anything else keeps the raw pathname.
	const { fileURLToPath } = await import('node:url')
	const distDir = dir.protocol === 'file:' ? fileURLToPath(dir) : dir.pathname

	let collectionCount = 0
	let staticCount = 0
	const pageEntries: PageEntry[] = []
	let siteMetadata: SiteMetadata = {}

	for (const page of pages) {
		const pagePath = normalizePath(page.pathname === '' ? '/' : `/${page.pathname}`)

		try {
			const mdPath = getMdOutputPath(distDir, pagePath)
			const htmlPath = getHtmlPath(distDir, pagePath)

			// Try collection page first
			const content = await getCollectionContent(pagePath, options.contentDir)
			if (content) {
				const output = createCollectionOutput(content.frontmatter, content.body, content.file)
				const markdown = generateMarkdown(output, {
					url: pagePath,
					type: 'collection',
					sourcePath: content.file,
				}, options.includeFrontmatter)

				await writeMarkdownFile(mdPath, markdown)
				await injectLinkIntoHtml(htmlPath, pagePath)
				pageEntries.push({
					pathname: pagePath,
					title: extractTitle(content.frontmatter.title),
					type: 'collection',
				})
				collectionCount++
				continue
			}

			// Fall back to static page handling
			if (!options.includeStaticPages) continue

			const htmlExists = await fileExists(htmlPath)
			if (!htmlExists) continue

			const html = await fs.readFile(htmlPath, 'utf-8')
			const { metadata, body } = htmlToMarkdown(html)
			const output = createStaticOutput(metadata, body)

			const markdown = generateMarkdown(output, {
				url: pagePath,
				type: 'static',
			}, options.includeFrontmatter)

			await writeMarkdownFile(mdPath, markdown)
			await injectLinkIntoHtml(htmlPath, pagePath)
			pageEntries.push({
				pathname: pagePath,
				title: metadata.title,
				type: 'static',
			})

			// Extract site metadata from homepage
			if (pagePath === '/') {
				siteMetadata = {
					title: metadata.title,
					description: metadata.description,
				}
			}

			staticCount++
		} catch (error) {
			logger.warn(`Failed to process ${pagePath}: ${error}`)
		}
	}

	const total = collectionCount + staticCount
	if (total > 0) {
		logger.info(`Generated ${total} .md files (${collectionCount} collection, ${staticCount} static)`)
	}

	// Generate llm.md if enabled
	if (options.llmEndpoint !== false) {
		if (!baseUrl) {
			logger.warn('Skipping /.well-known/llm.md generation: no `site` configured in astro.config')
		} else {
			try {
				const llmContent = generateLlmMarkdown(pageEntries, { ...siteMetadata, baseUrl }, options.llmEndpoint)
				const llmPath = getLlmOutputPath(distDir)
				await writeMarkdownFile(llmPath, llmContent)
				logger.info('Generated /.well-known/llm.md')
			} catch (error) {
				logger.warn(`Failed to generate llm.md: ${error}`)
			}
		}
	}

	// Generate llms.txt if enabled
	if (options.llmsTxt !== false) {
		if (!baseUrl) {
			logger.warn('Skipping /llms.txt generation: no `site` configured in astro.config')
		} else {
			try {
				const llmsTxtContent = generateLlmsTxt(pageEntries, { ...siteMetadata, baseUrl }, options.llmsTxt)
				const llmsTxtPath = getLlmsTxtOutputPath(distDir)
				await writeMarkdownFile(llmsTxtPath, llmsTxtContent)
				logger.info('Generated /llms.txt')
			} catch (error) {
				logger.warn(`Failed to generate llms.txt: ${error}`)
			}
		}
	}
}
144
+
145
/**
 * Best-effort rewrite of a built HTML file with the markdown link injected.
 *
 * Any failure while reading, transforming or writing is swallowed, because
 * some pages have no HTML file on disk.
 *
 * @param htmlPath - Path of the HTML file to rewrite in place.
 * @param pagePath - Page path passed through to `injectMarkdownLink`.
 */
async function injectLinkIntoHtml(htmlPath: string, pagePath: string): Promise<void> {
	let currentHtml: string
	try {
		currentHtml = await fs.readFile(htmlPath, 'utf-8')
	} catch {
		// File might not exist for some pages
		return
	}

	try {
		const updatedHtml = injectMarkdownLink(currentHtml, pagePath)
		await fs.writeFile(htmlPath, updatedHtml, 'utf-8')
	} catch {
		// Still best-effort: a failed transform or write is ignored
	}
}
153
+
154
/**
 * Check whether a path can be accessed.
 *
 * @param filePath - Path to probe with `fs.access`.
 * @returns `true` when `fs.access` succeeds, `false` when it rejects for any reason.
 */
async function fileExists(filePath: string): Promise<boolean> {
	return fs.access(filePath).then(
		() => true,
		() => false,
	)
}
162
+
163
/**
 * Write text to a file, creating its parent directories first.
 *
 * @param filePath - Destination file path.
 * @param content - Text written as UTF-8.
 */
async function writeMarkdownFile(filePath: string, content: string): Promise<void> {
	const parentDir = path.dirname(filePath)
	await fs.mkdir(parentDir, { recursive: true })
	await fs.writeFile(filePath, content, 'utf-8')
}
167
+
168
/**
 * Extract a page title from a frontmatter value.
 *
 * Parsed collection frontmatter stores each field as an object of the shape
 * `{ value: string; line: number }` (see `ParsedContent.frontmatter` in the
 * cms-marker wrapper), so a string-only check would always discard collection
 * titles. Both forms are accepted here.
 *
 * @param title - Raw frontmatter value: a string, a `{ value }` entry, or anything else.
 * @returns The title string, or `undefined` when none can be extracted.
 */
function extractTitle(title: unknown): string | undefined {
	if (typeof title === 'string') {
		return title
	}
	if (typeof title === 'object' && title !== null && 'value' in title) {
		const { value } = title as { value: unknown }
		return typeof value === 'string' ? value : undefined
	}
	return undefined
}
@@ -0,0 +1,56 @@
1
+ export interface CollectionInfo { // Result type of findCollectionSource and input type of parseMarkdownContent (see the type aliases in this file)
2
+ name: string // presumably the collection's name; confirm against @nuasite/cms-marker
3
+ slug: string // presumably the entry's slug within the collection; confirm against @nuasite/cms-marker
4
+ file: string // presumably the path of the entry's source file; confirm against @nuasite/cms-marker
5
+ }
6
+
7
+ export interface ParsedContent { // Result type of parseMarkdownContent and of getCollectionContent
8
+ frontmatter: Record<string, { value: string; line: number }> // each field is an object, not a bare string: read `.value` for the text
9
+ body: string
10
+ bodyStartLine: number // presumably the source line where the body begins; confirm against @nuasite/cms-marker
11
+ file: string
12
+ collectionName: string
13
+ collectionSlug: string
14
+ }
15
+
16
+ type FindCollectionSource = (pagePath: string, contentDir?: string) => Promise<CollectionInfo | undefined> // local typing for the `findCollectionSource` export of @nuasite/cms-marker
17
+ type ParseMarkdownContent = (collectionInfo: CollectionInfo) => Promise<ParsedContent | undefined> // local typing for the `parseMarkdownContent` export of @nuasite/cms-marker
18
+
19
// Loaders taken from the optional `@nuasite/cms-marker` package; they stay
// undefined until `init()` has completed, and permanently if the import fails.
let findCollectionSource: FindCollectionSource | undefined
let parseMarkdownContent: ParseMarkdownContent | undefined
// Memoized load. Storing the promise rather than a boolean flag means callers
// that arrive while the import is still pending wait for it, instead of
// returning early and seeing the loaders as undefined.
let initPromise: Promise<void> | undefined

/**
 * Load `@nuasite/cms-marker` at most once and store its loader functions.
 *
 * Never rejects: when the package cannot be imported, the loaders are left undefined.
 *
 * @returns The same promise on every call; it resolves once the import attempt has finished.
 */
function init(): Promise<void> {
	if (!initPromise) {
		initPromise = (async () => {
			try {
				const cmsMarker = await import('@nuasite/cms-marker')
				findCollectionSource = cmsMarker.findCollectionSource
				parseMarkdownContent = cmsMarker.parseMarkdownContent
			} catch {
				// cms-marker not available
			}
		})()
	}
	return initPromise
}
35
+
36
/**
 * Look up and parse the collection entry behind a page path.
 *
 * @param pagePath - Page path passed to `findCollectionSource`.
 * @param contentDir - Content directory passed to `findCollectionSource`.
 * @returns The parsed entry, or `undefined` when the cms-marker loaders are
 *   not available or no collection source matches the page.
 */
export async function getCollectionContent(
	pagePath: string,
	contentDir: string,
): Promise<ParsedContent | undefined> {
	await init()

	const locate = findCollectionSource
	const parse = parseMarkdownContent
	if (!locate || !parse) {
		return undefined
	}

	const source = await locate(pagePath, contentDir)
	return source ? parse(source) : undefined
}
53
+
54
/**
 * Report whether both cms-marker loader functions are currently set.
 *
 * This is a synchronous snapshot and does not trigger loading: it returns
 * `false` until the module's initialization (run by `getCollectionContent`)
 * has completed successfully.
 */
export function hasCmsMarker(): boolean {
	const loaders = [findCollectionSource, parseMarkdownContent]
	return loaders.every((loader) => loader !== undefined)
}
@@ -0,0 +1,240 @@
1
+ import type { AstroConfig } from 'astro'
2
+ import type { ViteDevServer } from 'vite'
3
+ import { getCollectionContent } from './cms-marker'
4
+ import { htmlToMarkdown } from './html-to-markdown'
5
+ import { generateLlmMarkdown, type PageEntry, type SiteMetadata } from './llm-endpoint'
6
+ import { generateLlmsTxt } from './llms-txt-endpoint'
7
+ import { createCollectionOutput, createStaticOutput, generateMarkdown } from './markdown-generator'
8
+ import { injectMarkdownLink, LLM_ENDPOINT_PATH, LLMS_TXT_PATH, mdUrlToPagePath, normalizePath } from './paths'
9
+ import type { ResolvedOptions } from './types'
10
+
11
+ const ASSET_PATTERN = /\.(js|css|png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot|json)$/ // URLs ending in one of these extensions bypass the HTML link-injection middleware
12
+
13
/**
 * Produce the markdown document for a page path in dev mode.
 *
 * Collection content is used when available. Otherwise, if static pages are
 * enabled, the page is fetched from the dev server at `host` and its HTML is
 * converted.
 *
 * @param pagePath - Page path to render.
 * @param host - Host (and port) used to fetch the page's HTML.
 * @param options - Resolved integration options.
 * @returns The markdown text, or `null` when static pages are disabled, the
 *   fetch is not OK, or the response is not `text/html`.
 */
async function generateMarkdownForPath(
	pagePath: string,
	host: string,
	options: ResolvedOptions,
): Promise<string | null> {
	// Collection-backed pages take priority
	const collectionEntry = await getCollectionContent(pagePath, options.contentDir)
	if (collectionEntry) {
		const { frontmatter, body: entryBody, file } = collectionEntry
		const collectionOutput = createCollectionOutput(frontmatter, entryBody, file)
		return generateMarkdown(
			collectionOutput,
			{ url: pagePath, type: 'collection', sourcePath: file },
			options.includeFrontmatter,
		)
	}

	// Otherwise render the served HTML, if static pages are enabled
	if (!options.includeStaticPages) {
		return null
	}

	const pageResponse = await fetch(`http://${host}${pagePath}`, {
		headers: { Accept: 'text/html' },
	})
	if (!pageResponse.ok) {
		return null
	}

	const responseType = pageResponse.headers.get('content-type')
	if (responseType == null || !responseType.includes('text/html')) {
		return null
	}

	const { metadata, body } = htmlToMarkdown(await pageResponse.text())
	const staticOutput = createStaticOutput(metadata, body)
	return generateMarkdown(staticOutput, { url: pagePath, type: 'static' }, options.includeFrontmatter)
}
55
+
56
/**
 * Discover pages and site metadata for the LLM endpoints in dev mode.
 *
 * Only the homepage is discovered. It is fetched once from the dev server:
 * its metadata becomes the site metadata and, when the homepage is not backed
 * by a collection entry and static pages are enabled, its title is reused for
 * the page entry.
 *
 * Errors are swallowed on purpose: discovery is best-effort and an
 * unreachable homepage simply yields fewer results.
 *
 * @param host - Host (and port) of the dev server.
 * @param options - Resolved integration options.
 * @returns The discovered page entries and the site metadata (possibly empty).
 */
async function discoverPages(host: string, options: ResolvedOptions): Promise<{ pages: PageEntry[]; siteMetadata: SiteMetadata }> {
	const pages: PageEntry[] = []
	let siteMetadata: SiteMetadata = {}
	let homepageReachable = false
	let homepageTitle: PageEntry['title']

	// Fetch the homepage once; its metadata serves both purposes below
	try {
		const homeResponse = await fetch(`http://${host}/`, {
			headers: { Accept: 'text/html' },
		})
		if (homeResponse.ok) {
			const html = await homeResponse.text()
			const { metadata } = htmlToMarkdown(html)
			siteMetadata = {
				title: metadata.title,
				description: metadata.description,
			}
			homepageTitle = metadata.title
			homepageReachable = true
		}
	} catch {
		// Ignore errors
	}

	// Register the homepage as a page entry
	try {
		const content = await getCollectionContent('/', options.contentDir)
		if (content) {
			// Frontmatter fields are `{ value, line }` objects, so read `.value`;
			// a plain string is accepted as well in case the shape differs.
			const rawTitle: unknown = content.frontmatter.title
			let title: string | undefined
			if (typeof rawTitle === 'string') {
				title = rawTitle
			} else if (typeof rawTitle === 'object' && rawTitle !== null) {
				const { value } = rawTitle as { value?: unknown }
				title = typeof value === 'string' ? value : undefined
			}
			pages.push({ pathname: '/', title, type: 'collection' })
		} else if (options.includeStaticPages && homepageReachable) {
			pages.push({ pathname: '/', title: homepageTitle, type: 'static' })
		}
	} catch {
		// Ignore
	}

	return { pages, siteMetadata }
}
104
+
105
/**
 * Derive the base URL from the Astro config's `site` value.
 *
 * @param config - Astro configuration; only `site` is consulted.
 * @returns An empty string when `site` is unset, otherwise `site` without
 *   its final `/` (if it has one).
 */
function getBaseUrl(config: AstroConfig): string {
	const configuredSite = config.site
	if (!configuredSite) {
		return ''
	}

	const lastIndex = configuredSite.length - 1
	const hasTrailingSlash = configuredSite.charAt(lastIndex) === '/'
	return hasTrailingSlash ? configuredSite.substring(0, lastIndex) : configuredSite
}
113
+
114
/**
 * Create dev server middleware to handle markdown requests.
 *
 * Registers, in this order:
 * 1. the `/llms.txt` endpoint (only when enabled and `site` is configured),
 * 2. the `/.well-known/llm.md` endpoint (same conditions),
 * 3. per-page `*.md` endpoints,
 * 4. a response wrapper that injects the markdown alternate link into HTML responses.
 *
 * Route matching uses the URL's path only, so a query string or hash does not
 * prevent a match. Handlers that fail log the error and pass the request on.
 *
 * @param server - Vite dev server whose middleware stack is extended.
 * @param options - Resolved integration options.
 * @param config - Astro config; `site` provides the base URL for the LLM endpoints.
 */
export function createDevMiddleware(server: ViteDevServer, options: ResolvedOptions, config: AstroConfig) {
	const baseUrl = getBaseUrl(config)

	// Compare routes against the path only: `/page.md?x=1` must still be served
	const pathnameOf = (rawUrl: string | undefined): string => (rawUrl || '').split(/[?#]/, 1)[0] ?? ''

	// Serve /llms.txt endpoint (only if site is configured)
	const llmsTxtOptions = options.llmsTxt
	if (llmsTxtOptions !== false && baseUrl) {
		server.middlewares.use(async (req, res, next) => {
			if (pathnameOf(req.url) !== LLMS_TXT_PATH) {
				return next()
			}

			try {
				const host = req.headers.host || 'localhost:4321'
				const { pages, siteMetadata } = await discoverPages(host, options)
				const content = generateLlmsTxt(pages, { ...siteMetadata, baseUrl }, llmsTxtOptions)

				res.setHeader('Content-Type', 'text/plain; charset=utf-8')
				res.setHeader('Access-Control-Allow-Origin', '*')
				res.end(content)
				return
			} catch (error) {
				console.error('[page-markdown] Error generating llms.txt:', error)
			}

			return next()
		})
	}

	// Serve /.well-known/llm.md endpoint (only if site is configured)
	const llmEndpointOptions = options.llmEndpoint
	if (llmEndpointOptions !== false && baseUrl) {
		server.middlewares.use(async (req, res, next) => {
			if (pathnameOf(req.url) !== LLM_ENDPOINT_PATH) {
				return next()
			}

			try {
				const host = req.headers.host || 'localhost:4321'
				const { pages, siteMetadata } = await discoverPages(host, options)
				const markdown = generateLlmMarkdown(pages, { ...siteMetadata, baseUrl }, llmEndpointOptions)

				res.setHeader('Content-Type', 'text/markdown; charset=utf-8')
				res.setHeader('Access-Control-Allow-Origin', '*')
				res.end(markdown)
				return
			} catch (error) {
				console.error('[page-markdown] Error generating llm.md:', error)
			}

			return next()
		})
	}

	// Serve .md endpoints
	server.middlewares.use(async (req, res, next) => {
		const pathname = pathnameOf(req.url)

		if (!pathname.endsWith('.md')) {
			return next()
		}

		const pagePath = mdUrlToPagePath(pathname)

		try {
			const host = req.headers.host || 'localhost:4321'
			const markdown = await generateMarkdownForPath(pagePath, host, options)

			if (markdown) {
				res.setHeader('Content-Type', 'text/markdown; charset=utf-8')
				res.setHeader('Access-Control-Allow-Origin', '*')
				res.end(markdown)
				return
			}
		} catch (error) {
			console.error('[page-markdown] Error generating markdown:', error)
		}

		return next()
	})

	// Inject alternate link into HTML responses
	server.middlewares.use((req, res, next) => {
		const pathname = pathnameOf(req.url)

		if (pathname.endsWith('.md') || ASSET_PATTERN.test(pathname)) {
			return next()
		}

		const originalWrite = res.write
		const originalEnd = res.end
		const chunks: Buffer[] = []

		// Buffer only real body data. Callbacks and other non-binary arguments are
		// skipped so that forms like `res.end(callback)` cannot make `Buffer.from` throw.
		const capture = (chunk: unknown, encoding: unknown): void => {
			if (!chunk) return
			if (typeof chunk === 'string') {
				const textEncoding = typeof encoding === 'string' && Buffer.isEncoding(encoding) ? encoding : 'utf8'
				chunks.push(Buffer.from(chunk, textEncoding))
			} else if (chunk instanceof Uint8Array) {
				chunks.push(Buffer.from(chunk))
			}
		}

		res.write = ((chunk: unknown, ...args: unknown[]) => {
			capture(chunk, args[0])
			return true
		}) as typeof res.write

		res.end = ((...endArgs: unknown[]) => {
			// `end` accepts (cb), (chunk, cb) and (chunk, encoding, cb)
			const onFinish = endArgs.find((arg) => typeof arg === 'function') as (() => void) | undefined
			capture(endArgs[0], endArgs[1])

			const contentType = res.getHeader('content-type')
			const isHtml = typeof contentType === 'string' && contentType.includes('text/html')

			// Restore the real methods before sending anything
			res.write = originalWrite
			res.end = originalEnd

			if (chunks.length === 0) {
				return res.end(onFinish)
			}

			const buffered = Buffer.concat(chunks)
			if (!isHtml) {
				return res.end(buffered, onFinish)
			}

			const html = injectMarkdownLink(buffered.toString('utf8'), normalizePath(pathname))
			// The injected link changes the body size, so a Content-Length computed
			// for the original HTML would be wrong. Drop it while that is still possible.
			if (!res.headersSent) {
				res.removeHeader('content-length')
			}
			return res.end(html, onFinish)
		}) as typeof res.end

		next()
	})
}