@agentmarkup/astro 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +13 -3
- package/dist/index.js +96 -4
- package/package.json +4 -3
package/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c)
|
|
3
|
+
Copyright (c) 2026 Sebastian Cochinescu and Anima Felix
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
package/README.md
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# @agentmarkup/astro
|
|
2
2
|
|
|
3
|
-
Build-time `llms.txt`, JSON-LD, markdown mirrors, AI crawler `robots.txt`, headers, and validation for Astro websites.
|
|
3
|
+
Build-time `llms.txt`, optional `llms-full.txt`, JSON-LD, markdown mirrors, AI crawler `robots.txt`, headers, and validation for Astro websites.
|
|
4
|
+
|
|
5
|
+
`@agentmarkup/astro` is the Astro adapter in the `agentmarkup` package family. Framework-agnostic helpers live in `@agentmarkup/core`, Vite sites use `@agentmarkup/vite`, and Next.js sites use `@agentmarkup/next`.
|
|
4
6
|
|
|
5
7
|
## Install
|
|
6
8
|
|
|
@@ -34,6 +36,9 @@ export default defineConfig({
|
|
|
34
36
|
},
|
|
35
37
|
],
|
|
36
38
|
},
|
|
39
|
+
llmsFullTxt: {
|
|
40
|
+
enabled: true,
|
|
41
|
+
},
|
|
37
42
|
markdownPages: {
|
|
38
43
|
enabled: true,
|
|
39
44
|
},
|
|
@@ -88,22 +93,27 @@ export default defineConfig({
|
|
|
88
93
|
|
|
89
94
|
- Injects JSON-LD into built HTML pages during the Astro build
|
|
90
95
|
- Generates `/llms.txt` from config
|
|
96
|
+
- Generates optional `/llms-full.txt` with inlined same-site markdown content
|
|
97
|
+
- Injects the homepage `llms.txt` discovery link automatically
|
|
91
98
|
- Validates JSON-LD already present in page HTML
|
|
92
|
-
- Generates `.md` mirrors from the final HTML output
|
|
99
|
+
- Generates `.md` mirrors from the final HTML output when a cleaner agent-facing fetch path is useful
|
|
93
100
|
- Patches or creates `robots.txt` with AI crawler directives
|
|
94
101
|
- Patches or creates `_headers` with `Content-Signal` and canonical `Link` headers for markdown mirrors
|
|
95
102
|
- Validates common schema and crawler mistakes at build time
|
|
96
103
|
- Warns when a page looks like a thin client-rendered HTML shell
|
|
104
|
+
- Warns when markdown alternate links or `llms.txt` mirror coverage drift out of sync
|
|
97
105
|
- Re-exports `@agentmarkup/core` helpers for custom pipelines
|
|
98
106
|
|
|
99
107
|
By default, the Astro adapter coexists with existing machine-readable assets. If a page already contains JSON-LD for a schema type, or the site already ships a curated `llms.txt` or matching crawler rules, those are preserved unless you opt into replacement.
|
|
100
108
|
|
|
101
|
-
Markdown mirrors stay directly fetchable for agents, while their `_headers` entries point search engines back at the HTML page as canonical. Existing files are still preserved unless you opt into replacement with `markdownPages.replaceExisting` or `contentSignalHeaders.replaceExisting`.
|
|
109
|
+
Markdown mirrors are optional. They are usually most useful for thin, noisy, or client-rendered HTML where the raw page is a weak fetch target for agents. The generated `.md` files stay directly fetchable for agents, while their `_headers` entries point search engines back at the HTML page as canonical. Existing files are still preserved unless you opt into replacement with `markdownPages.replaceExisting` or `contentSignalHeaders.replaceExisting`.
|
|
102
110
|
|
|
103
111
|
When markdown mirrors are enabled, the adapter also writes canonical `Link` headers for those `.md` files so search engines can keep the HTML route as the preferred indexed URL without making the markdown mirror unavailable to direct fetchers.
|
|
104
112
|
|
|
105
113
|
When markdown mirrors are enabled, same-site page entries in `llms.txt` automatically point at the generated `.md` mirrors by default. Set `llmsTxt.preferMarkdownMirrors: false` if you want `llms.txt` to keep linking to HTML routes instead.
|
|
106
114
|
|
|
115
|
+
Enable `llmsFullTxt` when you want a richer companion file for agents that can consume more than the compact `llms.txt` manifest. The generated `llms-full.txt` keeps the same section structure but inlines same-site markdown mirror content when those mirrors exist.
|
|
116
|
+
|
|
107
117
|
## Maintainer
|
|
108
118
|
|
|
109
119
|
Copyright (c) 2026 [Sebastian Cochinescu](https://www.cochinescu.com). MIT License.
|
package/dist/index.js
CHANGED
|
@@ -6,10 +6,13 @@ import { fileURLToPath } from "url";
|
|
|
6
6
|
import {
|
|
7
7
|
collectSchemasForPage,
|
|
8
8
|
filterJsonLdByExistingTypes,
|
|
9
|
+
generateLlmsFullTxt,
|
|
10
|
+
generateLlmsTxtDiscoveryLink,
|
|
9
11
|
generateMarkdownAlternateLink,
|
|
10
12
|
generatePageMarkdown,
|
|
11
13
|
generateJsonLdTags,
|
|
12
14
|
generateLlmsTxt,
|
|
15
|
+
hasLlmsTxtDiscoveryLink,
|
|
13
16
|
hasExistingJsonLdScripts,
|
|
14
17
|
injectHeadContent,
|
|
15
18
|
injectJsonLdTags,
|
|
@@ -20,9 +23,13 @@ import {
|
|
|
20
23
|
patchRobotsTxt,
|
|
21
24
|
presetToJsonLd,
|
|
22
25
|
printReport,
|
|
26
|
+
resolveLlmsTxtSections,
|
|
23
27
|
validateExistingJsonLd,
|
|
24
28
|
validateHtmlContent,
|
|
25
29
|
validateLlmsTxt,
|
|
30
|
+
validateLlmsTxtMarkdownCoverage,
|
|
31
|
+
validateMarkdownAlternateLink,
|
|
32
|
+
validateMarkdownContent,
|
|
26
33
|
validateRobotsTxt,
|
|
27
34
|
validateSchema
|
|
28
35
|
} from "@agentmarkup/core";
|
|
@@ -32,6 +39,8 @@ function agentmarkup(config) {
|
|
|
32
39
|
let llmsTxtEntries = 0;
|
|
33
40
|
let llmsTxtSections = 0;
|
|
34
41
|
let llmsTxtStatus = "none";
|
|
42
|
+
let llmsFullTxtEntries = 0;
|
|
43
|
+
let llmsFullTxtStatus = "none";
|
|
35
44
|
let jsonLdPages = 0;
|
|
36
45
|
let markdownPages = 0;
|
|
37
46
|
let markdownPagesStatus = "none";
|
|
@@ -50,6 +59,11 @@ function agentmarkup(config) {
|
|
|
50
59
|
"astro:build:done": async ({ dir }) => {
|
|
51
60
|
const outDir = fileURLToPath(dir);
|
|
52
61
|
const htmlFiles = await findHtmlFiles(outDir);
|
|
62
|
+
const resolvedLlmsSections = resolveLlmsTxtSections(config);
|
|
63
|
+
const markdownByUrl = {};
|
|
64
|
+
const availableMarkdownUrls = /* @__PURE__ */ new Set();
|
|
65
|
+
const finalHtmlByFile = /* @__PURE__ */ new Map();
|
|
66
|
+
const advertiseLlmsTxt = Boolean(config.llmsTxt) || Boolean(publicDir && existsSync(join(publicDir, "llms.txt")));
|
|
53
67
|
for (const htmlFile of htmlFiles) {
|
|
54
68
|
const pagePath = pagePathFromOutputFile(outDir, htmlFile);
|
|
55
69
|
const html = await readFile(htmlFile, "utf8");
|
|
@@ -60,12 +74,18 @@ function agentmarkup(config) {
|
|
|
60
74
|
validationResults.push(...validateHtmlContent(nextHtml, pagePath));
|
|
61
75
|
validationResults.push(...validateExistingJsonLd(nextHtml, pagePath));
|
|
62
76
|
}
|
|
77
|
+
if (advertiseLlmsTxt && !hasLlmsTxtDiscoveryLink(nextHtml)) {
|
|
78
|
+
nextHtml = injectHeadContent(nextHtml, generateLlmsTxtDiscoveryLink());
|
|
79
|
+
}
|
|
63
80
|
if (isFeatureEnabled(config.markdownPages) && pagePath && !hasMarkdownAlternateLink(nextHtml)) {
|
|
64
81
|
nextHtml = injectHeadContent(
|
|
65
82
|
nextHtml,
|
|
66
83
|
generateMarkdownAlternateLink(pagePath)
|
|
67
84
|
);
|
|
68
85
|
}
|
|
86
|
+
if (isFeatureEnabled(config.markdownPages) && !config.validation?.disabled) {
|
|
87
|
+
validationResults.push(...validateMarkdownAlternateLink(nextHtml, pagePath));
|
|
88
|
+
}
|
|
69
89
|
if (schemas.length === 0) {
|
|
70
90
|
if (pagePath && config.validation?.warnOnMissingSchema && !config.validation.disabled && !hasExistingJsonLd) {
|
|
71
91
|
validationResults.push({
|
|
@@ -77,6 +97,7 @@ function agentmarkup(config) {
|
|
|
77
97
|
if (nextHtml !== html) {
|
|
78
98
|
await writeFile(htmlFile, nextHtml, "utf8");
|
|
79
99
|
}
|
|
100
|
+
finalHtmlByFile.set(htmlFile, nextHtml);
|
|
80
101
|
continue;
|
|
81
102
|
}
|
|
82
103
|
const jsonLdObjects = schemas.map(presetToJsonLd);
|
|
@@ -90,11 +111,13 @@ function agentmarkup(config) {
|
|
|
90
111
|
if (nextHtml !== html) {
|
|
91
112
|
await writeFile(htmlFile, nextHtml, "utf8");
|
|
92
113
|
}
|
|
114
|
+
finalHtmlByFile.set(htmlFile, nextHtml);
|
|
93
115
|
continue;
|
|
94
116
|
}
|
|
95
117
|
const tags = generateJsonLdTags(injectables);
|
|
96
118
|
nextHtml = injectJsonLdTags(nextHtml, tags);
|
|
97
119
|
await writeFile(htmlFile, nextHtml, "utf8");
|
|
120
|
+
finalHtmlByFile.set(htmlFile, nextHtml);
|
|
98
121
|
jsonLdPages += 1;
|
|
99
122
|
}
|
|
100
123
|
if (isFeatureEnabled(config.markdownPages)) {
|
|
@@ -104,8 +127,9 @@ function agentmarkup(config) {
|
|
|
104
127
|
const relativeHtmlPath = relative(outDir, htmlFile).replace(/\\/g, "/");
|
|
105
128
|
const markdownFileName = markdownFileNameFromHtmlFile(relativeHtmlPath);
|
|
106
129
|
const outputMarkdownPath = join(outDir, markdownFileName);
|
|
107
|
-
const html = await readFile(htmlFile, "utf8");
|
|
130
|
+
const html = finalHtmlByFile.get(htmlFile) ?? await readFile(htmlFile, "utf8");
|
|
108
131
|
const pagePath = pagePathFromOutputFile(outDir, htmlFile);
|
|
132
|
+
const markdownAbsoluteUrl = buildAbsoluteMarkdownUrl(config.site, pagePath);
|
|
109
133
|
const markdown = generatePageMarkdown({
|
|
110
134
|
html,
|
|
111
135
|
pagePath,
|
|
@@ -118,20 +142,32 @@ function agentmarkup(config) {
|
|
|
118
142
|
const existingMarkdown = existingOutputMarkdown ?? (publicDir ? await readTextFileIfExists(join(publicDir, markdownFileName)) : null);
|
|
119
143
|
if (existingMarkdown && !config.markdownPages?.replaceExisting) {
|
|
120
144
|
preservedMarkdownPages += 1;
|
|
145
|
+
markdownByUrl[markdownAbsoluteUrl] = existingMarkdown;
|
|
146
|
+
availableMarkdownUrls.add(markdownAbsoluteUrl);
|
|
121
147
|
markdownCanonicalEntries.push({
|
|
122
148
|
markdownPath: `/${markdownFileName}`,
|
|
123
149
|
canonicalUrl: buildCanonicalUrl(config.site, pagePath)
|
|
124
150
|
});
|
|
151
|
+
if (!config.validation?.disabled) {
|
|
152
|
+
validationResults.push(
|
|
153
|
+
...validateMarkdownContent(existingMarkdown, pagePath)
|
|
154
|
+
);
|
|
155
|
+
}
|
|
125
156
|
if (!existingOutputMarkdown) {
|
|
126
157
|
await writeFile(outputMarkdownPath, existingMarkdown, "utf8");
|
|
127
158
|
}
|
|
128
159
|
continue;
|
|
129
160
|
}
|
|
130
161
|
await writeFile(outputMarkdownPath, markdown, "utf8");
|
|
162
|
+
markdownByUrl[markdownAbsoluteUrl] = markdown;
|
|
163
|
+
availableMarkdownUrls.add(markdownAbsoluteUrl);
|
|
131
164
|
markdownCanonicalEntries.push({
|
|
132
165
|
markdownPath: `/${markdownFileName}`,
|
|
133
166
|
canonicalUrl: buildCanonicalUrl(config.site, pagePath)
|
|
134
167
|
});
|
|
168
|
+
if (!config.validation?.disabled) {
|
|
169
|
+
validationResults.push(...validateMarkdownContent(markdown, pagePath));
|
|
170
|
+
}
|
|
135
171
|
markdownPages += 1;
|
|
136
172
|
}
|
|
137
173
|
markdownPagesStatus = markdownPages > 0 ? "generated" : preservedMarkdownPages > 0 ? "preserved" : "none";
|
|
@@ -150,6 +186,14 @@ function agentmarkup(config) {
|
|
|
150
186
|
await writeFile(outputHeadersPath, patchedHeaders, "utf8");
|
|
151
187
|
}
|
|
152
188
|
}
|
|
189
|
+
if (!config.validation?.disabled && resolvedLlmsSections.length > 0) {
|
|
190
|
+
validationResults.push(
|
|
191
|
+
...validateLlmsTxtMarkdownCoverage(
|
|
192
|
+
resolvedLlmsSections,
|
|
193
|
+
availableMarkdownUrls
|
|
194
|
+
)
|
|
195
|
+
);
|
|
196
|
+
}
|
|
153
197
|
}
|
|
154
198
|
const llmsTxtContent = generateLlmsTxt(config);
|
|
155
199
|
if (llmsTxtContent) {
|
|
@@ -177,6 +221,36 @@ function agentmarkup(config) {
|
|
|
177
221
|
}
|
|
178
222
|
}
|
|
179
223
|
}
|
|
224
|
+
if (config.llmsFullTxt?.enabled) {
|
|
225
|
+
const llmsFullTxtContent = generateLlmsFullTxt(config, {
|
|
226
|
+
contentByUrl: markdownByUrl
|
|
227
|
+
});
|
|
228
|
+
if (llmsFullTxtContent) {
|
|
229
|
+
const outputLlmsFullPath = join(outDir, "llms-full.txt");
|
|
230
|
+
const inlineEntries = countInlinedLlmsFullEntries(
|
|
231
|
+
resolvedLlmsSections,
|
|
232
|
+
markdownByUrl
|
|
233
|
+
);
|
|
234
|
+
const existingOutputLlmsFull = await readTextFileIfExists(outputLlmsFullPath);
|
|
235
|
+
const existingLlmsFull = existingOutputLlmsFull ?? (publicDir ? await readTextFileIfExists(join(publicDir, "llms-full.txt")) : null);
|
|
236
|
+
if (existingLlmsFull && !config.llmsFullTxt.replaceExisting) {
|
|
237
|
+
llmsFullTxtStatus = "preserved";
|
|
238
|
+
if (!existingOutputLlmsFull) {
|
|
239
|
+
await writeFile(outputLlmsFullPath, existingLlmsFull, "utf8");
|
|
240
|
+
}
|
|
241
|
+
if (!config.validation?.disabled) {
|
|
242
|
+
validationResults.push(...validateLlmsTxt(existingLlmsFull));
|
|
243
|
+
}
|
|
244
|
+
} else {
|
|
245
|
+
llmsFullTxtStatus = "generated";
|
|
246
|
+
llmsFullTxtEntries = inlineEntries;
|
|
247
|
+
await writeFile(outputLlmsFullPath, llmsFullTxtContent, "utf8");
|
|
248
|
+
if (!config.validation?.disabled) {
|
|
249
|
+
validationResults.push(...validateLlmsTxt(llmsFullTxtContent));
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
180
254
|
if (config.aiCrawlers) {
|
|
181
255
|
const crawlerEntries = Object.entries(config.aiCrawlers).filter(
|
|
182
256
|
([, value]) => value !== void 0
|
|
@@ -218,6 +292,8 @@ function agentmarkup(config) {
|
|
|
218
292
|
llmsTxtEntries,
|
|
219
293
|
llmsTxtSections,
|
|
220
294
|
llmsTxtStatus,
|
|
295
|
+
llmsFullTxtEntries,
|
|
296
|
+
llmsFullTxtStatus,
|
|
221
297
|
jsonLdPages,
|
|
222
298
|
markdownPages,
|
|
223
299
|
markdownPagesStatus,
|
|
@@ -233,10 +309,14 @@ function agentmarkup(config) {
|
|
|
233
309
|
};
|
|
234
310
|
}
|
|
235
311
|
async function readTextFileIfExists(filePath) {
|
|
236
|
-
|
|
237
|
-
return null;
|
|
312
|
+
try {
|
|
313
|
+
return await readFile(filePath, "utf8");
|
|
314
|
+
} catch (error) {
|
|
315
|
+
if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
|
|
316
|
+
return null;
|
|
317
|
+
}
|
|
318
|
+
throw error;
|
|
238
319
|
}
|
|
239
|
-
return readFile(filePath, "utf8");
|
|
240
320
|
}
|
|
241
321
|
async function findHtmlFiles(rootDir) {
|
|
242
322
|
const entries = await readdir(rootDir, { withFileTypes: true });
|
|
@@ -262,6 +342,18 @@ function buildCanonicalUrl(siteUrl, pagePath) {
|
|
|
262
342
|
const base = siteUrl.replace(/\/$/, "");
|
|
263
343
|
return pagePath === "/" ? `${base}/` : `${base}${pagePath}`;
|
|
264
344
|
}
|
|
345
|
+
function buildAbsoluteMarkdownUrl(siteUrl, pagePath) {
|
|
346
|
+
const base = siteUrl.replace(/\/$/, "");
|
|
347
|
+
return pagePath === "/" ? `${base}/index.md` : `${base}${pagePath}.md`;
|
|
348
|
+
}
|
|
349
|
+
function countInlinedLlmsFullEntries(sections, markdownByUrl) {
|
|
350
|
+
return sections.reduce(
|
|
351
|
+
(sum, section) => sum + section.entries.filter(
|
|
352
|
+
(entry) => Boolean(entry.markdownUrl && markdownByUrl[entry.markdownUrl])
|
|
353
|
+
).length,
|
|
354
|
+
0
|
|
355
|
+
);
|
|
356
|
+
}
|
|
265
357
|
function existingOutputFileExists(filePath) {
|
|
266
358
|
return existsSync(filePath);
|
|
267
359
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentmarkup/astro",
|
|
3
|
-
"version": "0.3.3",
|
|
4
|
-
"description": "Build-time llms.txt, JSON-LD, markdown mirrors, headers, AI crawler controls, and validation for Astro",
|
|
3
|
+
"version": "0.4.0",
|
|
4
|
+
"description": "Build-time llms.txt, llms-full.txt, JSON-LD, markdown mirrors, headers, AI crawler controls, and validation for Astro",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "Sebastian Cochinescu <hello@animafelix.com> (https://animafelix.com)",
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"keywords": [
|
|
18
18
|
"astro",
|
|
19
19
|
"llms-txt",
|
|
20
|
+
"llms-full",
|
|
20
21
|
"json-ld",
|
|
21
22
|
"markdown",
|
|
22
23
|
"schema-org",
|
|
@@ -41,7 +42,7 @@
|
|
|
41
42
|
"dist"
|
|
42
43
|
],
|
|
43
44
|
"dependencies": {
|
|
44
|
-
"@agentmarkup/core": "0.3.3"
|
|
45
|
+
"@agentmarkup/core": "0.4.0"
|
|
45
46
|
},
|
|
46
47
|
"peerDependencies": {
|
|
47
48
|
"astro": ">=4.0.0"
|