@agentmarkup/astro 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/index.js +96 -4
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
Build-time `llms.txt`, optional `llms-full.txt`, JSON-LD, markdown mirrors, AI crawler `robots.txt`, headers, and validation for Astro websites.
|
|
4
4
|
|
|
5
|
+
`@agentmarkup/astro` is the Astro adapter in the `agentmarkup` package family. Framework-agnostic helpers live in `@agentmarkup/core`, Vite sites use `@agentmarkup/vite`, and Next.js sites use `@agentmarkup/next`.
|
|
6
|
+
|
|
5
7
|
## Install
|
|
6
8
|
|
|
7
9
|
```bash
|
package/dist/index.js
CHANGED
|
@@ -6,10 +6,13 @@ import { fileURLToPath } from "url";
|
|
|
6
6
|
import {
|
|
7
7
|
collectSchemasForPage,
|
|
8
8
|
filterJsonLdByExistingTypes,
|
|
9
|
+
generateLlmsFullTxt,
|
|
10
|
+
generateLlmsTxtDiscoveryLink,
|
|
9
11
|
generateMarkdownAlternateLink,
|
|
10
12
|
generatePageMarkdown,
|
|
11
13
|
generateJsonLdTags,
|
|
12
14
|
generateLlmsTxt,
|
|
15
|
+
hasLlmsTxtDiscoveryLink,
|
|
13
16
|
hasExistingJsonLdScripts,
|
|
14
17
|
injectHeadContent,
|
|
15
18
|
injectJsonLdTags,
|
|
@@ -20,9 +23,13 @@ import {
|
|
|
20
23
|
patchRobotsTxt,
|
|
21
24
|
presetToJsonLd,
|
|
22
25
|
printReport,
|
|
26
|
+
resolveLlmsTxtSections,
|
|
23
27
|
validateExistingJsonLd,
|
|
24
28
|
validateHtmlContent,
|
|
25
29
|
validateLlmsTxt,
|
|
30
|
+
validateLlmsTxtMarkdownCoverage,
|
|
31
|
+
validateMarkdownAlternateLink,
|
|
32
|
+
validateMarkdownContent,
|
|
26
33
|
validateRobotsTxt,
|
|
27
34
|
validateSchema
|
|
28
35
|
} from "@agentmarkup/core";
|
|
@@ -32,6 +39,8 @@ function agentmarkup(config) {
|
|
|
32
39
|
let llmsTxtEntries = 0;
|
|
33
40
|
let llmsTxtSections = 0;
|
|
34
41
|
let llmsTxtStatus = "none";
|
|
42
|
+
let llmsFullTxtEntries = 0;
|
|
43
|
+
let llmsFullTxtStatus = "none";
|
|
35
44
|
let jsonLdPages = 0;
|
|
36
45
|
let markdownPages = 0;
|
|
37
46
|
let markdownPagesStatus = "none";
|
|
@@ -50,6 +59,11 @@ function agentmarkup(config) {
|
|
|
50
59
|
"astro:build:done": async ({ dir }) => {
|
|
51
60
|
const outDir = fileURLToPath(dir);
|
|
52
61
|
const htmlFiles = await findHtmlFiles(outDir);
|
|
62
|
+
const resolvedLlmsSections = resolveLlmsTxtSections(config);
|
|
63
|
+
const markdownByUrl = {};
|
|
64
|
+
const availableMarkdownUrls = /* @__PURE__ */ new Set();
|
|
65
|
+
const finalHtmlByFile = /* @__PURE__ */ new Map();
|
|
66
|
+
const advertiseLlmsTxt = Boolean(config.llmsTxt) || Boolean(publicDir && existsSync(join(publicDir, "llms.txt")));
|
|
53
67
|
for (const htmlFile of htmlFiles) {
|
|
54
68
|
const pagePath = pagePathFromOutputFile(outDir, htmlFile);
|
|
55
69
|
const html = await readFile(htmlFile, "utf8");
|
|
@@ -60,12 +74,18 @@ function agentmarkup(config) {
|
|
|
60
74
|
validationResults.push(...validateHtmlContent(nextHtml, pagePath));
|
|
61
75
|
validationResults.push(...validateExistingJsonLd(nextHtml, pagePath));
|
|
62
76
|
}
|
|
77
|
+
if (advertiseLlmsTxt && !hasLlmsTxtDiscoveryLink(nextHtml)) {
|
|
78
|
+
nextHtml = injectHeadContent(nextHtml, generateLlmsTxtDiscoveryLink());
|
|
79
|
+
}
|
|
63
80
|
if (isFeatureEnabled(config.markdownPages) && pagePath && !hasMarkdownAlternateLink(nextHtml)) {
|
|
64
81
|
nextHtml = injectHeadContent(
|
|
65
82
|
nextHtml,
|
|
66
83
|
generateMarkdownAlternateLink(pagePath)
|
|
67
84
|
);
|
|
68
85
|
}
|
|
86
|
+
if (isFeatureEnabled(config.markdownPages) && !config.validation?.disabled) {
|
|
87
|
+
validationResults.push(...validateMarkdownAlternateLink(nextHtml, pagePath));
|
|
88
|
+
}
|
|
69
89
|
if (schemas.length === 0) {
|
|
70
90
|
if (pagePath && config.validation?.warnOnMissingSchema && !config.validation.disabled && !hasExistingJsonLd) {
|
|
71
91
|
validationResults.push({
|
|
@@ -77,6 +97,7 @@ function agentmarkup(config) {
|
|
|
77
97
|
if (nextHtml !== html) {
|
|
78
98
|
await writeFile(htmlFile, nextHtml, "utf8");
|
|
79
99
|
}
|
|
100
|
+
finalHtmlByFile.set(htmlFile, nextHtml);
|
|
80
101
|
continue;
|
|
81
102
|
}
|
|
82
103
|
const jsonLdObjects = schemas.map(presetToJsonLd);
|
|
@@ -90,11 +111,13 @@ function agentmarkup(config) {
|
|
|
90
111
|
if (nextHtml !== html) {
|
|
91
112
|
await writeFile(htmlFile, nextHtml, "utf8");
|
|
92
113
|
}
|
|
114
|
+
finalHtmlByFile.set(htmlFile, nextHtml);
|
|
93
115
|
continue;
|
|
94
116
|
}
|
|
95
117
|
const tags = generateJsonLdTags(injectables);
|
|
96
118
|
nextHtml = injectJsonLdTags(nextHtml, tags);
|
|
97
119
|
await writeFile(htmlFile, nextHtml, "utf8");
|
|
120
|
+
finalHtmlByFile.set(htmlFile, nextHtml);
|
|
98
121
|
jsonLdPages += 1;
|
|
99
122
|
}
|
|
100
123
|
if (isFeatureEnabled(config.markdownPages)) {
|
|
@@ -104,8 +127,9 @@ function agentmarkup(config) {
|
|
|
104
127
|
const relativeHtmlPath = relative(outDir, htmlFile).replace(/\\/g, "/");
|
|
105
128
|
const markdownFileName = markdownFileNameFromHtmlFile(relativeHtmlPath);
|
|
106
129
|
const outputMarkdownPath = join(outDir, markdownFileName);
|
|
107
|
-
const html = await readFile(htmlFile, "utf8");
|
|
130
|
+
const html = finalHtmlByFile.get(htmlFile) ?? await readFile(htmlFile, "utf8");
|
|
108
131
|
const pagePath = pagePathFromOutputFile(outDir, htmlFile);
|
|
132
|
+
const markdownAbsoluteUrl = buildAbsoluteMarkdownUrl(config.site, pagePath);
|
|
109
133
|
const markdown = generatePageMarkdown({
|
|
110
134
|
html,
|
|
111
135
|
pagePath,
|
|
@@ -118,20 +142,32 @@ function agentmarkup(config) {
|
|
|
118
142
|
const existingMarkdown = existingOutputMarkdown ?? (publicDir ? await readTextFileIfExists(join(publicDir, markdownFileName)) : null);
|
|
119
143
|
if (existingMarkdown && !config.markdownPages?.replaceExisting) {
|
|
120
144
|
preservedMarkdownPages += 1;
|
|
145
|
+
markdownByUrl[markdownAbsoluteUrl] = existingMarkdown;
|
|
146
|
+
availableMarkdownUrls.add(markdownAbsoluteUrl);
|
|
121
147
|
markdownCanonicalEntries.push({
|
|
122
148
|
markdownPath: `/${markdownFileName}`,
|
|
123
149
|
canonicalUrl: buildCanonicalUrl(config.site, pagePath)
|
|
124
150
|
});
|
|
151
|
+
if (!config.validation?.disabled) {
|
|
152
|
+
validationResults.push(
|
|
153
|
+
...validateMarkdownContent(existingMarkdown, pagePath)
|
|
154
|
+
);
|
|
155
|
+
}
|
|
125
156
|
if (!existingOutputMarkdown) {
|
|
126
157
|
await writeFile(outputMarkdownPath, existingMarkdown, "utf8");
|
|
127
158
|
}
|
|
128
159
|
continue;
|
|
129
160
|
}
|
|
130
161
|
await writeFile(outputMarkdownPath, markdown, "utf8");
|
|
162
|
+
markdownByUrl[markdownAbsoluteUrl] = markdown;
|
|
163
|
+
availableMarkdownUrls.add(markdownAbsoluteUrl);
|
|
131
164
|
markdownCanonicalEntries.push({
|
|
132
165
|
markdownPath: `/${markdownFileName}`,
|
|
133
166
|
canonicalUrl: buildCanonicalUrl(config.site, pagePath)
|
|
134
167
|
});
|
|
168
|
+
if (!config.validation?.disabled) {
|
|
169
|
+
validationResults.push(...validateMarkdownContent(markdown, pagePath));
|
|
170
|
+
}
|
|
135
171
|
markdownPages += 1;
|
|
136
172
|
}
|
|
137
173
|
markdownPagesStatus = markdownPages > 0 ? "generated" : preservedMarkdownPages > 0 ? "preserved" : "none";
|
|
@@ -150,6 +186,14 @@ function agentmarkup(config) {
|
|
|
150
186
|
await writeFile(outputHeadersPath, patchedHeaders, "utf8");
|
|
151
187
|
}
|
|
152
188
|
}
|
|
189
|
+
if (!config.validation?.disabled && resolvedLlmsSections.length > 0) {
|
|
190
|
+
validationResults.push(
|
|
191
|
+
...validateLlmsTxtMarkdownCoverage(
|
|
192
|
+
resolvedLlmsSections,
|
|
193
|
+
availableMarkdownUrls
|
|
194
|
+
)
|
|
195
|
+
);
|
|
196
|
+
}
|
|
153
197
|
}
|
|
154
198
|
const llmsTxtContent = generateLlmsTxt(config);
|
|
155
199
|
if (llmsTxtContent) {
|
|
@@ -177,6 +221,36 @@ function agentmarkup(config) {
|
|
|
177
221
|
}
|
|
178
222
|
}
|
|
179
223
|
}
|
|
224
|
+
if (config.llmsFullTxt?.enabled) {
|
|
225
|
+
const llmsFullTxtContent = generateLlmsFullTxt(config, {
|
|
226
|
+
contentByUrl: markdownByUrl
|
|
227
|
+
});
|
|
228
|
+
if (llmsFullTxtContent) {
|
|
229
|
+
const outputLlmsFullPath = join(outDir, "llms-full.txt");
|
|
230
|
+
const inlineEntries = countInlinedLlmsFullEntries(
|
|
231
|
+
resolvedLlmsSections,
|
|
232
|
+
markdownByUrl
|
|
233
|
+
);
|
|
234
|
+
const existingOutputLlmsFull = await readTextFileIfExists(outputLlmsFullPath);
|
|
235
|
+
const existingLlmsFull = existingOutputLlmsFull ?? (publicDir ? await readTextFileIfExists(join(publicDir, "llms-full.txt")) : null);
|
|
236
|
+
if (existingLlmsFull && !config.llmsFullTxt.replaceExisting) {
|
|
237
|
+
llmsFullTxtStatus = "preserved";
|
|
238
|
+
if (!existingOutputLlmsFull) {
|
|
239
|
+
await writeFile(outputLlmsFullPath, existingLlmsFull, "utf8");
|
|
240
|
+
}
|
|
241
|
+
if (!config.validation?.disabled) {
|
|
242
|
+
validationResults.push(...validateLlmsTxt(existingLlmsFull));
|
|
243
|
+
}
|
|
244
|
+
} else {
|
|
245
|
+
llmsFullTxtStatus = "generated";
|
|
246
|
+
llmsFullTxtEntries = inlineEntries;
|
|
247
|
+
await writeFile(outputLlmsFullPath, llmsFullTxtContent, "utf8");
|
|
248
|
+
if (!config.validation?.disabled) {
|
|
249
|
+
validationResults.push(...validateLlmsTxt(llmsFullTxtContent));
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
180
254
|
if (config.aiCrawlers) {
|
|
181
255
|
const crawlerEntries = Object.entries(config.aiCrawlers).filter(
|
|
182
256
|
([, value]) => value !== void 0
|
|
@@ -218,6 +292,8 @@ function agentmarkup(config) {
|
|
|
218
292
|
llmsTxtEntries,
|
|
219
293
|
llmsTxtSections,
|
|
220
294
|
llmsTxtStatus,
|
|
295
|
+
llmsFullTxtEntries,
|
|
296
|
+
llmsFullTxtStatus,
|
|
221
297
|
jsonLdPages,
|
|
222
298
|
markdownPages,
|
|
223
299
|
markdownPagesStatus,
|
|
@@ -233,10 +309,14 @@ function agentmarkup(config) {
|
|
|
233
309
|
};
|
|
234
310
|
}
|
|
235
311
|
async function readTextFileIfExists(filePath) {
|
|
236
|
-
|
|
237
|
-
return
|
|
312
|
+
try {
|
|
313
|
+
return await readFile(filePath, "utf8");
|
|
314
|
+
} catch (error) {
|
|
315
|
+
if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
|
|
316
|
+
return null;
|
|
317
|
+
}
|
|
318
|
+
throw error;
|
|
238
319
|
}
|
|
239
|
-
return readFile(filePath, "utf8");
|
|
240
320
|
}
|
|
241
321
|
async function findHtmlFiles(rootDir) {
|
|
242
322
|
const entries = await readdir(rootDir, { withFileTypes: true });
|
|
@@ -262,6 +342,18 @@ function buildCanonicalUrl(siteUrl, pagePath) {
|
|
|
262
342
|
const base = siteUrl.replace(/\/$/, "");
|
|
263
343
|
return pagePath === "/" ? `${base}/` : `${base}${pagePath}`;
|
|
264
344
|
}
|
|
345
|
+
function buildAbsoluteMarkdownUrl(siteUrl, pagePath) {
|
|
346
|
+
const base = siteUrl.replace(/\/$/, "");
|
|
347
|
+
return pagePath === "/" ? `${base}/index.md` : `${base}${pagePath}.md`;
|
|
348
|
+
}
|
|
349
|
+
function countInlinedLlmsFullEntries(sections, markdownByUrl) {
|
|
350
|
+
return sections.reduce(
|
|
351
|
+
(sum, section) => sum + section.entries.filter(
|
|
352
|
+
(entry) => Boolean(entry.markdownUrl && markdownByUrl[entry.markdownUrl])
|
|
353
|
+
).length,
|
|
354
|
+
0
|
|
355
|
+
);
|
|
356
|
+
}
|
|
265
357
|
function existingOutputFileExists(filePath) {
|
|
266
358
|
return existsSync(filePath);
|
|
267
359
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentmarkup/astro",
|
|
3
|
-
"version": "0.3.4",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Build-time llms.txt, llms-full.txt, JSON-LD, markdown mirrors, headers, AI crawler controls, and validation for Astro",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"dist"
|
|
43
43
|
],
|
|
44
44
|
"dependencies": {
|
|
45
|
-
"@agentmarkup/core": "0.
|
|
45
|
+
"@agentmarkup/core": "0.4.0"
|
|
46
46
|
},
|
|
47
47
|
"peerDependencies": {
|
|
48
48
|
"astro": ">=4.0.0"
|