@agentmarkup/astro 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -1
- package/dist/index.js +86 -3
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @agentmarkup/astro
|
|
2
2
|
|
|
3
|
-
Build-time `llms.txt`, JSON-LD, AI crawler `robots.txt`, and validation for Astro websites.
|
|
3
|
+
Build-time `llms.txt`, JSON-LD, markdown mirrors, AI crawler `robots.txt`, headers, and validation for Astro websites.
|
|
4
4
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
@@ -34,6 +34,12 @@ export default defineConfig({
|
|
|
34
34
|
},
|
|
35
35
|
],
|
|
36
36
|
},
|
|
37
|
+
markdownPages: {
|
|
38
|
+
enabled: true,
|
|
39
|
+
},
|
|
40
|
+
contentSignalHeaders: {
|
|
41
|
+
enabled: true,
|
|
42
|
+
},
|
|
37
43
|
globalSchemas: [
|
|
38
44
|
{
|
|
39
45
|
preset: 'webSite',
|
|
@@ -82,12 +88,18 @@ export default defineConfig({
|
|
|
82
88
|
|
|
83
89
|
- Injects JSON-LD into built HTML pages during the Astro build
|
|
84
90
|
- Generates `/llms.txt` from config
|
|
91
|
+
- Validates JSON-LD already present in page HTML
|
|
92
|
+
- Generates `.md` mirrors from the final HTML output
|
|
85
93
|
- Patches or creates `robots.txt` with AI crawler directives
|
|
94
|
+
- Patches or creates `_headers` with `Content-Signal`
|
|
86
95
|
- Validates common schema and crawler mistakes at build time
|
|
96
|
+
- Warns when a page looks like a thin client-rendered HTML shell
|
|
87
97
|
- Re-exports `@agentmarkup/core` helpers for custom pipelines
|
|
88
98
|
|
|
89
99
|
By default, the Astro adapter coexists with existing machine-readable assets. If a page already contains JSON-LD for a schema type, or the site already ships a curated `llms.txt` or matching crawler rules, those are preserved unless you opt into replacement.
|
|
90
100
|
|
|
101
|
+
Markdown mirrors and `_headers` follow the same rule: existing files are preserved unless you opt into replacement with `markdownPages.replaceExisting` or `contentSignalHeaders.replaceExisting`.
|
|
102
|
+
|
|
91
103
|
## Maintainer
|
|
92
104
|
|
|
93
105
|
Copyright (c) 2026 [Sebastian Cochinescu](https://www.cochinescu.com). MIT License.
|
package/dist/index.js
CHANGED
|
@@ -6,14 +6,21 @@ import { fileURLToPath } from "url";
|
|
|
6
6
|
import {
|
|
7
7
|
collectSchemasForPage,
|
|
8
8
|
filterJsonLdByExistingTypes,
|
|
9
|
+
generateMarkdownAlternateLink,
|
|
10
|
+
generatePageMarkdown,
|
|
9
11
|
generateJsonLdTags,
|
|
10
12
|
generateLlmsTxt,
|
|
11
13
|
hasExistingJsonLdScripts,
|
|
14
|
+
injectHeadContent,
|
|
12
15
|
injectJsonLdTags,
|
|
16
|
+
markdownFileNameFromHtmlFile,
|
|
17
|
+
patchHeadersFile,
|
|
13
18
|
normalizePagePath,
|
|
14
19
|
patchRobotsTxt,
|
|
15
20
|
presetToJsonLd,
|
|
16
21
|
printReport,
|
|
22
|
+
validateExistingJsonLd,
|
|
23
|
+
validateHtmlContent,
|
|
17
24
|
validateLlmsTxt,
|
|
18
25
|
validateRobotsTxt,
|
|
19
26
|
validateSchema
|
|
@@ -25,8 +32,11 @@ function agentmarkup(config) {
|
|
|
25
32
|
let llmsTxtSections = 0;
|
|
26
33
|
let llmsTxtStatus = "none";
|
|
27
34
|
let jsonLdPages = 0;
|
|
35
|
+
let markdownPages = 0;
|
|
36
|
+
let markdownPagesStatus = "none";
|
|
28
37
|
let crawlersConfigured = 0;
|
|
29
38
|
let robotsTxtStatus = "none";
|
|
39
|
+
let contentSignalHeadersStatus = "none";
|
|
30
40
|
let publicDir;
|
|
31
41
|
return {
|
|
32
42
|
name: "agentmarkup",
|
|
@@ -40,8 +50,19 @@ function agentmarkup(config) {
|
|
|
40
50
|
for (const htmlFile of htmlFiles) {
|
|
41
51
|
const pagePath = pagePathFromOutputFile(outDir, htmlFile);
|
|
42
52
|
const html = await readFile(htmlFile, "utf8");
|
|
53
|
+
let nextHtml = html;
|
|
43
54
|
const schemas = collectSchemasForPage(config, pagePath);
|
|
44
|
-
const hasExistingJsonLd = hasExistingJsonLdScripts(html);
|
|
55
|
+
const hasExistingJsonLd = hasExistingJsonLdScripts(nextHtml);
|
|
56
|
+
if (!config.validation?.disabled) {
|
|
57
|
+
validationResults.push(...validateHtmlContent(nextHtml, pagePath));
|
|
58
|
+
validationResults.push(...validateExistingJsonLd(nextHtml, pagePath));
|
|
59
|
+
}
|
|
60
|
+
if (isFeatureEnabled(config.markdownPages) && pagePath && !hasMarkdownAlternateLink(nextHtml)) {
|
|
61
|
+
nextHtml = injectHeadContent(
|
|
62
|
+
nextHtml,
|
|
63
|
+
generateMarkdownAlternateLink(pagePath)
|
|
64
|
+
);
|
|
65
|
+
}
|
|
45
66
|
if (schemas.length === 0) {
|
|
46
67
|
if (pagePath && config.validation?.warnOnMissingSchema && !config.validation.disabled && !hasExistingJsonLd) {
|
|
47
68
|
validationResults.push({
|
|
@@ -50,22 +71,59 @@ function agentmarkup(config) {
|
|
|
50
71
|
path: pagePath
|
|
51
72
|
});
|
|
52
73
|
}
|
|
74
|
+
if (nextHtml !== html) {
|
|
75
|
+
await writeFile(htmlFile, nextHtml, "utf8");
|
|
76
|
+
}
|
|
53
77
|
continue;
|
|
54
78
|
}
|
|
55
79
|
const jsonLdObjects = schemas.map(presetToJsonLd);
|
|
56
|
-
const injectables = config.jsonLd?.replaceExistingTypes ? jsonLdObjects : filterJsonLdByExistingTypes(jsonLdObjects, html);
|
|
80
|
+
const injectables = config.jsonLd?.replaceExistingTypes ? jsonLdObjects : filterJsonLdByExistingTypes(jsonLdObjects, nextHtml);
|
|
57
81
|
if (!config.validation?.disabled) {
|
|
58
82
|
for (const schema of schemas) {
|
|
59
83
|
validationResults.push(...validateSchema(schema, pagePath));
|
|
60
84
|
}
|
|
61
85
|
}
|
|
62
86
|
if (injectables.length === 0) {
|
|
87
|
+
if (nextHtml !== html) {
|
|
88
|
+
await writeFile(htmlFile, nextHtml, "utf8");
|
|
89
|
+
}
|
|
63
90
|
continue;
|
|
64
91
|
}
|
|
65
92
|
const tags = generateJsonLdTags(injectables);
|
|
66
|
-
|
|
93
|
+
nextHtml = injectJsonLdTags(nextHtml, tags);
|
|
94
|
+
await writeFile(htmlFile, nextHtml, "utf8");
|
|
67
95
|
jsonLdPages += 1;
|
|
68
96
|
}
|
|
97
|
+
if (isFeatureEnabled(config.markdownPages)) {
|
|
98
|
+
let preservedMarkdownPages = 0;
|
|
99
|
+
for (const htmlFile of htmlFiles) {
|
|
100
|
+
const relativeHtmlPath = relative(outDir, htmlFile).replace(/\\/g, "/");
|
|
101
|
+
const markdownFileName = markdownFileNameFromHtmlFile(relativeHtmlPath);
|
|
102
|
+
const outputMarkdownPath = join(outDir, markdownFileName);
|
|
103
|
+
const html = await readFile(htmlFile, "utf8");
|
|
104
|
+
const pagePath = pagePathFromOutputFile(outDir, htmlFile);
|
|
105
|
+
const markdown = generatePageMarkdown({
|
|
106
|
+
html,
|
|
107
|
+
pagePath,
|
|
108
|
+
siteUrl: config.site
|
|
109
|
+
});
|
|
110
|
+
if (!markdown) {
|
|
111
|
+
continue;
|
|
112
|
+
}
|
|
113
|
+
const existingOutputMarkdown = await readTextFileIfExists(outputMarkdownPath);
|
|
114
|
+
const existingMarkdown = existingOutputMarkdown ?? (publicDir ? await readTextFileIfExists(join(publicDir, markdownFileName)) : null);
|
|
115
|
+
if (existingMarkdown && !config.markdownPages?.replaceExisting) {
|
|
116
|
+
preservedMarkdownPages += 1;
|
|
117
|
+
if (!existingOutputMarkdown) {
|
|
118
|
+
await writeFile(outputMarkdownPath, existingMarkdown, "utf8");
|
|
119
|
+
}
|
|
120
|
+
continue;
|
|
121
|
+
}
|
|
122
|
+
await writeFile(outputMarkdownPath, markdown, "utf8");
|
|
123
|
+
markdownPages += 1;
|
|
124
|
+
}
|
|
125
|
+
markdownPagesStatus = markdownPages > 0 ? "generated" : preservedMarkdownPages > 0 ? "preserved" : "none";
|
|
126
|
+
}
|
|
69
127
|
const llmsTxtContent = generateLlmsTxt(config);
|
|
70
128
|
if (llmsTxtContent) {
|
|
71
129
|
const outputLlmsPath = join(outDir, "llms.txt");
|
|
@@ -114,14 +172,31 @@ function agentmarkup(config) {
|
|
|
114
172
|
await writeFile(outputRobotsPath, patched, "utf8");
|
|
115
173
|
}
|
|
116
174
|
}
|
|
175
|
+
if (isFeatureEnabled(config.contentSignalHeaders)) {
|
|
176
|
+
const outputHeadersPath = join(outDir, "_headers");
|
|
177
|
+
const existingOutputHeaders = await readTextFileIfExists(outputHeadersPath);
|
|
178
|
+
const existingHeaders = existingOutputHeaders ?? (publicDir ? await readTextFileIfExists(join(publicDir, "_headers")) : null);
|
|
179
|
+
const patchedHeaders = patchHeadersFile(
|
|
180
|
+
existingHeaders,
|
|
181
|
+
config.contentSignalHeaders
|
|
182
|
+
);
|
|
183
|
+
const preserved = existingHeaders !== null && patchedHeaders === existingHeaders;
|
|
184
|
+
contentSignalHeadersStatus = preserved ? "preserved" : "generated";
|
|
185
|
+
if (!preserved || !existingOutputFileExists(outputHeadersPath)) {
|
|
186
|
+
await writeFile(outputHeadersPath, patchedHeaders, "utf8");
|
|
187
|
+
}
|
|
188
|
+
}
|
|
117
189
|
printReport({
|
|
118
190
|
label: "@agentmarkup/astro",
|
|
119
191
|
llmsTxtEntries,
|
|
120
192
|
llmsTxtSections,
|
|
121
193
|
llmsTxtStatus,
|
|
122
194
|
jsonLdPages,
|
|
195
|
+
markdownPages,
|
|
196
|
+
markdownPagesStatus,
|
|
123
197
|
crawlersConfigured,
|
|
124
198
|
robotsTxtStatus,
|
|
199
|
+
contentSignalHeadersStatus,
|
|
125
200
|
validationResults
|
|
126
201
|
});
|
|
127
202
|
}
|
|
@@ -157,6 +232,14 @@ function pagePathFromOutputFile(outDir, filePath) {
|
|
|
157
232
|
function existingOutputFileExists(filePath) {
|
|
158
233
|
return existsSync(filePath);
|
|
159
234
|
}
|
|
235
|
+
function isFeatureEnabled(config) {
|
|
236
|
+
return Boolean(config && config.enabled !== false);
|
|
237
|
+
}
|
|
238
|
+
function hasMarkdownAlternateLink(html) {
|
|
239
|
+
return /<link\b[^>]*rel=(['"])alternate\1[^>]*type=(['"])text\/markdown\2/i.test(
|
|
240
|
+
html
|
|
241
|
+
);
|
|
242
|
+
}
|
|
160
243
|
export {
|
|
161
244
|
agentmarkup
|
|
162
245
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentmarkup/astro",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Build-time llms.txt, JSON-LD, and AI crawler controls for Astro",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"dist"
|
|
37
37
|
],
|
|
38
38
|
"dependencies": {
|
|
39
|
-
"@agentmarkup/core": "0.2.0"
|
|
39
|
+
"@agentmarkup/core": "0.3.0"
|
|
40
40
|
},
|
|
41
41
|
"peerDependencies": {
|
|
42
42
|
"astro": ">=4.0.0"
|