@agentmarkup/astro 0.3.0 → 0.3.2

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions exactly as they appear in their respective public registries.
package/README.md CHANGED
@@ -91,14 +91,18 @@ export default defineConfig({
91
91
  - Validates JSON-LD already present in page HTML
92
92
  - Generates `.md` mirrors from the final HTML output
93
93
  - Patches or creates `robots.txt` with AI crawler directives
94
- - Patches or creates `_headers` with `Content-Signal`
94
+ - Patches or creates `_headers` with `Content-Signal` and canonical `Link` headers for markdown mirrors
95
95
  - Validates common schema and crawler mistakes at build time
96
96
  - Warns when a page looks like a thin client-rendered HTML shell
97
97
  - Re-exports `@agentmarkup/core` helpers for custom pipelines
98
98
 
99
99
  By default, the Astro adapter coexists with existing machine-readable assets. If a page already contains JSON-LD for a schema type, or the site already ships a curated `llms.txt` or matching crawler rules, those are preserved unless you opt into replacement.
100
100
 
101
- Markdown mirrors and `_headers` follow the same rule: existing files are preserved unless you opt into replacement with `markdownPages.replaceExisting` or `contentSignalHeaders.replaceExisting`.
101
+ Markdown mirrors stay directly fetchable for agents, while their `_headers` entries point search engines back at the HTML page as canonical. Existing files are still preserved unless you opt into replacement with `markdownPages.replaceExisting` or `contentSignalHeaders.replaceExisting`.
102
+
103
+ When markdown mirrors are enabled, the adapter also writes canonical `Link` headers for those `.md` files so search engines can keep the HTML route as the preferred indexed URL without making the markdown mirror unavailable to direct fetchers.
104
+
105
+ When markdown mirrors are enabled, same-site page entries in `llms.txt` automatically point at the generated `.md` mirrors by default. Set `llmsTxt.preferMarkdownMirrors: false` if you want `llms.txt` to keep linking to HTML routes instead.
102
106
 
103
107
  ## Maintainer
104
108
 
package/dist/index.js CHANGED
@@ -14,6 +14,7 @@ import {
14
14
  injectHeadContent,
15
15
  injectJsonLdTags,
16
16
  markdownFileNameFromHtmlFile,
17
+ patchMarkdownCanonicalHeaders,
17
18
  patchHeadersFile,
18
19
  normalizePagePath,
19
20
  patchRobotsTxt,
@@ -34,6 +35,8 @@ function agentmarkup(config) {
34
35
  let jsonLdPages = 0;
35
36
  let markdownPages = 0;
36
37
  let markdownPagesStatus = "none";
38
+ let markdownCanonicalHeadersCount = 0;
39
+ let markdownCanonicalHeadersStatus = "none";
37
40
  let crawlersConfigured = 0;
38
41
  let robotsTxtStatus = "none";
39
42
  let contentSignalHeadersStatus = "none";
@@ -96,6 +99,7 @@ function agentmarkup(config) {
96
99
  }
97
100
  if (isFeatureEnabled(config.markdownPages)) {
98
101
  let preservedMarkdownPages = 0;
102
+ const markdownCanonicalEntries = [];
99
103
  for (const htmlFile of htmlFiles) {
100
104
  const relativeHtmlPath = relative(outDir, htmlFile).replace(/\\/g, "/");
101
105
  const markdownFileName = markdownFileNameFromHtmlFile(relativeHtmlPath);
@@ -114,15 +118,38 @@ function agentmarkup(config) {
114
118
  const existingMarkdown = existingOutputMarkdown ?? (publicDir ? await readTextFileIfExists(join(publicDir, markdownFileName)) : null);
115
119
  if (existingMarkdown && !config.markdownPages?.replaceExisting) {
116
120
  preservedMarkdownPages += 1;
121
+ markdownCanonicalEntries.push({
122
+ markdownPath: `/${markdownFileName}`,
123
+ canonicalUrl: buildCanonicalUrl(config.site, pagePath)
124
+ });
117
125
  if (!existingOutputMarkdown) {
118
126
  await writeFile(outputMarkdownPath, existingMarkdown, "utf8");
119
127
  }
120
128
  continue;
121
129
  }
122
130
  await writeFile(outputMarkdownPath, markdown, "utf8");
131
+ markdownCanonicalEntries.push({
132
+ markdownPath: `/${markdownFileName}`,
133
+ canonicalUrl: buildCanonicalUrl(config.site, pagePath)
134
+ });
123
135
  markdownPages += 1;
124
136
  }
125
137
  markdownPagesStatus = markdownPages > 0 ? "generated" : preservedMarkdownPages > 0 ? "preserved" : "none";
138
+ if (markdownCanonicalEntries.length > 0) {
139
+ const outputHeadersPath = join(outDir, "_headers");
140
+ const existingOutputHeaders = await readTextFileIfExists(outputHeadersPath);
141
+ const existingHeaders = existingOutputHeaders ?? (publicDir ? await readTextFileIfExists(join(publicDir, "_headers")) : null);
142
+ const patchedHeaders = patchMarkdownCanonicalHeaders(
143
+ existingHeaders,
144
+ markdownCanonicalEntries
145
+ );
146
+ const preserved = existingHeaders !== null && patchedHeaders === ensureTrailingNewline(existingHeaders);
147
+ markdownCanonicalHeadersCount = markdownCanonicalEntries.length;
148
+ markdownCanonicalHeadersStatus = preserved ? "preserved" : "generated";
149
+ if (!preserved || !existingOutputFileExists(outputHeadersPath)) {
150
+ await writeFile(outputHeadersPath, patchedHeaders, "utf8");
151
+ }
152
+ }
126
153
  }
127
154
  const llmsTxtContent = generateLlmsTxt(config);
128
155
  if (llmsTxtContent) {
@@ -180,7 +207,7 @@ function agentmarkup(config) {
180
207
  existingHeaders,
181
208
  config.contentSignalHeaders
182
209
  );
183
- const preserved = existingHeaders !== null && patchedHeaders === existingHeaders;
210
+ const preserved = existingHeaders !== null && patchedHeaders === ensureTrailingNewline(existingHeaders);
184
211
  contentSignalHeadersStatus = preserved ? "preserved" : "generated";
185
212
  if (!preserved || !existingOutputFileExists(outputHeadersPath)) {
186
213
  await writeFile(outputHeadersPath, patchedHeaders, "utf8");
@@ -194,6 +221,8 @@ function agentmarkup(config) {
194
221
  jsonLdPages,
195
222
  markdownPages,
196
223
  markdownPagesStatus,
224
+ markdownCanonicalHeadersCount,
225
+ markdownCanonicalHeadersStatus,
197
226
  crawlersConfigured,
198
227
  robotsTxtStatus,
199
228
  contentSignalHeadersStatus,
@@ -229,6 +258,10 @@ function pagePathFromOutputFile(outDir, filePath) {
229
258
  const candidatePath = relativePath === "index.html" ? "/" : `/${relativePath}`;
230
259
  return normalizePagePath(candidatePath);
231
260
  }
261
+ function buildCanonicalUrl(siteUrl, pagePath) {
262
+ const base = siteUrl.replace(/\/$/, "");
263
+ return pagePath === "/" ? `${base}/` : `${base}${pagePath}`;
264
+ }
232
265
  function existingOutputFileExists(filePath) {
233
266
  return existsSync(filePath);
234
267
  }
@@ -240,6 +273,10 @@ function hasMarkdownAlternateLink(html) {
240
273
  html
241
274
  );
242
275
  }
276
+ function ensureTrailingNewline(value) {
277
+ return value.endsWith("\n") ? value : `${value}
278
+ `;
279
+ }
243
280
  export {
244
281
  agentmarkup
245
282
  };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agentmarkup/astro",
3
- "version": "0.3.0",
4
- "description": "Build-time llms.txt, JSON-LD, and AI crawler controls for Astro",
3
+ "version": "0.3.2",
4
+ "description": "Build-time llms.txt, JSON-LD, markdown mirrors, headers, AI crawler controls, and validation for Astro",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
7
  "author": "Sebastian Cochinescu <hello@animafelix.com> (https://animafelix.com)",
@@ -18,11 +18,16 @@
18
18
  "astro",
19
19
  "llms-txt",
20
20
  "json-ld",
21
+ "markdown",
21
22
  "schema-org",
22
23
  "structured-data",
23
24
  "robots-txt",
25
+ "content-signal",
26
+ "headers",
24
27
  "ai-crawler",
25
- "machine-readable"
28
+ "machine-readable",
29
+ "validation",
30
+ "geo"
26
31
  ],
27
32
  "exports": {
28
33
  ".": {
@@ -36,7 +41,7 @@
36
41
  "dist"
37
42
  ],
38
43
  "dependencies": {
39
- "@agentmarkup/core": "0.3.0"
44
+ "@agentmarkup/core": "0.3.2"
40
45
  },
41
46
  "peerDependencies": {
42
47
  "astro": ">=4.0.0"