@rettangoli/sites 1.0.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,11 +32,12 @@ my-site/
32
32
 
33
33
  - YAML pages rendered through `jempl` + `yahtml`
34
34
  - Markdown pages rendered through `markdown-it` + Shiki (default `rtglMarkdown`)
35
- - Frontmatter (`template`, `tags`, arbitrary page metadata)
35
+ - Frontmatter (`template`, `url`, `tags`, arbitrary page metadata)
36
36
  - Global data from `data/*.yaml` and optional inline `sites.config.yaml data`
37
37
  - Collections built from page tags
38
38
  - `$if`, `$for`, `$partial`, template functions
39
39
  - Static file copying from `static/` to `_site/`
40
+ - Default sitemap generation when `data.site.baseUrl` is configured
40
41
  - Watch mode with local dev server + websocket reload
41
42
 
42
43
  ## Site Config
@@ -76,8 +77,20 @@ imports:
76
77
  partials:
77
78
  docs/nav: https://example.com/partials/docs-nav.yaml
78
79
  data:
80
+ site:
81
+ baseUrl: https://example.com
79
82
  themeCssHref: /public/theme.css
80
83
  themeBodyClass: dark
84
+ sitemap:
85
+ outputPath: sitemap.xml
86
+ defaults:
87
+ changefreq: weekly
88
+ priority: 0.5
89
+ exclude:
90
+ - /drafts/*
91
+ pages:
92
+ /:
93
+ priority: 1
81
94
  ```
82
95
 
83
96
  In the default starter template, CDN runtime scripts are controlled via `data/site.yaml`:
@@ -96,6 +109,66 @@ Example mappings:
96
109
  - `pages/index.md` -> `_site/index.html` and `_site/index.md`
97
110
  - `pages/docs/intro.md` -> `_site/docs/intro/index.html` and `_site/docs/intro.md`
98
111
 
112
+ For Markdown pages with a custom `url`, the copied `.md` file follows the custom URL path.
113
+ For example, `url: /guides/start/` writes `_site/guides/start/index.html` and `_site/guides/start.md`.
114
+
115
+ Pages use their file path as the URL by default:
116
+ - `pages/index.*` -> `/`
117
+ - `pages/about.*` -> `/about/`
118
+ - `pages/docs/intro.*` -> `/docs/intro/`
119
+
120
+ Set `url` in page frontmatter to override that path:
121
+
122
+ ```md
123
+ ---
124
+ title: Company
125
+ url: /company/
126
+ ---
127
+ ```
128
+
129
+ `url` is normalized to a site-relative clean URL with a leading and trailing slash, so `company` becomes `/company/`.
130
+ External URLs, query strings, fragments, whitespace, and `.` / `..` path segments are rejected.
131
+ Duplicate page URLs are rejected after normalization.
132
+
133
+ ## Sitemap
134
+
135
+ Sites writes `_site/sitemap.xml` by default when `data.site.baseUrl` is configured. Use `sitemap` in `sites.config.yaml` to customize output, or set `sitemap: false` to disable it.
136
+
137
+ ```yaml
138
+ data:
139
+ site:
140
+ baseUrl: https://example.com
141
+ sitemap:
142
+ outputPath: sitemap.xml
143
+ defaults:
144
+ changefreq: weekly
145
+ priority: 0.5
146
+ exclude:
147
+ - /drafts/*
148
+ pages:
149
+ /:
150
+ priority: 1
151
+ changefreq: daily
152
+ lastmod: "2026-05-25"
153
+ /private/: false
154
+ ```
155
+
156
+ If you do not use `data.site.baseUrl`, set `sitemap.siteUrl` instead.
157
+ Generated entries use normalized page URLs, including page frontmatter `url` overrides.
158
+ Use page frontmatter for per-page control:
159
+
160
+ ```md
161
+ ---
162
+ sitemap:
163
+ changefreq: monthly
164
+ priority: 0.8
165
+ lastmod: "2026-05-25"
166
+ ---
167
+ ```
168
+
169
+ Set `sitemap: false` in page frontmatter to exclude one page.
170
+ `sitemap.exclude` accepts exact page URLs and prefix patterns ending in `*`, such as `/drafts/*`.
171
+
99
172
  `imports` lets you map aliases to remote YAML files (HTTP/HTTPS only). Use aliases in pages/templates:
100
173
  - page frontmatter: `template: base` or `template: docs`
101
174
  - template/page content: `$partial: docs/nav`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rettangoli/sites",
3
- "version": "1.0.3",
3
+ "version": "1.2.0",
4
4
  "description": "Generate static sites using Markdown and YAML for docs, blogs, and marketing sites.",
5
5
  "author": {
6
6
  "name": "Luciano Hanyon Wu",
package/src/cli/build.js CHANGED
@@ -8,6 +8,7 @@ import { loadSiteConfig } from '../utils/loadSiteConfig.js';
8
8
  * @param {string} options.rootDir - Root directory of the site (defaults to cwd)
9
9
  * @param {string} options.outputPath - Output directory path (relative to rootDir by default)
10
10
  * @param {Object} options.md - Optional markdown renderer
11
+ * @param {Object|boolean} options.sitemap - Optional sitemap generation config
11
12
  * @param {boolean} options.quiet - Suppress build output logs
12
13
  * @param {boolean} options.isScreenshotMode - Optional build flag exposed to templates via build.isScreenshotMode
13
14
  */
@@ -17,6 +18,7 @@ export const buildSite = async (options = {}) => {
17
18
  outputPath = '_site',
18
19
  md,
19
20
  functions,
21
+ sitemap,
20
22
  quiet = false,
21
23
  isScreenshotMode = false
22
24
  } = options;
@@ -32,6 +34,7 @@ export const buildSite = async (options = {}) => {
32
34
  keepMarkdownFiles: config.build?.keepMarkdownFiles === true,
33
35
  imports: config.imports || {},
34
36
  data: config.data || {},
37
+ sitemap: sitemap === undefined ? config.sitemap : sitemap,
35
38
  functions: functions || {},
36
39
  quiet,
37
40
  isScreenshotMode
@@ -8,6 +8,7 @@ import matter from 'gray-matter';
8
8
  import MarkdownIt from 'markdown-it';
9
9
  import rtglMarkdown from './rtglMarkdown.js';
10
10
  import builtinTemplateFunctions from './builtinTemplateFunctions.js';
11
+ import { buildSitemapXml, resolveSitemapOutputPath } from './sitemap.js';
11
12
 
12
13
  const MATTER_OPTIONS = {
13
14
  engines: {
@@ -228,6 +229,123 @@ function isSchemaSidecarFile(fileName) {
228
229
  return fileName.endsWith('.schema.yaml') || fileName.endsWith('.schema.yml');
229
230
  }
230
231
 
232
/**
 * Tells whether a file name ends with one of the page source extensions.
 *
 * @param {string} fileName - File name to inspect.
 * @returns {boolean} True when the name ends in `.yaml`, `.yml` or `.md`.
 */
function hasPageExtension(fileName) {
  return ['.yaml', '.yml', '.md'].some((extension) => fileName.endsWith(extension));
}
235
+
236
/**
 * Own-property check that does not depend on the object's own
 * `hasOwnProperty` member.
 *
 * @param {object} object - Object to inspect.
 * @param {string} key - Property name.
 * @returns {boolean} True when `key` is an own property of `object`.
 */
function hasOwn(object, key) {
  const { hasOwnProperty } = Object.prototype;
  return hasOwnProperty.call(object, key);
}
239
+
240
/**
 * Converts every backslash in a relative path to a forward slash.
 *
 * @param {string} relativePath - Path that may use backslash separators.
 * @returns {string} The same path using only forward slashes.
 */
function normalizeRelativeUrlPath(relativePath) {
  return relativePath.split('\\').join('/');
}
243
+
244
/**
 * Derives a page's default URL from its path relative to the pages directory.
 *
 * The page extension (`.yaml`, `.yml`, `.md`) is dropped. A file named `index`
 * maps to its directory (`/` at the top level); any other file maps to
 * `/<path-without-extension>/`.
 *
 * @param {string} relativePath - Page path relative to the pages directory.
 * @returns {string} URL with a leading and trailing slash.
 */
function derivePageUrlFromRelativePath(relativePath) {
  const extensionlessPath = normalizeRelativeUrlPath(relativePath).replace(/\.(yaml|yml|md)$/, '');

  if (path.posix.basename(extensionlessPath) !== 'index') {
    return `/${extensionlessPath}/`;
  }

  const parentDir = path.posix.dirname(extensionlessPath);
  if (parentDir === '.') {
    return '/';
  }

  return `/${parentDir}/`;
}
256
+
257
/**
 * Normalizes a page's frontmatter `url` override into a site-relative clean URL.
 *
 * A missing leading slash is added, repeated slashes are collapsed, and the
 * result always ends with a slash, so `company` becomes `/company/`. A lone
 * `/` stays `/`.
 *
 * @param {*} rawUrl - Value of the `url` frontmatter key.
 * @param {string} pagePath - Page file path, used only in error messages.
 * @returns {string} Normalized URL path.
 * @throws {Error} When the value is not a non-empty string, contains control
 *   characters, whitespace, backslashes, a scheme or `//` prefix, a query
 *   string or fragment, a `.`/`..` segment, an encoded slash or backslash, or
 *   malformed percent-encoding. In the last case the original decoding error
 *   is attached as `cause`.
 */
function normalizePageUrlOverride(rawUrl, pagePath) {
  const invalid = (reason, options) => new Error(`Invalid url in ${pagePath}: ${reason}`, options);
  const controlCharacters = /[\u0000-\u001F\u007F]/u;
  const whitespace = /\s/u;

  if (typeof rawUrl !== 'string') {
    throw invalid('expected a string.');
  }

  if (rawUrl === '') {
    throw invalid('expected a non-empty string.');
  }

  // Control characters are checked before whitespace so tabs and newlines
  // report as control characters.
  if (controlCharacters.test(rawUrl)) {
    throw invalid('must not contain control characters.');
  }

  if (whitespace.test(rawUrl)) {
    throw invalid('must not contain whitespace.');
  }

  // A scheme prefix or protocol-relative `//` means the value is not site-relative.
  if (/^[A-Za-z][A-Za-z0-9+.-]*:/u.test(rawUrl) || rawUrl.startsWith('//')) {
    throw invalid('expected a site-relative URL path.');
  }

  if (rawUrl.includes('\\')) {
    throw invalid('must use forward slashes.');
  }

  if (rawUrl.includes('?') || rawUrl.includes('#')) {
    throw invalid('must not include query strings or fragments.');
  }

  const withLeadingSlash = rawUrl.startsWith('/') ? rawUrl : `/${rawUrl}`;
  const collapsedUrl = withLeadingSlash.replace(/\/+/g, '/');
  const pathWithoutSlashes = collapsedUrl.replace(/^\/+|\/+$/g, '');

  if (pathWithoutSlashes === '') {
    return '/';
  }

  const segments = pathWithoutSlashes.split('/');
  for (const segment of segments) {
    // Validate the decoded form so percent-encoding cannot hide a forbidden
    // segment or character.
    let decodedSegment;
    try {
      decodedSegment = decodeURIComponent(segment);
    } catch (error) {
      throw invalid('contains invalid URL encoding.', { cause: error });
    }

    if (decodedSegment === '.' || decodedSegment === '..') {
      throw invalid('must not contain "." or ".." segments.');
    }

    if (decodedSegment.includes('/') || decodedSegment.includes('\\')) {
      throw invalid('must not include encoded slashes or backslashes.');
    }

    if (controlCharacters.test(decodedSegment)) {
      throw invalid('must not contain control characters.');
    }

    if (whitespace.test(decodedSegment)) {
      throw invalid('must not contain whitespace.');
    }
  }

  return `/${segments.join('/')}/`;
}
322
+
323
/**
 * Picks the URL for a page: the normalized frontmatter `url` override when the
 * frontmatter has its own `url` key, otherwise the URL derived from the file path.
 *
 * @param {object} publicFrontmatter - Page frontmatter.
 * @param {string} relativePath - Page path relative to the pages directory.
 * @param {string} pagePath - Page file path passed on for override error messages.
 * @returns {string} The page URL.
 */
function resolvePageUrl(publicFrontmatter, relativePath, pagePath) {
  return hasOwn(publicFrontmatter, 'url')
    ? normalizePageUrlOverride(publicFrontmatter.url, pagePath)
    : derivePageUrlFromRelativePath(relativePath);
}
330
+
331
/**
 * Maps a normalized page URL to the relative path of its `index.html` file.
 *
 * @param {string} url - Page URL with leading and trailing slash (`/` for the root).
 * @returns {string} `index.html` for the root, otherwise `<url-path>/index.html`.
 */
function htmlOutputRelativePathFromUrl(url) {
  const directoryPrefix = url === '/' ? '' : `${url.slice(1, -1)}/`;
  return `${directoryPrefix}index.html`;
}
338
+
339
/**
 * Maps a normalized page URL to the relative path of its copied Markdown file.
 *
 * The last URL segment becomes the file name, so `/guides/start/` maps to
 * `guides/start.md`.
 *
 * @param {string} url - Page URL with leading and trailing slash (`/` for the root).
 * @returns {string} `index.md` for the root, otherwise `<url-path>.md`.
 */
function markdownOutputRelativePathFromUrl(url) {
  if (url === '/') {
    return 'index.md';
  }

  // Appending `.md` to the slash-trimmed path is the same as renaming the
  // final segment and re-joining.
  const trimmedPath = url.slice(1, -1);
  return `${trimmedPath}.md`;
}
348
+
231
349
  async function fetchRemoteYaml(url, fetchImpl, aliasLabel) {
232
350
  const effectiveFetch = fetchImpl || globalThis.fetch;
233
351
  if (typeof effectiveFetch !== 'function') {
@@ -318,6 +436,7 @@ export function createSiteBuilder({
318
436
  keepMarkdownFiles = false,
319
437
  imports = {},
320
438
  data = {},
439
+ sitemap,
321
440
  fetchImpl,
322
441
  functions = {},
323
442
  quiet = false,
@@ -485,12 +604,11 @@ export function createSiteBuilder({
485
604
  };
486
605
  }
487
606
 
488
- // Function to scan all pages and build collections
489
- function buildCollections() {
490
- const collections = {};
607
+ function collectPageEntries() {
608
+ const pageEntries = [];
491
609
  const pagesDir = path.join(rootDir, 'pages');
492
610
 
493
- function scanPages(dir, basePath = '') {
611
+ function scanPages(basePath = '') {
494
612
  const fullDir = path.join(pagesDir, basePath);
495
613
  if (!fs.existsSync(fullDir)) return;
496
614
 
@@ -502,82 +620,122 @@ export function createSiteBuilder({
502
620
  const itemKind = resolveDirentKind(fs, itemPath, item);
503
621
 
504
622
  if (itemKind === 'directory') {
505
- // Recursively scan subdirectories
506
- scanPages(dir, relativePath);
507
- } else if (itemKind === 'file' && (item.name.endsWith('.yaml') || item.name.endsWith('.yml') || item.name.endsWith('.md'))) {
508
- // Extract frontmatter and content
623
+ scanPages(relativePath);
624
+ } else if (itemKind === 'file' && hasPageExtension(item.name)) {
509
625
  const { frontmatter, content } = extractFrontmatterAndContent(itemPath);
510
- const { frontmatter: publicFrontmatter } = splitSystemFrontmatter(frontmatter, globalData, itemPath);
626
+ const { frontmatter: publicFrontmatter, bindings } = splitSystemFrontmatter(frontmatter, globalData, itemPath);
627
+ const hasCustomUrl = hasOwn(publicFrontmatter, 'url');
628
+ const url = resolvePageUrl(publicFrontmatter, relativePath, itemPath);
629
+ const exposedFrontmatter = hasCustomUrl
630
+ ? { ...publicFrontmatter, url }
631
+ : publicFrontmatter;
632
+
633
+ pageEntries.push({
634
+ pagePath: itemPath,
635
+ relativePath: normalizeRelativeUrlPath(relativePath),
636
+ isMarkdown: item.name.endsWith('.md'),
637
+ content,
638
+ frontmatter: exposedFrontmatter,
639
+ bindings,
640
+ url,
641
+ hasCustomUrl
642
+ });
643
+ }
644
+ }
645
+ }
511
646
 
512
- // Calculate URL
513
- const baseFileName = item.name.replace(/\.(yaml|yml|md)$/, '');
514
- let url;
647
+ scanPages();
648
+ return pageEntries;
649
+ }
515
650
 
516
- // Special case: index files remain at root, others become directories
517
- if (baseFileName === 'index') {
518
- url = basePath ? '/' + basePath.replace(/\\/g, '/') : '/';
519
- if (url !== '/') {
520
- url = url + '/';
521
- }
522
- } else {
523
- const pagePath = basePath ? path.join(basePath, baseFileName) : baseFileName;
524
- url = '/' + pagePath.replace(/\\/g, '/') + '/';
525
- }
651
+ function assertUniquePageUrls(pageEntries) {
652
+ const seen = new Map();
526
653
 
527
- // Process tags
528
- if (publicFrontmatter.tags) {
529
- // Normalize tags to array
530
- const tags = Array.isArray(publicFrontmatter.tags) ? publicFrontmatter.tags : [publicFrontmatter.tags];
531
-
532
- // Add to collections
533
- tags.forEach(tag => {
534
- if (typeof tag === 'string' && tag.trim()) {
535
- const trimmedTag = tag.trim();
536
- if (!collections[trimmedTag]) {
537
- collections[trimmedTag] = [];
538
- }
539
- collections[trimmedTag].push({
540
- data: publicFrontmatter,
541
- url: url,
542
- content: content
543
- });
544
- }
654
+ for (const entry of pageEntries) {
655
+ const existingEntry = seen.get(entry.url);
656
+ if (existingEntry) {
657
+ throw new Error(`Duplicate page URL "${entry.url}" in ${entry.pagePath}; already used by ${existingEntry.pagePath}.`);
658
+ }
659
+ seen.set(entry.url, entry);
660
+ }
661
+ }
662
+
663
/**
 * Fails when two Markdown pages would be copied to the same output `.md` path.
 *
 * Does nothing unless `keepMarkdownFiles` is enabled. Pages with a custom URL
 * use the path derived from that URL; other pages use their relative source path.
 *
 * @param {Array<object>} pageEntries - Collected page entries.
 * @throws {Error} Naming both page files when an output path is used twice.
 */
function assertUniqueMarkdownOutputPaths(pageEntries) {
  if (!keepMarkdownFiles) {
    return;
  }

  const ownerByOutputPath = new Map();
  const markdownEntries = pageEntries.filter((entry) => entry.isMarkdown);

  for (const entry of markdownEntries) {
    let markdownOutputRelativePath = entry.relativePath;
    if (entry.hasCustomUrl) {
      markdownOutputRelativePath = markdownOutputRelativePathFromUrl(entry.url);
    }

    const previousOwner = ownerByOutputPath.get(markdownOutputRelativePath);
    if (previousOwner) {
      throw new Error(`Duplicate markdown output path "${markdownOutputRelativePath}" in ${entry.pagePath}; already used by ${previousOwner.pagePath}.`);
    }

    ownerByOutputPath.set(markdownOutputRelativePath, entry);
  }
}
684
+
685
+ // Build tag collections from the already-collected page entries
686
+ function buildCollections(pageEntries) {
687
+ const collections = {};
688
+
689
+ for (const entry of pageEntries) {
690
+ const publicFrontmatter = entry.frontmatter;
691
+
692
+ // Process tags
693
+ if (publicFrontmatter.tags) {
694
+ // Normalize tags to array
695
+ const tags = Array.isArray(publicFrontmatter.tags) ? publicFrontmatter.tags : [publicFrontmatter.tags];
696
+
697
+ // Add to collections
698
+ tags.forEach(tag => {
699
+ if (typeof tag === 'string' && tag.trim()) {
700
+ const trimmedTag = tag.trim();
701
+ if (!collections[trimmedTag]) {
702
+ collections[trimmedTag] = [];
703
+ }
704
+ collections[trimmedTag].push({
705
+ data: publicFrontmatter,
706
+ url: entry.url,
707
+ content: entry.content
545
708
  });
546
709
  }
547
- }
710
+ });
548
711
  }
549
712
  }
550
713
 
551
- scanPages('');
552
714
  return collections;
553
715
  }
554
716
 
717
+ const pageEntries = collectPageEntries();
718
+ assertUniquePageUrls(pageEntries);
719
+ assertUniqueMarkdownOutputPaths(pageEntries);
720
+
555
721
  // Build collections in first pass
556
722
  if (!quiet) console.log('Building collections...');
557
- const collections = buildCollections();
723
+ const collections = buildCollections(pageEntries);
558
724
 
559
725
  // Function to process a single page file
560
- async function processPage(pagePath, outputRelativePath, isMarkdown = false, markdownOutputRelativePath = null) {
561
- if (!quiet) console.log(`Processing ${pagePath}...`);
562
-
563
- const { frontmatter, content: rawContent } = extractFrontmatterAndContent(pagePath);
564
- const { frontmatter: publicFrontmatter, bindings: boundData } = splitSystemFrontmatter(frontmatter, globalData, pagePath);
565
-
566
- // Calculate URL for current page
567
- let url;
568
- const fileName = path.basename(outputRelativePath, '.html');
569
- const basePath = path.dirname(outputRelativePath);
726
+ async function processPage(pageEntry) {
727
+ const {
728
+ pagePath,
729
+ content: rawContent,
730
+ frontmatter: publicFrontmatter,
731
+ bindings: boundData,
732
+ isMarkdown,
733
+ url,
734
+ hasCustomUrl,
735
+ relativePath
736
+ } = pageEntry;
570
737
 
571
- // Special case: index files remain at root, others become directories
572
- if (fileName === 'index') {
573
- url = basePath && basePath !== '.' ? '/' + basePath.replace(/\\/g, '/') : '/';
574
- if (url !== '/') {
575
- url = url + '/';
576
- }
577
- } else {
578
- const pagePath = basePath && basePath !== '.' ? path.join(basePath, fileName) : fileName;
579
- url = '/' + pagePath.replace(/\\/g, '/') + '/';
580
- }
738
+ if (!quiet) console.log(`Processing ${pagePath}...`);
581
739
 
582
740
  // Deep merge global data with frontmatter and collections for the page context
583
741
  const pageData = deepMerge(globalData, publicFrontmatter);
@@ -648,35 +806,9 @@ export function createSiteBuilder({
648
806
  htmlString = convertToHtml(resultArray);
649
807
  }
650
808
 
651
- // Create output directory and file path for new index.html structure
652
- const pageFileName = path.basename(outputRelativePath, '.html');
653
- const dirPath = path.dirname(outputRelativePath);
654
-
655
- let outputPath, outputDir;
656
-
657
- // Special case: index files remain as index.html, others become directory/index.html
658
- if (pageFileName === 'index') {
659
- if (dirPath && dirPath !== '.') {
660
- // Nested index file: pages/blog/index.yaml -> _site/blog/index.html
661
- outputPath = path.join(outputRootDir, dirPath, 'index.html');
662
- outputDir = path.join(outputRootDir, dirPath);
663
- } else {
664
- // Root index file: pages/index.yaml -> _site/index.html
665
- outputPath = path.join(outputRootDir, 'index.html');
666
- outputDir = path.join(outputRootDir);
667
- }
668
- } else {
669
- // Regular file: pages/test.yaml -> _site/test/index.html
670
- if (dirPath && dirPath !== '.') {
671
- // Nested regular file: pages/blog/post.yaml -> _site/blog/post/index.html
672
- outputPath = path.join(outputRootDir, dirPath, pageFileName, 'index.html');
673
- outputDir = path.join(outputRootDir, dirPath, pageFileName);
674
- } else {
675
- // Root level regular file: pages/test.yaml -> _site/test/index.html
676
- outputPath = path.join(outputRootDir, pageFileName, 'index.html');
677
- outputDir = path.join(outputRootDir, pageFileName);
678
- }
679
- }
809
+ const outputRelativePath = htmlOutputRelativePathFromUrl(url);
810
+ const outputPath = path.join(outputRootDir, ...outputRelativePath.split('/'));
811
+ const outputDir = path.dirname(outputPath);
680
812
 
681
813
  if (!fs.existsSync(outputDir)) {
682
814
  fs.mkdirSync(outputDir, { recursive: true });
@@ -686,8 +818,11 @@ export function createSiteBuilder({
686
818
  fs.writeFileSync(outputPath, htmlString);
687
819
  if (!quiet) console.log(` -> Written to ${outputPath}`);
688
820
 
689
- if (isMarkdown && keepMarkdownFiles && typeof markdownOutputRelativePath === 'string') {
690
- const markdownOutputPath = path.join(outputRootDir, markdownOutputRelativePath);
821
+ if (isMarkdown && keepMarkdownFiles) {
822
+ const markdownOutputRelativePath = hasCustomUrl
823
+ ? markdownOutputRelativePathFromUrl(url)
824
+ : relativePath;
825
+ const markdownOutputPath = path.join(outputRootDir, ...markdownOutputRelativePath.split('/'));
691
826
  const markdownOutputDir = path.dirname(markdownOutputPath);
692
827
  if (!fs.existsSync(markdownOutputDir)) {
693
828
  fs.mkdirSync(markdownOutputDir, { recursive: true });
@@ -697,38 +832,28 @@ export function createSiteBuilder({
697
832
  }
698
833
  }
699
834
 
700
- // Process all YAML and Markdown files in pages directory recursively
701
- async function processAllPages(dir, basePath = '') {
702
- const pagesDir = path.join(rootDir, 'pages');
703
- const fullDir = path.join(pagesDir, basePath);
704
-
705
- if (!fs.existsSync(fullDir)) return;
835
+ async function processAllPages() {
836
+ for (const pageEntry of pageEntries) {
837
+ await processPage(pageEntry);
838
+ }
839
+ }
706
840
 
707
- const items = fs.readdirSync(fullDir, { withFileTypes: true });
841
+ function writeSitemap() {
842
+ const sitemapXml = buildSitemapXml({ pageEntries, sitemap, globalData });
843
+ if (sitemapXml === null) {
844
+ return;
845
+ }
708
846
 
709
- for (const item of items) {
710
- const itemPath = path.join(fullDir, item.name);
711
- const relativePath = basePath ? path.join(basePath, item.name) : item.name;
712
- const itemKind = resolveDirentKind(fs, itemPath, item);
847
+ const sitemapOutputRelativePath = resolveSitemapOutputPath(sitemap);
848
+ const sitemapOutputPath = path.join(outputRootDir, ...sitemapOutputRelativePath.split('/'));
849
+ const sitemapOutputDir = path.dirname(sitemapOutputPath);
713
850
 
714
- if (itemKind === 'directory') {
715
- // Recursively process subdirectories
716
- await processAllPages(dir, relativePath);
717
- } else if (itemKind === 'file') {
718
- if (item.name.endsWith('.yaml') || item.name.endsWith('.yml')) {
719
- // Process YAML file
720
- const outputFileName = item.name.replace(/\.(yaml|yml)$/, '.html');
721
- const outputRelativePath = basePath ? path.join(basePath, outputFileName) : outputFileName;
722
- await processPage(itemPath, outputRelativePath, false);
723
- } else if (item.name.endsWith('.md')) {
724
- // Process Markdown file
725
- const outputFileName = item.name.replace('.md', '.html');
726
- const outputRelativePath = basePath ? path.join(basePath, outputFileName) : outputFileName;
727
- await processPage(itemPath, outputRelativePath, true, relativePath);
728
- }
729
- // Ignore other file types
730
- }
851
+ if (!fs.existsSync(sitemapOutputDir)) {
852
+ fs.mkdirSync(sitemapOutputDir, { recursive: true });
731
853
  }
854
+
855
+ fs.writeFileSync(sitemapOutputPath, sitemapXml);
856
+ if (!quiet) console.log(` -> Written sitemap to ${sitemapOutputPath}`);
732
857
  }
733
858
 
734
859
  // Function to copy static files recursively
@@ -784,7 +909,10 @@ export function createSiteBuilder({
784
909
  copyStaticFiles();
785
910
 
786
911
  // Process all pages (can overwrite static files)
787
- await processAllPages('');
912
+ await processAllPages();
913
+
914
+ // Generate sitemap after pages so it can overwrite static files if configured.
915
+ writeSitemap();
788
916
 
789
917
  if (!quiet) console.log('Build complete!');
790
918
  };
package/src/sitemap.js ADDED
@@ -0,0 +1,466 @@
1
// `changefreq` values accepted by normalizeChangefreq.
const ALLOWED_CHANGEFREQS = new Set([
  'always',
  'hourly',
  'daily',
  'weekly',
  'monthly',
  'yearly',
  'never',
]);

// Option names accepted at the top level of a sitemap config object.
const ALLOWED_TOP_LEVEL_KEYS = new Set([
  'enabled',
  'siteUrl',
  'outputPath',
  'defaults',
  'exclude',
  'pages',
]);

// Entry options accepted when `exclude` is not allowed.
const ALLOWED_DEFAULT_KEYS = new Set(['changefreq', 'priority', 'lastmod']);

// The same entry options plus `exclude`.
const ALLOWED_ENTRY_KEYS = new Set([...ALLOWED_DEFAULT_KEYS, 'exclude']);

// Shape of an accepted `lastmod` string: `YYYY-MM-DD`, optionally followed by
// `Thh:mm:ss`, optional fractional seconds, and a `Z` or `±hh:mm` offset.
const SITEMAP_DATE_RE = /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2}))?$/u;
6
+
7
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * Always returns a real boolean; a falsy input such as `null`, `0` or `''`
 * yields `false` rather than the input itself.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for objects that are neither `null` nor arrays.
 */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}
10
+
11
/**
 * Own-property check that does not rely on the object's own
 * `hasOwnProperty` member.
 *
 * @param {object} object - Object to inspect.
 * @param {string} key - Property name.
 * @returns {boolean} True when `key` is an own property of `object`.
 */
function hasOwn(object, key) {
  return Reflect.apply(Object.prototype.hasOwnProperty, object, [key]);
}
14
+
15
/**
 * Throws when a raw URL value cannot be a site-relative URL path.
 *
 * Checks run in a fixed order and the first failure is reported: type,
 * emptiness, control characters, whitespace, scheme or `//` prefix,
 * backslashes, then query strings and fragments.
 *
 * @param {*} rawUrl - Value to validate.
 * @param {string} contextLabel - Prefix for the error message.
 * @returns {void}
 * @throws {Error} `<contextLabel>: <reason>` for the first failed check.
 */
function rejectInvalidUrlString(rawUrl, contextLabel) {
  const fail = (reason) => {
    throw new Error(`${contextLabel}: ${reason}`);
  };

  if (typeof rawUrl !== 'string') {
    fail('expected a string.');
  }

  if (rawUrl === '') {
    fail('expected a non-empty string.');
  }

  // Ordered [violated, reason] pairs; only the first violated rule is reported.
  const rules = [
    [/[\u0000-\u001F\u007F]/u.test(rawUrl), 'must not contain control characters.'],
    [/\s/u.test(rawUrl), 'must not contain whitespace.'],
    [
      /^[A-Za-z][A-Za-z0-9+.-]*:/u.test(rawUrl) || rawUrl.startsWith('//'),
      'expected a site-relative URL path.',
    ],
    [rawUrl.includes('\\'), 'must use forward slashes.'],
    [rawUrl.includes('?') || rawUrl.includes('#'), 'must not include query strings or fragments.'],
  ];

  const firstViolation = rules.find(([violated]) => violated);
  if (firstViolation) {
    fail(firstViolation[1]);
  }
}
44
+
45
/**
 * Normalizes a sitemap URL path into a site-relative clean URL.
 *
 * After `rejectInvalidUrlString` accepts the raw value, a missing leading
 * slash is added, repeated slashes are collapsed, and the result always ends
 * with a slash. A path made only of slashes yields `/`.
 *
 * @param {*} rawUrl - URL path to normalize.
 * @param {string} contextLabel - Prefix for error messages.
 * @returns {string} Normalized URL path.
 * @throws {Error} When the raw value is rejected, or when a segment has
 *   malformed percent-encoding (original error attached as `cause`), decodes to
 *   `.`/`..`, or decodes to text containing a slash, backslash, control
 *   character or whitespace.
 */
export function normalizeSitemapUrlPath(rawUrl, contextLabel) {
  rejectInvalidUrlString(rawUrl, contextLabel);

  const withLeadingSlash = rawUrl.startsWith('/') ? rawUrl : `/${rawUrl}`;
  const collapsedUrl = withLeadingSlash.replace(/\/+/g, '/');
  const pathWithoutSlashes = collapsedUrl.replace(/^\/+|\/+$/g, '');

  if (pathWithoutSlashes === '') {
    return '/';
  }

  const segments = pathWithoutSlashes.split('/');
  for (const segment of segments) {
    // Validate the decoded form so percent-encoding cannot hide a forbidden
    // segment or character.
    let decodedSegment;
    try {
      decodedSegment = decodeURIComponent(segment);
    } catch (error) {
      throw new Error(`${contextLabel}: contains invalid URL encoding.`, { cause: error });
    }

    if (decodedSegment === '.' || decodedSegment === '..') {
      throw new Error(`${contextLabel}: must not contain "." or ".." segments.`);
    }

    if (decodedSegment.includes('/') || decodedSegment.includes('\\')) {
      throw new Error(`${contextLabel}: must not include encoded slashes or backslashes.`);
    }

    if (/[\u0000-\u001F\u007F]/u.test(decodedSegment)) {
      throw new Error(`${contextLabel}: must not contain control characters.`);
    }

    if (/\s/u.test(decodedSegment)) {
      throw new Error(`${contextLabel}: must not contain whitespace.`);
    }
  }

  return `/${segments.join('/')}/`;
}
84
+
85
/**
 * Normalizes a sitemap exclude pattern.
 *
 * A pattern ending in `*` is a prefix pattern: the text before the `*` is
 * normalized as a URL path and the `*` is re-appended. Any other pattern is
 * normalized as an exact URL path.
 *
 * @param {*} rawPattern - Pattern from the sitemap config.
 * @param {string} contextLabel - Prefix for error messages.
 * @returns {string} Normalized exact URL, or normalized prefix followed by `*`.
 * @throws {Error} When the pattern is not a string or its path part is invalid.
 */
function normalizeSitemapUrlPattern(rawPattern, contextLabel) {
  if (typeof rawPattern !== 'string') {
    throw new Error(`${contextLabel}: expected a string.`);
  }

  if (!rawPattern.endsWith('*')) {
    return normalizeSitemapUrlPath(rawPattern, contextLabel);
  }

  const rawPrefix = rawPattern.slice(0, -1);
  // A bare "*" leaves an empty prefix, which the path normalizer would reject
  // as an empty string. Treat it like "/*", the same way a missing leading
  // slash is supplied for other relative patterns.
  const prefix = rawPrefix === '' ? '/' : normalizeSitemapUrlPath(rawPrefix, contextLabel);
  return `${prefix}*`;
}
97
+
98
/**
 * Validates an absolute site URL and returns it without trailing slashes.
 *
 * @param {*} siteUrl - Candidate site URL.
 * @param {string} contextLabel - Prefix for error messages.
 * @returns {string} The parsed URL serialized without a trailing slash.
 * @throws {Error} When the value is not a non-empty string, cannot be parsed
 *   as a URL, uses a protocol other than `http:`/`https:`, or carries a query
 *   string or fragment.
 */
function validateSiteUrl(siteUrl, contextLabel) {
  const isNonBlankString = typeof siteUrl === 'string' && siteUrl.trim() !== '';
  if (!isNonBlankString) {
    throw new Error(`${contextLabel}: expected a non-empty URL string.`);
  }

  let parsedUrl;
  try {
    parsedUrl = new URL(siteUrl);
  } catch {
    throw new Error(`${contextLabel}: "${siteUrl}" is not a valid URL.`);
  }

  const { protocol, search, hash } = parsedUrl;
  if (!['http:', 'https:'].includes(protocol)) {
    throw new Error(`${contextLabel}: protocol "${protocol}" is not supported. Allowed protocols: http:, https:.`);
  }

  if (search !== '' || hash !== '') {
    throw new Error(`${contextLabel}: must not include query strings or fragments.`);
  }

  // Strip trailing slashes from the path, then the single slash the
  // serializer emits for an empty path.
  parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/u, '');
  const serialized = parsedUrl.toString();
  return serialized.replace(/\/$/u, '');
}
121
+
122
/**
 * Validates a sitemap output path and returns it with empty segments removed.
 *
 * @param {*} outputPath - Candidate output path.
 * @param {string} contextLabel - Prefix for error messages.
 * @returns {string} The path's non-empty segments joined with `/`.
 * @throws {Error} When the value is not a non-empty string, contains control
 *   characters or whitespace, starts with `/` or a scheme-like prefix, contains
 *   a backslash, `?` or `#`, or has a `.`/`..` segment.
 */
function validateOutputPath(outputPath, contextLabel) {
  const reject = (reason) => {
    throw new Error(`${contextLabel}: ${reason}`);
  };

  if (typeof outputPath !== 'string' || outputPath.trim() === '') {
    reject('expected a non-empty string.');
  }

  if (/[\u0000-\u001F\u007F]/u.test(outputPath)) {
    reject('must not contain control characters.');
  }

  if (/\s/u.test(outputPath)) {
    reject('must not contain whitespace.');
  }

  const looksAbsolute = outputPath.startsWith('/') || /^[A-Za-z][A-Za-z0-9+.-]*:/u.test(outputPath);
  if (looksAbsolute) {
    reject('expected a relative output path.');
  }

  if (['\\', '?', '#'].some((character) => outputPath.includes(character))) {
    reject('must be a clean relative file path.');
  }

  const segments = outputPath.split('/').filter((segment) => segment !== '');
  if (segments.length === 0) {
    reject('expected a relative file path.');
  }

  if (segments.some((segment) => segment === '.' || segment === '..')) {
    reject('must not contain "." or ".." segments.');
  }

  return segments.join('/');
}
156
+
157
/**
 * Checks that a string already matching `SITEMAP_DATE_RE` names a real date
 * (and time and offset, when present) rather than just the right digit layout.
 *
 * @param {string} text - String in the layout accepted by `SITEMAP_DATE_RE`.
 * @returns {boolean} False for impossible values such as month 13 or hour 25.
 */
function isRealSitemapDateTime(text) {
  const year = Number(text.slice(0, 4));
  const month = Number(text.slice(5, 7));
  const day = Number(text.slice(8, 10));

  // Round-trip through a Date: out-of-range fields roll over and stop matching.
  // setUTCFullYear is used because Date.UTC remaps years 0-99 to 1900-1999.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  const isRealDate = probe.getUTCFullYear() === year
    && probe.getUTCMonth() === month - 1
    && probe.getUTCDate() === day;
  if (!isRealDate) {
    return false;
  }

  // Date-only form has nothing more to check.
  if (text.length === 10) {
    return true;
  }

  const hours = Number(text.slice(11, 13));
  const minutes = Number(text.slice(14, 16));
  const seconds = Number(text.slice(17, 19));
  if (hours > 23 || minutes > 59 || seconds > 59) {
    return false;
  }

  if (text.endsWith('Z')) {
    return true;
  }

  // Otherwise the string ends with a `±hh:mm` offset.
  const offsetHours = Number(text.slice(-5, -3));
  const offsetMinutes = Number(text.slice(-2));
  return offsetHours <= 23 && offsetMinutes <= 59;
}

/**
 * Normalizes a `lastmod` value into the string written to the sitemap.
 *
 * A `Date` is serialized with `toISOString()`. A string is trimmed and must
 * match `SITEMAP_DATE_RE` and describe a real calendar date and time.
 *
 * @param {Date|string} lastmod - Value from config or frontmatter.
 * @param {string} contextLabel - Prefix for error messages.
 * @returns {string} ISO timestamp for a `Date`, otherwise the trimmed string.
 * @throws {Error} When the value is an invalid `Date`, not a non-empty string,
 *   not in the expected format, or not a real date/time.
 */
function normalizeLastmod(lastmod, contextLabel) {
  if (lastmod instanceof Date) {
    if (Number.isNaN(lastmod.getTime())) {
      throw new Error(`${contextLabel}: expected a valid date.`);
    }
    return lastmod.toISOString();
  }

  if (typeof lastmod !== 'string' || lastmod.trim() === '') {
    throw new Error(`${contextLabel}: expected an ISO date string.`);
  }

  const trimmed = lastmod.trim();
  if (!SITEMAP_DATE_RE.test(trimmed)) {
    throw new Error(`${contextLabel}: expected an ISO date string like "2026-05-25" or "2026-05-25T12:00:00Z".`);
  }

  // The regex only checks the digit layout; reject values like "2026-13-45".
  if (!isRealSitemapDateTime(trimmed)) {
    throw new Error(`${contextLabel}: expected a valid date.`);
  }

  return trimmed;
}
176
+
177
/**
 * Validates a sitemap priority.
 *
 * @param {*} priority - Candidate priority.
 * @param {string} contextLabel - Prefix for the error message.
 * @returns {number} The priority, unchanged.
 * @throws {Error} When the value is not a finite number between 0 and 1 inclusive.
 */
function normalizePriority(priority, contextLabel) {
  const isValid = typeof priority === 'number'
    && Number.isFinite(priority)
    && priority >= 0
    && priority <= 1;

  if (!isValid) {
    throw new Error(`${contextLabel}: expected a number from 0 to 1.`);
  }

  return priority;
}
188
+
189
/**
 * Validates a sitemap `changefreq` value.
 *
 * @param {*} changefreq - Candidate value.
 * @param {string} contextLabel - Prefix for error messages.
 * @returns {string} The trimmed value.
 * @throws {Error} When the value is not a non-empty string or is not a member
 *   of `ALLOWED_CHANGEFREQS`.
 */
function normalizeChangefreq(changefreq, contextLabel) {
  const candidate = typeof changefreq === 'string' ? changefreq.trim() : '';
  if (candidate === '') {
    throw new Error(`${contextLabel}: expected a non-empty string.`);
  }

  if (ALLOWED_CHANGEFREQS.has(candidate)) {
    return candidate;
  }

  throw new Error(`${contextLabel}: expected one of ${[...ALLOWED_CHANGEFREQS].join(', ')}.`);
}
201
+
202
/**
 * Validates and normalizes a set of sitemap entry options.
 *
 * Unknown keys are rejected first. Then `changefreq`, `priority` and `lastmod`
 * are normalized when defined, followed by `exclude` when it is allowed.
 *
 * @param {*} value - Raw options object.
 * @param {string} contextLabel - Prefix for error messages.
 * @param {{ allowExclude: boolean }} options - Whether `exclude` is an accepted key.
 * @returns {object} New object holding only the normalized options that were defined.
 * @throws {Error} When `value` is not an object, has an unsupported key, has a
 *   non-boolean `exclude`, or a field normalizer rejects its value.
 */
function normalizeEntryOptions(value, contextLabel, { allowExclude }) {
  if (!isPlainObject(value)) {
    throw new Error(`${contextLabel}: expected an object.`);
  }

  const allowedKeys = allowExclude ? ALLOWED_ENTRY_KEYS : ALLOWED_DEFAULT_KEYS;
  const unsupportedKey = Object.keys(value).find((key) => !allowedKeys.has(key));
  if (unsupportedKey !== undefined) {
    throw new Error(`${contextLabel}: unsupported option "${unsupportedKey}". Supported options: ${[...allowedKeys].join(', ')}.`);
  }

  // Order fixes both the validation order and the key order of the result.
  const fieldNormalizers = [
    ['changefreq', normalizeChangefreq],
    ['priority', normalizePriority],
    ['lastmod', normalizeLastmod],
  ];

  const normalized = {};
  for (const [field, normalizeField] of fieldNormalizers) {
    if (value[field] !== undefined) {
      normalized[field] = normalizeField(value[field], `${contextLabel}.${field}`);
    }
  }

  if (allowExclude && value.exclude !== undefined) {
    if (typeof value.exclude !== 'boolean') {
      throw new Error(`${contextLabel}.exclude: expected a boolean.`);
    }
    normalized.exclude = value.exclude;
  }

  return normalized;
}
237
+
238
/**
 * Normalizes the `sitemap.pages` map of per-URL options.
 *
 * Each key is normalized as a sitemap URL path. A value of `false` becomes
 * `{ exclude: true }`; any other value is normalized as entry options with
 * `exclude` allowed.
 *
 * @param {*} value - Raw `sitemap.pages` value.
 * @param {string} configPath - Config location used in error messages.
 * @returns {object} Map from normalized URL to normalized options.
 * @throws {Error} When `value` is not an object, or a key or its options are invalid.
 */
function normalizePagesConfig(value, configPath) {
  if (!isPlainObject(value)) {
    throw new Error(`Invalid sitemap.pages in "${configPath}": expected an object.`);
  }

  return Object.entries(value).reduce((pages, [rawUrl, rawOptions]) => {
    const url = normalizeSitemapUrlPath(rawUrl, `Invalid sitemap.pages URL "${rawUrl}" in "${configPath}"`);

    pages[url] = rawOptions === false
      ? { exclude: true }
      : normalizeEntryOptions(rawOptions, `Invalid sitemap.pages.${rawUrl} in "${configPath}"`, { allowExclude: true });

    return pages;
  }, {});
}
256
+
257
/**
 * Validate and normalize the top-level `sitemap` configuration.
 *
 * - `undefined` / `null` -> `undefined` (nothing configured)
 * - boolean -> `{ enabled: <boolean> }`
 * - plain object -> `{ enabled, siteUrl?, outputPath?, defaults?, exclude?, pages? }`
 *   with `enabled` defaulting to `true`
 *
 * @param {*} value - Raw sitemap config.
 * @param {string} [configPath='sitemap config'] - Location used in error messages.
 * @returns {object|undefined} Normalized config, or `undefined` when unset.
 * @throws {Error} On a non-boolean/non-object value, an unsupported key, or an
 *   invalid option value (the delegated validators may also throw).
 */
export function normalizeSitemapConfig(value, configPath = 'sitemap config') {
  if (value === null || value === undefined) {
    return undefined;
  }

  if (value === true || value === false) {
    return { enabled: value };
  }

  if (!isPlainObject(value)) {
    throw new Error(`Invalid sitemap config in "${configPath}": expected a boolean or object.`);
  }

  const unsupportedKey = Object.keys(value).find((name) => !ALLOWED_TOP_LEVEL_KEYS.has(name));
  if (unsupportedKey !== undefined) {
    throw new Error(
      `Unsupported sitemap option "${unsupportedKey}" in "${configPath}". Supported options: ${Array.from(ALLOWED_TOP_LEVEL_KEYS).join(', ')}.`
    );
  }

  const { enabled, siteUrl, outputPath, defaults, exclude, pages } = value;
  const result = { enabled: true };

  if (enabled !== undefined) {
    if (typeof enabled !== 'boolean') {
      throw new Error(`Invalid sitemap.enabled in "${configPath}": expected a boolean.`);
    }
    result.enabled = enabled;
  }

  if (siteUrl !== undefined) {
    result.siteUrl = validateSiteUrl(siteUrl, `Invalid sitemap.siteUrl in "${configPath}"`);
  }

  if (outputPath !== undefined) {
    result.outputPath = validateOutputPath(outputPath, `Invalid sitemap.outputPath in "${configPath}"`);
  }

  if (defaults !== undefined) {
    // Defaults apply to every page, so a blanket `exclude` is not permitted here.
    result.defaults = normalizeEntryOptions(defaults, `Invalid sitemap.defaults in "${configPath}"`, { allowExclude: false });
  }

  if (exclude !== undefined) {
    if (!Array.isArray(exclude)) {
      throw new Error(`Invalid sitemap.exclude in "${configPath}": expected an array.`);
    }
    result.exclude = exclude.map((pattern, index) => {
      const label = `Invalid sitemap.exclude[${index}] in "${configPath}"`;
      return normalizeSitemapUrlPattern(pattern, label);
    });
  }

  if (pages !== undefined) {
    result.pages = normalizePagesConfig(pages, configPath);
  }

  return result;
}
314
+
315
/**
 * Resolve the absolute site URL used to build sitemap `<loc>` values.
 *
 * Resolution order:
 *   1. `sitemap.siteUrl` when it is truthy.
 *   2. `globalData.site.baseUrl`, validated with `validateSiteUrl`.
 *
 * A missing `baseUrl` means "not configured". Both `undefined` and `null`
 * count as missing: a YAML key written with no value (`baseUrl:`) parses to
 * `null`, and `site: null` is already tolerated by the optional chaining, so
 * an empty `baseUrl` must not abort a build that never asked for a sitemap.
 *
 * @param {object} sitemap - Normalized sitemap config.
 * @param {object} [globalData] - Global site data; `site.baseUrl` is read from it.
 * @param {{ required?: boolean }} [options] - When `required` is false, a
 *   missing URL yields `null` instead of throwing. Defaults to `true`.
 * @returns {string|null} The resolved site URL, or `null` when none is
 *   configured and `required` is false.
 * @throws {Error} When no URL is configured and `required` is true
 *   (`validateSiteUrl` may also throw for an invalid `baseUrl`).
 */
function resolveSitemapSiteUrl(sitemap, globalData, { required = true } = {}) {
  if (sitemap.siteUrl) {
    return sitemap.siteUrl;
  }

  const baseUrl = globalData?.site?.baseUrl;
  if (baseUrl === undefined || baseUrl === null) {
    if (!required) {
      return null;
    }
    throw new Error('Sitemap generation requires sitemap.siteUrl or data.site.baseUrl.');
  }

  return validateSiteUrl(baseUrl, 'Invalid data.site.baseUrl');
}
330
+
331
/**
 * Test a page URL against a single exclude pattern.
 *
 * A pattern ending in `*` is a prefix match on everything before the `*`;
 * any other pattern must equal the URL exactly.
 *
 * @param {string} url - Page URL path.
 * @param {string} pattern - Exclude pattern.
 * @returns {boolean} True when the URL is matched by the pattern.
 */
function matchesExclude(url, pattern) {
  const isPrefixPattern = pattern.endsWith('*');
  if (!isPrefixPattern) {
    return url === pattern;
  }

  const prefix = pattern.slice(0, -1);
  return url.startsWith(prefix);
}
338
+
339
/**
 * Normalize the `sitemap` value found in a page's frontmatter.
 *
 * - `false` -> `{ exclude: true }`
 * - `undefined`, `null` or `true` -> `{}` (no per-page overrides)
 * - anything else -> validated by `normalizeEntryOptions` with `exclude` allowed
 *
 * @param {*} rawSitemap - Raw frontmatter `sitemap` value.
 * @param {string} pagePath - Page path used in error messages.
 * @returns {object} Normalized per-page sitemap options.
 */
function normalizePageSitemapOptions(rawSitemap, pagePath) {
  if (rawSitemap === false) {
    return { exclude: true };
  }

  const hasNoOverrides = rawSitemap === undefined || rawSitemap === null || rawSitemap === true;
  if (hasNoOverrides) {
    return {};
  }

  return normalizeEntryOptions(rawSitemap, `Invalid sitemap frontmatter in ${pagePath}`, { allowExclude: true });
}
354
+
355
/**
 * Escape the five XML-reserved characters in a value.
 *
 * The value is stringified first. `&` is replaced before the others so the
 * ampersands introduced by later entities are not escaped a second time.
 *
 * @param {*} value - Value to embed in XML text.
 * @returns {string} The escaped string.
 */
function escapeXml(value) {
  const substitutions = [
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/"/g, '&quot;'],
    [/'/g, '&apos;']
  ];

  return substitutions.reduce(
    (text, [pattern, entity]) => text.replace(pattern, entity),
    String(value)
  );
}
363
+
364
/**
 * Format a numeric priority for the `<priority>` element.
 *
 * The number is rounded to at most three decimals, converted back to a
 * number and stringified, then any trailing `.0…` suffix is stripped.
 *
 * @param {number} priority - Priority value.
 * @returns {string} Compact decimal representation.
 */
function formatPriority(priority) {
  const roundedPriority = Number(priority.toFixed(3));
  const text = String(roundedPriority);
  return text.replace(/\.0+$/u, '');
}
367
+
368
/**
 * Join a site URL and a page URL path into one absolute URL string.
 *
 * Trailing slashes are stripped from the site URL's own path, the page path
 * is appended, and any run of consecutive slashes in the combined path is
 * collapsed to a single slash.
 *
 * @param {string} siteUrl - Absolute site URL (parsed with `new URL`).
 * @param {string} pageUrl - Page URL path.
 * @returns {string} The serialized absolute URL.
 */
function joinSiteUrl(siteUrl, pageUrl) {
  const target = new URL(siteUrl);
  const sitePathPrefix = target.pathname.replace(/\/+$/u, '');
  const combinedPath = `${sitePathPrefix}${pageUrl}`;
  target.pathname = combinedPath.replace(/\/+/g, '/');
  return target.href;
}
374
+
375
/**
 * Render one sitemap entry as a `<url>` XML fragment.
 *
 * `<loc>` is always written; `<lastmod>`, `<changefreq>` and `<priority>`
 * are written, in that order, only when the matching field is not
 * `undefined`. Text values go through `escapeXml`; the priority goes
 * through `formatPriority`.
 *
 * @param {{ loc: string, lastmod?: *, changefreq?: *, priority?: number }} entry
 * @returns {string} Newline-joined XML lines for the entry.
 */
function buildUrlEntryXml(entry) {
  const { loc, lastmod, changefreq, priority } = entry;

  const fragment = [];
  fragment.push(' <url>');
  fragment.push(` <loc>${escapeXml(loc)}</loc>`);

  if (lastmod !== undefined) {
    fragment.push(` <lastmod>${escapeXml(lastmod)}</lastmod>`);
  }

  if (changefreq !== undefined) {
    fragment.push(` <changefreq>${escapeXml(changefreq)}</changefreq>`);
  }

  if (priority !== undefined) {
    fragment.push(` <priority>${formatPriority(priority)}</priority>`);
  }

  fragment.push(' </url>');
  return fragment.join('\n');
}
396
+
397
/**
 * Build the sitemap XML document for a set of pages.
 *
 * Returns `null` (no sitemap) when the sitemap is disabled, or when no
 * sitemap config was given at all and no site URL can be resolved. With an
 * explicit config, a missing site URL is an error raised by
 * `resolveSitemapSiteUrl`.
 *
 * Per-page options are merged with increasing precedence: `sitemap.defaults`,
 * then `sitemap.pages[url]`, then the page's frontmatter `sitemap`. A page is
 * skipped when its URL matches any `sitemap.exclude` pattern or its merged
 * options have `exclude === true`. Entries are sorted by `loc`.
 *
 * @param {object} params
 * @param {Iterable<object>} params.pageEntries - Pages; `url`,
 *   `frontmatter?.sitemap` and `pagePath` are read from each.
 * @param {*} params.sitemap - Raw or normalized sitemap config.
 * @param {object} [params.globalData] - Global data used to resolve the site URL.
 * @returns {string|null} The XML document (ending in a newline), or `null`.
 */
export function buildSitemapXml({ pageEntries, sitemap, globalData }) {
  const isDisabledUpFront = sitemap === false || sitemap?.enabled === false;
  if (isDisabledUpFront) {
    return null;
  }

  // Without an explicit config the sitemap is generated only if a site URL exists.
  const isExplicitConfig = !(sitemap === undefined || sitemap === null);
  const config = normalizeSitemapConfig(isExplicitConfig ? sitemap : true);
  if (config.enabled === false) {
    return null;
  }

  const siteUrl = resolveSitemapSiteUrl(config, globalData, { required: isExplicitConfig });
  if (siteUrl === null) {
    return null;
  }

  const globalExcludes = config.exclude || [];
  const baseOptions = config.defaults || {};
  const perPageOptions = config.pages || {};
  const optionalFields = ['lastmod', 'changefreq', 'priority'];
  const isGloballyExcluded = (url) => globalExcludes.some((pattern) => matchesExclude(url, pattern));

  const entries = [];
  for (const pageEntry of pageEntries) {
    const pageUrl = pageEntry.url;
    const configured = perPageOptions[pageUrl] || {};
    // Frontmatter is validated before the exclusion check, so invalid
    // frontmatter is reported even for pages that end up excluded.
    const fromFrontmatter = normalizePageSitemapOptions(pageEntry.frontmatter?.sitemap, pageEntry.pagePath);
    const merged = { ...baseOptions, ...configured, ...fromFrontmatter };

    if (isGloballyExcluded(pageUrl) || merged.exclude === true) {
      continue;
    }

    const entry = { loc: joinSiteUrl(siteUrl, pageUrl) };
    for (const field of optionalFields) {
      if (hasOwn(merged, field)) {
        entry[field] = merged[field];
      }
    }
    entries.push(entry);
  }

  entries.sort((left, right) => left.loc.localeCompare(right.loc));

  const documentLines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
  ];
  for (const entry of entries) {
    documentLines.push(buildUrlEntryXml(entry));
  }
  // The trailing empty item makes the joined document end with a newline.
  documentLines.push('</urlset>', '');

  return documentLines.join('\n');
}
462
+
463
/**
 * Resolve the output path of the generated sitemap file.
 *
 * @param {*} sitemap - Raw or normalized sitemap config (passed through
 *   `normalizeSitemapConfig`).
 * @returns {string} The configured `outputPath`, or `'sitemap.xml'` when none
 *   is set.
 */
export function resolveSitemapOutputPath(sitemap) {
  const configuredPath = normalizeSitemapConfig(sitemap)?.outputPath;
  if (configuredPath) {
    return configuredPath;
  }
  return 'sitemap.xml';
}
@@ -1,8 +1,9 @@
1
1
  import fs from 'node:fs';
2
2
  import path from 'node:path';
3
3
  import yaml from 'js-yaml';
4
+ import { normalizeSitemapConfig } from '../sitemap.js';
4
5
 
5
- const ALLOWED_TOP_LEVEL_KEYS = new Set(['markdown', 'markdownit', 'build', 'imports', 'data']);
6
+ const ALLOWED_TOP_LEVEL_KEYS = new Set(['markdown', 'markdownit', 'build', 'imports', 'data', 'sitemap']);
6
7
  const MARKDOWN_BOOLEAN_KEYS = new Set(['html', 'linkify', 'typographer', 'breaks', 'xhtmlOut']);
7
8
  const MARKDOWN_STRING_KEYS = new Set(['langPrefix', 'quotes', 'preset']);
8
9
  const MARKDOWN_NUMBER_KEYS = new Set(['maxNesting']);
@@ -219,7 +220,7 @@ function validateConfig(rawConfig, configPath) {
219
220
  for (const key of Object.keys(config)) {
220
221
  if (!ALLOWED_TOP_LEVEL_KEYS.has(key)) {
221
222
  throw new Error(
222
- `Unsupported key "${key}" in "${configPath}". Supported keys: markdownit (recommended), markdown (legacy alias), build, imports, data.`
223
+ `Unsupported key "${key}" in "${configPath}". Supported keys: markdownit (recommended), markdown (legacy alias), build, imports, data, sitemap.`
223
224
  );
224
225
  }
225
226
  }
@@ -315,6 +316,10 @@ function validateConfig(rawConfig, configPath) {
315
316
  normalizedConfig.data = validateDataConfig(config.data, configPath);
316
317
  }
317
318
 
319
+ if (config.sitemap !== undefined) {
320
+ normalizedConfig.sitemap = normalizeSitemapConfig(config.sitemap, configPath);
321
+ }
322
+
318
323
  return normalizedConfig;
319
324
  }
320
325