@nuasite/cms 0.46.1 → 0.46.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { isMap, isPair, isScalar, isSeq, LineCounter, parseDocument } from 'yaml
4
4
 
5
5
  import { getProjectRoot } from '../config'
6
6
  import type { CollectionDefinition } from '../types'
7
- import { getCollectionTextIndex, getMarkdownFileCache, setCollectionTextIndex } from './cache'
7
+ import { getCollectionTextIndex, getDeclaredUrlIndexCache, getMarkdownFileCache, setCollectionTextIndex } from './cache'
8
8
  import { normalizeText } from './snippet-utils'
9
9
  import type { CollectionInfo, MarkdownContent, SourceLocation } from './types'
10
10
 
@@ -52,20 +52,10 @@ async function doBuildCollectionTextIndex(
52
52
  } else {
53
53
  // Markdown — index scalars from frontmatter only
54
54
  const { lines } = cached
55
- let fmStart = -1
56
- let fmEnd = -1
57
- for (let i = 0; i < lines.length; i++) {
58
- if (lines[i]?.trim() === '---') {
59
- if (fmStart === -1) fmStart = i
60
- else {
61
- fmEnd = i
62
- break
63
- }
64
- }
65
- }
66
- if (fmEnd > 0) {
67
- const yamlStr = lines.slice(fmStart + 1, fmEnd).join('\n')
68
- collectScalarsFromYaml(yamlStr, fmStart + 1, lines, info, index)
55
+ const bounds = findFrontmatterBounds(lines)
56
+ if (bounds) {
57
+ const yamlStr = lines.slice(bounds.start + 1, bounds.end).join('\n')
58
+ collectScalarsFromYaml(yamlStr, bounds.start + 1, lines, info, index)
69
59
  }
70
60
  }
71
61
  } catch {
@@ -187,6 +177,22 @@ export function lookupCollectionText(
187
177
  // Markdown File Cache
188
178
  // ============================================================================
189
179
 
180
/**
 * Find the first two lines that consist solely of `---` (surrounding
 * whitespace ignored) and treat them as the frontmatter fences.
 *
 * @param lines - The file content split into lines.
 * @returns The line indexes of the opening (`start`) and closing (`end`)
 *   fence, or undefined when fewer than two fence lines exist.
 */
function findFrontmatterBounds(lines: string[]): { start: number; end: number } | undefined {
	const isFence = (line: string | undefined): boolean => line?.trim() === '---'

	const start = lines.findIndex(isFence)
	if (start === -1) return undefined

	// The closing fence is the next fence line strictly after the opening one.
	const end = lines.findIndex((line, idx) => idx > start && isFence(line))
	return end === -1 ? undefined : { start, end }
}
195
+
190
196
  /**
191
197
  * Get cached markdown file content
192
198
  */
@@ -210,6 +216,17 @@ async function getCachedMarkdownFile(filePath: string): Promise<{ content: strin
210
216
  // Collection Source Finding
211
217
  // ============================================================================
212
218
 
219
/**
 * Frontmatter field names that may carry an entry's own page URL, listed from
 * most to least preferred.
 *
 * Names are lowercase because lookups lowercase the frontmatter key before
 * comparing against this list. A field only counts when its value is a string
 * starting with `/`; external URLs (`url: https://…`) and bare slugs are
 * ignored by the readers of this list.
 *
 * `canonical` / `canonicalUrl` are intentionally absent: by SEO convention they
 * name the URL that should be indexed *instead of* the current page
 * (duplicate-content consolidation), so they can point at a different entry.
 * Treating them as self-identity could resolve an edit to the wrong file.
 */
const DECLARED_URL_FIELDS = [
	'urlpath',
	'permalink',
	'pathname',
	'route',
	'url',
]
229
+
213
230
  /**
214
231
  * Find markdown collection file for a given page path.
215
232
  *
@@ -217,6 +234,14 @@ async function getCachedMarkdownFile(filePath: string): Promise<{ content: strin
217
234
  * matching entry regardless of the URL prefix. This supports localized or
218
235
  * renamed routes (e.g. `/aktuality/my-article` with content in `src/content/news/`).
219
236
  *
237
+ * Filename matching alone cannot tell apart two entries that share a slug but
238
+ * live under different URL prefixes (e.g. the same article slug published under
239
+ * two topic prefixes, where one file carries a disambiguating filename suffix).
240
+ * When a filename match declares a canonical URL in its frontmatter that
241
+ * contradicts the requested path, we fall back to matching entries by that
242
+ * declared URL. Projects whose entries declare no URL field keep the exact
243
+ * previous (filename-only) behavior.
244
+ *
220
245
  * @param pagePath - The URL path of the page (e.g., '/services/3d-tisk')
221
246
  * @param contentDir - The content directory (default: 'src/content')
222
247
  * @returns Collection info if found, undefined otherwise
@@ -233,6 +258,7 @@ export async function findCollectionSource(
233
258
  return undefined
234
259
  }
235
260
 
261
+ const requestedUrl = normalizeSitePath(`/${cleanPath}`)
236
262
  const contentPath = path.join(getProjectRoot(), contentDir)
237
263
 
238
264
  try {
@@ -245,9 +271,12 @@ export async function findCollectionSource(
245
271
  let collectionDirs: string[]
246
272
  try {
247
273
  const entries = await fs.readdir(contentPath, { withFileTypes: true })
274
+ // Sorted so match/resolution order is deterministic across runs and
275
+ // platforms, not dependent on readdir's unspecified enumeration order.
248
276
  collectionDirs = entries
249
277
  .filter(e => e.isDirectory() && !e.name.startsWith('_') && !e.name.startsWith('.'))
250
278
  .map(e => e.name)
279
+ .sort()
251
280
  } catch {
252
281
  return undefined
253
282
  }
@@ -266,6 +295,20 @@ export async function findCollectionSource(
266
295
  }
267
296
  }
268
297
 
298
+ if (matches.length === 0) continue
299
+
300
+ // Prefer the entry whose declared canonical URL equals the requested
301
+ // path. Only kicks in when an entry actually declares a URL, so
302
+ // URL-less projects fall through to the filename logic unchanged.
303
+ const byUrl = await resolveByDeclaredUrl(matches, requestedUrl, contentPath)
304
+ if (byUrl) {
305
+ // byUrl.file may differ from the file the filename match found
306
+ // (that's the whole point of this fallback) — its slug must be
307
+ // derived from the actual resolved file, not the URL-tail slug
308
+ // candidate, or downstream collectionSlug lookups break.
309
+ return { name: byUrl.name, slug: slugFromFilePath(byUrl.file), file: path.relative(getProjectRoot(), byUrl.file) }
310
+ }
311
+
269
312
  if (matches.length === 1 && matches[0]) {
270
313
  return {
271
314
  name: matches[0].name,
@@ -291,6 +334,182 @@ export async function findCollectionSource(
291
334
  return undefined
292
335
  }
293
336
 
337
/**
 * Normalize a site path for comparison.
 *
 * Everything from the first `?` or `#` onward is discarded, a leading `/` is
 * added when missing, and a single trailing `/` is removed unless the result
 * is the root path `/` itself.
 *
 * @param p - The path to normalize (may include a query string or hash).
 * @returns The normalized path, always starting with `/`.
 */
function normalizeSitePath(p: string): string {
	const cutAt = p.search(/[?#]/)
	const bare = cutAt === -1 ? p : p.slice(0, cutAt)
	const rooted = bare.startsWith('/') ? bare : `/${bare}`
	return rooted.length > 1 && rooted.endsWith('/') ? rooted.slice(0, -1) : rooted
}
344
+
345
/**
 * Extract an entry's declared site path from an already-parsed
 * frontmatter/data object, so callers that hold the data don't re-read the
 * file. Applies the same field set and rules as {@link readDeclaredPageUrl}:
 *
 * - keys are matched case-insensitively against `DECLARED_URL_FIELDS`;
 * - only string values starting with `/` count;
 * - when several keys collapse to the same lowercase name, the first one with
 *   a site-absolute value wins (a later unusable value cannot mask it);
 * - fields are preferred in `DECLARED_URL_FIELDS` order.
 *
 * @param data - Parsed frontmatter/data; anything that is not a non-null
 *   object yields undefined.
 * @returns The normalized site-absolute path, or undefined when no
 *   site-absolute URL field is declared.
 */
export function declaredSitePathFromData(data: unknown): string | undefined {
	if (!data || typeof data !== 'object') return undefined

	// Record only usable values, keeping the first per lowercase field name.
	const declared = new Map<string, string>()
	for (const [key, value] of Object.entries(data as Record<string, unknown>)) {
		if (typeof value !== 'string' || !value.startsWith('/')) continue
		const field = key.toLowerCase()
		if (!declared.has(field)) declared.set(field, normalizeSitePath(value))
	}

	for (const field of DECLARED_URL_FIELDS) {
		const sitePath = declared.get(field)
		if (sitePath !== undefined) return sitePath
	}
	return undefined
}
364
+
365
/**
 * Compute an entry slug from the path of its markdown file.
 *
 * - `<slug>/index.md` or `<slug>/index.mdx` → the parent directory name.
 * - Any other file → the file name with a trailing `.md` / `.mdx` removed.
 *
 * NOTE(review): the original comment states this mirrors the convention in
 * collection-scanner.ts; that file is not visible here — confirm the two stay
 * in sync.
 *
 * @param fileAbsPath - Path of the entry file.
 * @returns The slug derived from the path.
 */
function slugFromFilePath(fileAbsPath: string): string {
	const fileName = path.basename(fileAbsPath)
	const isDirectoryIndex = /^index\.mdx?$/.test(fileName)
	return isDirectoryIndex
		? path.basename(path.dirname(fileAbsPath))
		: fileName.replace(/\.mdx?$/, '')
}
378
+
379
/**
 * Read the page URL an entry declares about itself in its frontmatter.
 *
 * Only top-level scalar pairs are considered. Keys are compared
 * case-insensitively against `DECLARED_URL_FIELDS`, values must be strings
 * starting with `/`, and when the same field appears more than once the first
 * usable value is kept. The result follows `DECLARED_URL_FIELDS` preference
 * order.
 *
 * @param fileAbsPath - Path of the markdown file to inspect.
 * @returns The normalized site-absolute path, or undefined when the file is
 *   unavailable, has no closed frontmatter block, its frontmatter is not a
 *   YAML map, or no site-absolute URL field is declared.
 */
async function readDeclaredPageUrl(fileAbsPath: string): Promise<string | undefined> {
	const cached = await getCachedMarkdownFile(fileAbsPath)
	if (!cached) return undefined

	const bounds = findFrontmatterBounds(cached.lines)
	if (!bounds) return undefined

	let parsed
	try {
		const frontmatterYaml = cached.lines.slice(bounds.start + 1, bounds.end).join('\n')
		parsed = parseDocument(frontmatterYaml)
	} catch {
		return undefined
	}

	const root = parsed.contents
	if (!isMap(root)) return undefined

	// First usable value per (lowercased) field name.
	const declaredByField = new Map<string, string>()
	for (const item of root.items) {
		if (!(isPair(item) && isScalar(item.key) && isScalar(item.value))) continue

		const field = String(item.key.value).toLowerCase()
		if (!DECLARED_URL_FIELDS.includes(field)) continue

		const raw = item.value.value
		if (typeof raw !== 'string' || !raw.startsWith('/')) continue

		if (!declaredByField.has(field)) declaredByField.set(field, normalizeSitePath(raw))
	}

	// Honor the preference order rather than the order fields appear in the file.
	for (const field of DECLARED_URL_FIELDS) {
		const url = declaredByField.get(field)
		if (url) return url
	}
	return undefined
}
415
+
416
/**
 * Pick the entry for `requestedUrl` based on the URLs entries declare in
 * their frontmatter.
 *
 * - A filename candidate that declares exactly `requestedUrl` is returned
 *   immediately (candidates are checked in the order given).
 * - If at least one candidate declares a URL but none equals `requestedUrl`,
 *   the filename match belongs to a same-slug sibling under another prefix:
 *   each collection that produced a candidate is searched for a file whose
 *   declared URL is `requestedUrl`.
 * - If no candidate declares any URL, undefined is returned so the caller can
 *   keep its filename-only behavior.
 *
 * @param matches - Filename candidates (`name` = collection directory name,
 *   `file` = path of the candidate file).
 * @param requestedUrl - The normalized site path being resolved.
 * @param contentPath - Directory containing the collection directories.
 * @returns The resolved collection name and file, or undefined.
 */
async function resolveByDeclaredUrl(
	matches: { name: string; file: string }[],
	requestedUrl: string,
	contentPath: string,
): Promise<{ name: string; file: string } | undefined> {
	let anyCandidateDeclaresUrl = false
	for (const candidate of matches) {
		const declaredUrl = await readDeclaredPageUrl(candidate.file)
		if (declaredUrl === requestedUrl) return candidate
		if (declaredUrl !== undefined) anyCandidateDeclaresUrl = true
	}

	if (!anyCandidateDeclaresUrl) return undefined

	// Declared URLs contradict the request: look through the collections that
	// produced the candidates, visiting each once in first-seen order.
	const candidateCollections = new Set(matches.map(m => m.name))
	for (const collection of candidateCollections) {
		const file = await findFileByDeclaredUrl(path.join(contentPath, collection), requestedUrl)
		if (file) return { name: collection, file }
	}
	return undefined
}
452
+
453
/**
 * Look up the file in a collection directory whose declared URL equals
 * `requestedUrl`.
 *
 * The lookup goes through a URL→file index keyed by collection directory in
 * the cache returned by `getDeclaredUrlIndexCache()`. When the directory has
 * no index yet, one is built by scanning the directory and then stored, so
 * later lookups for the same directory reuse it.
 *
 * @param collectionPathAbs - Path of the collection directory.
 * @param requestedUrl - The normalized site path to look up.
 * @returns The matching file path, or undefined when no entry declares it.
 */
async function findFileByDeclaredUrl(collectionPathAbs: string, requestedUrl: string): Promise<string | undefined> {
	const indexCache = getDeclaredUrlIndexCache()

	const existing = indexCache.get(collectionPathAbs)
	if (existing) return existing.get(requestedUrl)

	const built = await buildDeclaredUrlIndex(collectionPathAbs)
	indexCache.set(collectionPathAbs, built)
	return built.get(requestedUrl)
}
468
+
469
/**
 * Build a declared-URL → file map for one collection directory.
 *
 * Two layouts are indexed: flat `*.md` / `*.mdx` files directly in the
 * directory, then `<dir>/index.md` / `<dir>/index.mdx` inside subdirectories
 * whose names do not start with `_` or `.`. Names are sorted before visiting,
 * and the first entry to declare a URL keeps it, so a URL declared by two
 * entries resolves the same way on every run.
 *
 * @param collectionPathAbs - Path of the collection directory.
 * @returns The index; empty when the directory cannot be read.
 */
async function buildDeclaredUrlIndex(collectionPathAbs: string): Promise<Map<string, string>> {
	const urlToFile = new Map<string, string>()

	let entries
	try {
		entries = await fs.readdir(collectionPathAbs, { withFileTypes: true })
	} catch {
		return urlToFile
	}

	// Split the listing into flat markdown files and candidate entry directories.
	const flatFiles: string[] = []
	const entryDirs: string[] = []
	for (const entry of entries) {
		if (entry.isFile() && /\.mdx?$/.test(entry.name)) {
			flatFiles.push(entry.name)
		}
		if (entry.isDirectory() && !entry.name.startsWith('_') && !entry.name.startsWith('.')) {
			entryDirs.push(entry.name)
		}
	}
	flatFiles.sort()
	entryDirs.sort()

	for (const fileName of flatFiles) {
		const file = path.join(collectionPathAbs, fileName)
		const url = await readDeclaredPageUrl(file)
		if (url && !urlToFile.has(url)) urlToFile.set(url, file)
	}

	for (const dirName of entryDirs) {
		// Stop at the first index file that declares a URL, even if that URL
		// is already taken by an earlier entry.
		for (const indexName of ['index.md', 'index.mdx']) {
			const file = path.join(collectionPathAbs, dirName, indexName)
			const url = await readDeclaredPageUrl(file)
			if (!url) continue
			if (!urlToFile.has(url)) urlToFile.set(url, file)
			break
		}
	}

	return urlToFile
}
512
+
294
513
  /**
295
514
  * Find a markdown file in a collection directory by slug
296
515
  */
@@ -370,23 +589,11 @@ export async function findMarkdownSourceLocation(
370
589
  const { lines } = cached
371
590
  const normalizedSearch = normalizeText(textContent)
372
591
 
373
- // Find frontmatter boundaries
374
- let frontmatterStart = -1
375
- let frontmatterEnd = -1
376
- for (let i = 0; i < lines.length; i++) {
377
- if (lines[i]?.trim() === '---') {
378
- if (frontmatterStart === -1) {
379
- frontmatterStart = i
380
- } else {
381
- frontmatterEnd = i
382
- break
383
- }
384
- }
385
- }
386
- if (frontmatterEnd <= 0) return undefined
592
+ const bounds = findFrontmatterBounds(lines)
593
+ if (!bounds) return undefined
387
594
 
388
- const yamlStr = lines.slice(frontmatterStart + 1, frontmatterEnd).join('\n')
389
- const lineOffset = frontmatterStart + 1
595
+ const yamlStr = lines.slice(bounds.start + 1, bounds.end).join('\n')
596
+ const lineOffset = bounds.start + 1
390
597
  return findScalarInYamlAst(yamlStr, lineOffset, normalizedSearch, lines, collectionInfo)
391
598
  } catch {
392
599
  // Error reading file
@@ -550,20 +757,10 @@ export async function findFieldInCollectionEntry(
550
757
 
551
758
  // For markdown, search inside frontmatter only
552
759
  const { lines } = cached
553
- let fmStart = -1
554
- let fmEnd = -1
555
- for (let i = 0; i < lines.length; i++) {
556
- if (lines[i]?.trim() === '---') {
557
- if (fmStart === -1) fmStart = i
558
- else {
559
- fmEnd = i
560
- break
561
- }
562
- }
563
- }
564
- if (fmEnd <= 0) return undefined
565
- const yamlStr = lines.slice(fmStart + 1, fmEnd).join('\n')
566
- return findFieldByNameInYaml(yamlStr, fmStart + 1, fieldName, lines, info)
760
+ const bounds = findFrontmatterBounds(lines)
761
+ if (!bounds) return undefined
762
+ const yamlStr = lines.slice(bounds.start + 1, bounds.end).join('\n')
763
+ return findFieldByNameInYaml(yamlStr, bounds.start + 1, fieldName, lines, info)
567
764
  } catch {
568
765
  return undefined
569
766
  }
@@ -598,20 +795,10 @@ export async function findFieldsInCollectionEntry(
598
795
 
599
796
  // For markdown, search inside frontmatter only
600
797
  const { lines } = cached
601
- let fmStart = -1
602
- let fmEnd = -1
603
- for (let i = 0; i < lines.length; i++) {
604
- if (lines[i]?.trim() === '---') {
605
- if (fmStart === -1) fmStart = i
606
- else {
607
- fmEnd = i
608
- break
609
- }
610
- }
611
- }
612
- if (fmEnd <= 0) return new Map()
613
- const yamlStr = lines.slice(fmStart + 1, fmEnd).join('\n')
614
- return findFieldsByNameInYaml(yamlStr, fmStart + 1, fieldNames, lines, info)
798
+ const bounds = findFrontmatterBounds(lines)
799
+ if (!bounds) return new Map()
800
+ const yamlStr = lines.slice(bounds.start + 1, bounds.end).join('\n')
801
+ return findFieldsByNameInYaml(yamlStr, bounds.start + 1, fieldNames, lines, info)
615
802
  } catch {
616
803
  return new Map()
617
804
  }
@@ -695,27 +882,12 @@ export async function parseMarkdownContent(
695
882
 
696
883
  const { lines } = cached
697
884
 
698
- // Parse frontmatter
699
- let frontmatterStart = -1
700
- let frontmatterEnd = -1
701
-
702
- for (let i = 0; i < lines.length; i++) {
703
- const line = lines[i]?.trim()
704
- if (line === '---') {
705
- if (frontmatterStart === -1) {
706
- frontmatterStart = i
707
- } else {
708
- frontmatterEnd = i
709
- break
710
- }
711
- }
712
- }
713
-
885
+ const bounds = findFrontmatterBounds(lines)
714
886
  const frontmatter: Record<string, { value: string; line: number }> = {}
715
887
 
716
888
  // Extract frontmatter fields using yaml parser
717
- if (frontmatterEnd > 0) {
718
- const yamlStr = lines.slice(frontmatterStart + 1, frontmatterEnd).join('\n')
889
+ if (bounds) {
890
+ const yamlStr = lines.slice(bounds.start + 1, bounds.end).join('\n')
719
891
  const lineCounter = new LineCounter()
720
892
  const doc = parseDocument(yamlStr, { lineCounter })
721
893
 
@@ -726,7 +898,7 @@ export async function parseMarkdownContent(
726
898
  const value = isScalar(pair.value) ? String(pair.value.value) : ''
727
899
  const keyRange = (pair.key as any).range
728
900
  const yamlLine = keyRange ? lineCounter.linePos(keyRange[0]).line : 0
729
- const fileLine = yamlLine + frontmatterStart + 1
901
+ const fileLine = yamlLine + bounds.start + 1
730
902
  if (key && value) {
731
903
  frontmatter[key] = { value, line: fileLine }
732
904
  }
@@ -736,7 +908,7 @@ export async function parseMarkdownContent(
736
908
  }
737
909
 
738
910
  // Extract body (everything after frontmatter)
739
- const bodyStartLine = frontmatterEnd > 0 ? frontmatterEnd + 1 : 0
911
+ const bodyStartLine = bounds ? bounds.end + 1 : 0
740
912
  const bodyLines = lines.slice(bodyStartLine)
741
913
  const body = bodyLines.join('\n').trim()
742
914
 
@@ -25,6 +25,7 @@ export { findImageSourceLocation } from './image-finder'
25
25
  // Collection/markdown finding
26
26
  export {
27
27
  buildCollectionTextIndex,
28
+ declaredSitePathFromData,
28
29
  findCollectionSource,
29
30
  findFieldInCollectionEntry,
30
31
  findMarkdownSourceLocation,