@nuasite/cms-core 0.43.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/types/collection-scanner.d.ts +12 -0
  2. package/dist/types/collection-scanner.d.ts.map +1 -0
  3. package/dist/types/component-registry.d.ts +15 -0
  4. package/dist/types/component-registry.d.ts.map +1 -0
  5. package/dist/types/content-config-ast.d.ts +45 -0
  6. package/dist/types/content-config-ast.d.ts.map +1 -0
  7. package/dist/types/core.d.ts +44 -0
  8. package/dist/types/core.d.ts.map +1 -0
  9. package/dist/types/fs/glob.d.ts +3 -0
  10. package/dist/types/fs/glob.d.ts.map +1 -0
  11. package/dist/types/fs/node-fs.d.ts +7 -0
  12. package/dist/types/fs/node-fs.d.ts.map +1 -0
  13. package/dist/types/fs/types.d.ts +33 -0
  14. package/dist/types/fs/types.d.ts.map +1 -0
  15. package/dist/types/handlers/entry-ops.d.ts +69 -0
  16. package/dist/types/handlers/entry-ops.d.ts.map +1 -0
  17. package/dist/types/handlers/page-ops.d.ts +14 -0
  18. package/dist/types/handlers/page-ops.d.ts.map +1 -0
  19. package/dist/types/handlers/redirect-ops.d.ts +10 -0
  20. package/dist/types/handlers/redirect-ops.d.ts.map +1 -0
  21. package/dist/types/index.d.ts +12 -0
  22. package/dist/types/index.d.ts.map +1 -0
  23. package/dist/types/media/contember.d.ts +18 -0
  24. package/dist/types/media/contember.d.ts.map +1 -0
  25. package/dist/types/media/index.d.ts +5 -0
  26. package/dist/types/media/index.d.ts.map +1 -0
  27. package/dist/types/media/local.d.ts +12 -0
  28. package/dist/types/media/local.d.ts.map +1 -0
  29. package/dist/types/media/project-images.d.ts +15 -0
  30. package/dist/types/media/project-images.d.ts.map +1 -0
  31. package/dist/types/media/s3.d.ts +12 -0
  32. package/dist/types/media/s3.d.ts.map +1 -0
  33. package/dist/types/shared.d.ts +24 -0
  34. package/dist/types/shared.d.ts.map +1 -0
  35. package/dist/types/tsconfig.tsbuildinfo +1 -0
  36. package/package.json +55 -0
  37. package/src/collection-scanner.ts +935 -0
  38. package/src/component-registry.ts +308 -0
  39. package/src/content-config-ast.ts +536 -0
  40. package/src/core.ts +167 -0
  41. package/src/fs/glob.ts +32 -0
  42. package/src/fs/node-fs.ts +138 -0
  43. package/src/fs/types.ts +26 -0
  44. package/src/handlers/entry-ops.ts +528 -0
  45. package/src/handlers/page-ops.ts +203 -0
  46. package/src/handlers/redirect-ops.ts +139 -0
  47. package/src/index.ts +41 -0
  48. package/src/media/contember.ts +90 -0
  49. package/src/media/index.ts +4 -0
  50. package/src/media/local.ts +147 -0
  51. package/src/media/project-images.ts +82 -0
  52. package/src/media/s3.ts +151 -0
  53. package/src/shared.ts +65 -0
  54. package/src/tsconfig.json +9 -0
@@ -0,0 +1,935 @@
1
+ import type { CollectionDefinition, CollectionEntryInfo, FieldDefinition, FieldType } from '@nuasite/cms-types'
2
+ import path from 'node:path'
3
+ import { isMap, isPair, isScalar, parse as parseYaml, parseDocument } from 'yaml'
4
+ import { type ParseCache, parseContentConfig, type ParsedConfig, type ParsedField } from './content-config-ast'
5
+ import type { CmsFileSystem } from './fs/types'
6
+ import { slugifyHref } from './shared'
7
+
8
/** Patterns consulted when guessing a field type from a string value. */
const DATE_PATTERN = /^\d{4}-\d{2}-\d{2}/
const URL_PATTERN = /^(https?:\/\/|\/)/
const IMAGE_EXTENSIONS = /\.(jpg|jpeg|png|gif|webp|svg|avif)$/i

/** A text field with more distinct values than this stays free-form text instead of becoming a select. */
const MAX_SELECT_OPTIONS = 10

/** Strings strictly longer than this many characters are classified as textarea. */
const TEXTAREA_MIN_LENGTH = 200

/**
 * Field names placed in the sidebar by default.
 * Looked up with the normalized (lowercased, `_`/`-` stripped) field name.
 */
const SIDEBAR_FIELD_NAMES = new Set<string>([
	'title',
	'date', 'pubdate', 'publishdate',
	'draft',
	'image', 'featuredimage', 'cover', 'coverimage', 'thumbnail',
	'author',
])

/**
 * Recognizes an `@position <value>` or `@group <value>` directive inside YAML comment text
 * (the YAML parser has already removed the leading `#`).
 * Capture 1 is the directive name, capture 2 its value.
 */
const DIRECTIVE_PATTERN = /^\s*@(position|group)\s+(.+)$/

/** Normalized field names that are always kept as free text and never promoted to a select. */
const FREE_TEXT_FIELD_NAMES = new Set<string>([
	'title', 'name',
	'description', 'summary', 'excerpt',
	'subtitle', 'heading', 'headline',
	'slug', 'alt', 'caption',
])

/** Normalized (lowercased, underscores/hyphens stripped) names that identify the publish toggle field. */
const PUBLISH_TOGGLE_NAMES = new Set<string>([
	'draft',
	'isdraft',
	'published',
	'ispublished',
	'unpublished',
])

/** Normalized names that identify the publish/release date field. */
const PUBLISH_DATE_NAMES = new Set<string>([
	'date', 'pubdate', 'publishdate',
	'publisheddate', 'publishedate', 'publishedat',
	'datepublished',
])
65
+
66
/**
 * Canonicalize a field name for lookups in the *_NAMES sets:
 * lowercase it, then drop every underscore and hyphen.
 */
function normalizeFieldName(name: string): string {
	const lowered = name.toLowerCase()
	return lowered.split(/[_-]/).join('')
}
70
+
71
/** Everything recorded for one field name while walking a set of entries. */
interface FieldObservation {
	/** Field key as it appears in the data. */
	name: string
	/** One element for every entry in which the key was present (the value itself may be null). */
	values: Array<unknown>
	/** How many entries contained the key. */
	presentCount: number
	/** How many entries were scanned; compared with `presentCount` to decide `required`. */
	totalEntries: number
}
80
+
81
/**
 * Matches a `---` fenced frontmatter block at the very start of a file (LF or CRLF line
 * endings) and captures the text between the fences.
 * A single leading UTF-8 byte-order mark is tolerated: editors that save with a BOM would
 * otherwise make `^---` fail and the file would look as if it had no frontmatter at all.
 */
const FRONTMATTER_PATTERN = /^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/

/**
 * Extract the raw text between the opening and closing `---` fences.
 *
 * @param content Full text of a content file.
 * @returns The frontmatter text without the fences, or `null` when the file does not
 *          start with a fenced block.
 */
function extractFrontmatterBlock(content: string): string | null {
	const match = FRONTMATTER_PATTERN.exec(content)
	return match?.[1] ?? null
}
87
+
88
/**
 * Parse the YAML frontmatter of a content file into a key/value record.
 *
 * @param content Full text of a content file.
 * @returns The parsed mapping, or `null` when there is no (non-empty) frontmatter block
 *          or when the YAML root is not a mapping.
 *
 * YAML syntax errors raised by the parser propagate to the caller unchanged.
 */
function parseFrontmatter(content: string): Record<string, unknown> | null {
	const block = extractFrontmatterBlock(content)
	if (!block) return null

	const parsed: unknown = parseYaml(block)
	// A frontmatter block whose root is a scalar or a sequence is valid YAML but is not a
	// field record; callers iterate the result with Object.entries, which would otherwise
	// turn string characters / array indices into bogus fields.
	if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) return null
	return parsed as Record<string, unknown>
}
93
+
94
/**
 * Read `@position` / `@group` directives written as YAML comments above frontmatter keys.
 * The frontmatter is parsed to a YAML document so the comment text is available through
 * `commentBefore` on each top-level key node.
 *
 * @param content Full text of a content file.
 * @returns Directives keyed by field name; keys without a recognized directive are omitted.
 */
function parseFieldDirectives(content: string): Record<string, { position?: 'sidebar' | 'header'; group?: string }> {
	const collected: Record<string, { position?: 'sidebar' | 'header'; group?: string }> = {}

	const frontmatterText = extractFrontmatterBlock(content)
	if (!frontmatterText) return collected

	const root = parseDocument(frontmatterText).contents
	if (!isMap(root)) return collected

	for (const item of root.items) {
		if (!isPair(item) || !isScalar(item.key)) continue
		const commentText = item.key.commentBefore
		if (!commentText) continue

		const found: { position?: 'sidebar' | 'header'; group?: string } = {}
		for (const rawLine of commentText.split('\n')) {
			const parts = DIRECTIVE_PATTERN.exec(rawLine.trim())
			if (parts === null) continue
			const kind = parts[1]
			const value = parts[2]
			if (kind === 'position') {
				// Only the two known positions are accepted; anything else is ignored.
				if (value === 'sidebar' || value === 'header') found.position = value
			} else if (kind === 'group' && value) {
				found.group = value.trim()
			}
		}

		if (found.position || found.group) {
			collected[String(item.key.value)] = found
		}
	}

	return collected
}
131
+
132
/**
 * Give every field a default `position`, then let frontmatter comment directives override
 * the position and set the group. Mutates the field objects in place.
 *
 * @param fields Fields to annotate.
 * @param directives Directives keyed by exact field name.
 */
function assignFieldMetadata(
	fields: FieldDefinition[],
	directives: Record<string, { position?: 'sidebar' | 'header'; group?: string }>,
): void {
	fields.forEach((field) => {
		// Default: image and boolean fields plus a set of well-known names live in the sidebar.
		const belongsInSidebar = field.type === 'image'
			|| field.type === 'boolean'
			|| SIDEBAR_FIELD_NAMES.has(normalizeFieldName(field.name))
		field.position = belongsInSidebar ? 'sidebar' : 'header'

		// Explicit directives win over the defaults.
		const override = directives[field.name]
		if (!override) return
		if (override.position) field.position = override.position
		if (override.group) field.group = override.group
	})
}
156
+
157
/**
 * Guess the editor field type for a single observed value.
 *
 * @param value The value found in the data.
 * @param key The field name; used only for the image-name heuristic on string values.
 * @returns The inferred type; `'text'` is the fallback for null/undefined and for any
 *          value kind not otherwise recognized.
 */
function inferFieldType(value: unknown, key: string): FieldType {
	if (typeof value !== 'string') {
		if (value === null || value === undefined) return 'text'
		if (typeof value === 'boolean') return 'boolean'
		if (typeof value === 'number') return 'number'
		if (Array.isArray(value)) return 'array'
		return typeof value === 'object' ? 'object' : 'text'
	}

	// String heuristics, most specific first.
	if (DATE_PATTERN.test(value)) return 'date'

	// An image is either a value with an image file extension, or any string stored under a
	// key that contains an image-like word delimited by start/end or `_`/`-`.
	const keyLooksLikeImage = /(?:^|[_-])(?:image|thumbnail|cover|avatar|logo|icon|banner|photo)(?:$|[_-])/.test(key.toLowerCase())
	if (IMAGE_EXTENSIONS.test(value) || keyLooksLikeImage) return 'image'

	if (URL_PATTERN.test(value)) return 'url'

	// Multi-line or long strings get the larger editor.
	const isLong = value.length > TEXTAREA_MIN_LENGTH
	return value.includes('\n') || isLong ? 'textarea' : 'text'
}
213
+
214
/**
 * Recursion cap for nested object / array-of-object fields. It guards against
 * pathologically deep content blowing the stack; real-world YAML/JSON rarely exceeds
 * 5 levels, so the cap is well above realistic use.
 */
const MAX_NESTED_FIELD_DEPTH = 16

/**
 * Turn per-field observations gathered across entries into field definitions.
 *
 * For each field: the type is the most frequently inferred type of its non-null values,
 * `required` is true when the key was present in every entry, and up to three values are
 * kept as examples. Text fields with few, repeating values become selects; arrays get an
 * item type (from their first item) and, for text items, an option list; objects and
 * arrays of objects are recursed into for sub-fields.
 *
 * @param observations Observations to merge.
 * @param depth Current nesting level; once it reaches MAX_NESTED_FIELD_DEPTH an empty list is returned.
 * @returns One definition per observation that has at least one non-null value.
 */
function mergeFieldObservations(observations: FieldObservation[], depth: number = 0): FieldDefinition[] {
	if (depth >= MAX_NESTED_FIELD_DEPTH) return []

	const isPlainObject = (v: unknown): v is Record<string, unknown> => typeof v === 'object' && v !== null && !Array.isArray(v)

	// Shared by the object and array-of-objects branches: observe the keys of every object,
	// then merge them one level deeper.
	const inferSubFields = (objects: Array<Record<string, unknown>>): FieldDefinition[] => {
		const nested = new Map<string, FieldObservation>()
		for (const object of objects) {
			collectFieldObservations(nested, object, objects.length)
		}
		return mergeFieldObservations(Array.from(nested.values()), depth + 1)
	}

	const result: FieldDefinition[] = []

	for (const observation of observations) {
		const present = observation.values.filter(v => v !== null && v !== undefined)
		if (present.length === 0) continue

		// Vote on the type; the first type to reach the highest count wins ties.
		const tally = new Map<FieldType, number>()
		for (const value of present) {
			const inferred = inferFieldType(value, observation.name)
			tally.set(inferred, (tally.get(inferred) ?? 0) + 1)
		}
		let winner: FieldType = 'text'
		let winnerVotes = 0
		for (const [candidate, votes] of tally.entries()) {
			if (votes > winnerVotes) {
				winnerVotes = votes
				winner = candidate
			}
		}

		const definition: FieldDefinition = {
			name: observation.name,
			type: winner,
			required: observation.presentCount === observation.totalEntries,
			examples: present.slice(0, 3),
		}

		// Text -> select when the values are few and actually repeat
		// (a high distinct ratio means each entry has its own value, i.e. free text).
		if (winner === 'text' && !FREE_TEXT_FIELD_NAMES.has(normalizeFieldName(observation.name))) {
			const distinct = Array.from(new Set(present.map(v => String(v))))
			const distinctRatio = distinct.length / present.length
			const fewEnough = distinct.length > 0 && distinct.length <= MAX_SELECT_OPTIONS
			if (fewEnough && present.length >= 2 && distinctRatio <= 0.8) {
				definition.type = 'select'
				definition.options = distinct.sort()
			}
		}

		if (winner === 'array') {
			const items = present.flatMap(v => (Array.isArray(v) ? v : []))
			if (items.length > 0) {
				// The first item decides the item type for the whole array.
				const itemType = inferFieldType(items[0], observation.name)
				definition.itemType = itemType

				if (itemType === 'text') {
					const distinctItems = Array.from(new Set(items.map(v => String(v))))
					if (distinctItems.length <= MAX_SELECT_OPTIONS * 2) {
						definition.options = distinctItems.sort()
					}
				} else if (itemType === 'object') {
					const objectItems = items.filter(isPlainObject)
					if (objectItems.length > 0) definition.fields = inferSubFields(objectItems)
				}
			}
		}

		// Plain objects: describe their keys so the editor can render them.
		if (winner === 'object') {
			const objectValues = present.filter(isPlainObject)
			if (objectValues.length > 0) definition.fields = inferSubFields(objectValues)
		}

		result.push(definition)
	}

	return result
}
315
+
316
/**
 * Record every key of one data object into the shared observation map.
 * A new observation is created the first time a key is seen; each call appends the key's
 * value and bumps its presence counter.
 *
 * @param fieldMap Accumulator, mutated in place.
 * @param data One entry's data (or one nested object).
 * @param totalEntries Stored on newly created observations only.
 */
function collectFieldObservations(
	fieldMap: Map<string, FieldObservation>,
	data: Record<string, unknown>,
	totalEntries: number,
): void {
	Object.keys(data).forEach((key) => {
		const existing = fieldMap.get(key)
		const observation: FieldObservation = existing ?? { name: key, values: [], presentCount: 0, totalEntries }
		if (!existing) fieldMap.set(key, observation)
		observation.values.push(data[key])
		observation.presentCount += 1
	})
}
331
+
332
/**
 * Build a CollectionDefinition from the observations and entry list gathered by a scan.
 *
 * @param collectionName Collection identifier; also the source of the human-readable label.
 * @param contentDir Content root; the default `path` is `<contentDir>/<collectionName>`.
 * @param fieldMap Field observations; each one's `totalEntries` is overwritten with `entryCount`.
 * @param entryInfos Entry list; sorted in place by title (falling back to slug).
 * @param entryCount Number of entries in the collection.
 * @param extra Properties spread last, so they override the defaults built here.
 * @returns The assembled definition.
 */
function assembleCollectionDefinition(
	collectionName: string,
	contentDir: string,
	fieldMap: Map<string, FieldObservation>,
	entryInfos: CollectionEntryInfo[],
	entryCount: number,
	extra: Partial<CollectionDefinition>,
): CollectionDefinition {
	// Pin every observation to the final entry count so `required` is computed against it.
	for (const obs of fieldMap.values()) {
		obs.totalEntries = entryCount
	}

	entryInfos.sort((a, b) => (a.title ?? a.slug).localeCompare(b.title ?? b.slug))

	const fields = mergeFieldObservations(Array.from(fieldMap.values()))

	// Title-case the name. `\b\w` only knows ASCII word characters, so a name containing
	// letters such as `ř` or `é` would get capitals in the middle of a word. Use Unicode
	// property classes instead; for pure-ASCII names the result is identical to `\b\w`.
	const label = collectionName
		.replace(/[-_]/g, ' ')
		.replace(/(?<![\p{L}\p{M}\p{N}_])[\p{L}\p{N}_]/gu, c => c.toUpperCase())

	return {
		name: collectionName,
		label,
		path: path.join(contentDir, collectionName),
		entryCount,
		fields,
		fileExtension: 'md',
		entries: entryInfos,
		...extra,
	}
}
360
+
361
/**
 * Resolve the root-relative directory a collection's sources live in.
 *
 * `basePath` is already root-relative (for example `src/content/blog` in the default
 * layout, or a glob loader base such as `src/content/jsem`). If, after normalization, it
 * equals the default `<contentDir>/<collectionName>` location, the joined default path is
 * returned; any other base path is handed back untouched.
 */
function getCollectionSourceBasePath(basePath: string, collectionName: string, contentDir: string): string {
	const canonicalPath = path.join(contentDir, collectionName)
	const isDefaultLocation = path.normalize(basePath) === path.normalize(canonicalPath)
	return isDefaultLocation ? canonicalPath : basePath
}
375
+
376
/**
 * Read every source file of a collection, gather frontmatter observations and comment
 * directives, and assemble the collection definition.
 *
 * - `fileExtension` is `'md'` when at least one source ends in `.md`, otherwise `'mdx'`.
 * - `supportsDraft` is true only when at least one entry has `draft: true`.
 * - When several files carry a directive for the same field, the first file wins.
 *
 * @param fs File-system port used to read the sources.
 * @param basePath Directory the `relPath` values are relative to.
 * @param sources Slug and relative path of each entry file.
 * @param collectionName Collection identifier.
 * @param contentDir Content root directory.
 * @returns The definition, or `null` when `sources` is empty.
 */
async function buildCollectionDefinition(
	fs: CmsFileSystem,
	basePath: string,
	sources: Array<{ slug: string; relPath: string }>,
	collectionName: string,
	contentDir: string,
): Promise<CollectionDefinition | null> {
	if (sources.length === 0) return null

	const sourceBasePath = getCollectionSourceBasePath(basePath, collectionName, contentDir)
	const fileExtension: 'md' | 'mdx' = sources.some(s => s.relPath.endsWith('.md')) ? 'md' : 'mdx'

	// All files are read concurrently; results line up with `sources` by index.
	const fileContents = await Promise.all(
		sources.map(s => fs.readFile(path.join(basePath, s.relPath))),
	)

	const observations = new Map<string, FieldObservation>()
	const mergedDirectives: Record<string, { position?: 'sidebar' | 'header'; group?: string }> = {}
	const entries: CollectionEntryInfo[] = []
	let sawDraft = false

	for (const [index, source] of sources.entries()) {
		const text = fileContents[index]!
		const frontmatter = parseFrontmatter(text)

		for (const [fieldName, directive] of Object.entries(parseFieldDirectives(text))) {
			if (!mergedDirectives[fieldName]) {
				mergedDirectives[fieldName] = directive
			}
		}

		const info: CollectionEntryInfo = {
			slug: source.slug,
			sourcePath: path.join(sourceBasePath, source.relPath),
		}
		entries.push(info)

		// Files without frontmatter still count as entries but contribute no fields.
		if (!frontmatter) continue

		if (typeof frontmatter.title === 'string') {
			info.title = frontmatter.title
		}
		if (frontmatter.draft === true) {
			info.draft = true
			sawDraft = true
		}
		info.data = frontmatter

		collectFieldObservations(observations, frontmatter, sources.length)
	}

	const definition = assembleCollectionDefinition(collectionName, contentDir, observations, entries, sources.length, {
		path: sourceBasePath,
		supportsDraft: sawDraft,
		fileExtension,
	})
	assignFieldMetadata(definition.fields, mergedDirectives)
	return definition
}
439
+
440
/**
 * Scan one collection directory and infer its schema.
 *
 * Entries are the `.md` / `.mdx` files directly inside the directory, plus sub-directories
 * that contain an `index.md` or `index.mdx` (`.md` is probed first). Sub-directories whose
 * name starts with `_` or `.` are skipped, and a flat file wins when a sub-directory has
 * the same slug.
 *
 * @returns The definition, or `null` when the directory is empty or holds no entries.
 */
async function scanCollection(
	fs: CmsFileSystem,
	collectionPath: string,
	collectionName: string,
	contentDir: string,
): Promise<CollectionDefinition | null> {
	const listing = await fs.list(collectionPath)
	if (listing.length === 0) return null

	const markdownExt = /\.(md|mdx)$/
	const sources: Array<{ slug: string; relPath: string }> = []
	const flatSlugs = new Set<string>()

	// Pass 1: flat files.
	for (const entry of listing) {
		if (entry.isDirectory || !markdownExt.test(entry.name)) continue
		const slug = entry.name.replace(markdownExt, '')
		sources.push({ slug, relPath: entry.name })
		flatSlugs.add(slug)
	}

	// Pass 2: Hugo-style <slug>/index.md(x) directories not shadowed by a flat file.
	const candidateDirs = listing.filter(
		e => e.isDirectory && !e.name.startsWith('_') && !e.name.startsWith('.') && !flatSlugs.has(e.name),
	)
	const resolved = await Promise.all(candidateDirs.map(async (dir) => {
		for (const ext of ['md', 'mdx'] as const) {
			const relPath = path.join(dir.name, `index.${ext}`)
			const found = await fs.exists(path.join(collectionPath, relPath))
			if (found) return { slug: dir.name, relPath }
		}
		return null
	}))
	for (const hit of resolved) {
		if (hit) sources.push(hit)
	}

	if (sources.length === 0) return null
	return await buildCollectionDefinition(fs, collectionPath, sources, collectionName, contentDir)
}
482
+
483
/**
 * Scan a collection declared in the content config through a glob loader (base + pattern).
 * Such a collection may share its base directory with another collection (nested layout).
 *
 * The glob is evaluated as `<baseRel>/<pattern>` and every match is re-relativized to
 * `baseRel`. Only `.md` / `.mdx` matches are kept, and any match with a path segment
 * (directory or file name) starting with `_` or `.` is dropped as private/hidden — the
 * glob pattern alone would otherwise match them.
 *
 * @param fs File-system port providing `glob`.
 * @param collectionName Collection identifier.
 * @param baseRel Root-relative loader base directory.
 * @param pattern Glob pattern relative to `baseRel`.
 * @param contentDir Content root directory.
 * @returns The definition, or `null` when nothing matches.
 */
async function scanGlobCollection(
	fs: CmsFileSystem,
	collectionName: string,
	baseRel: string,
	pattern: string,
	contentDir: string,
): Promise<CollectionDefinition | null> {
	const matches = await fs.glob(path.join(baseRel, pattern))

	const sources: Array<{ slug: string; relPath: string }> = []
	for (const match of matches) {
		if (!match.endsWith('.md') && !match.endsWith('.mdx')) continue

		const relToBase = path.relative(baseRel, match)
		// path.relative() emits the platform separator, so segments must be split on
		// path.sep; a literal '/' would see a Windows path as one single segment and let
		// nested `_private` / `.hidden` segments through.
		const segments = relToBase.split(path.sep)
		if (segments.some(seg => seg.startsWith('_') || seg.startsWith('.'))) continue

		// Slugs always use forward slashes, whatever the platform separator is.
		const slug = segments.join('/').replace(/\.(md|mdx)$/, '')
		sources.push({ slug, relPath: relToBase })
	}

	if (sources.length === 0) return null
	return await buildCollectionDefinition(fs, baseRel, sources, collectionName, contentDir)
}
508
+
509
/**
 * Reconcile scanned collections with the parsed content config.
 *
 * Per collection, in one pass: when the schema declares fields, scanned fields not named
 * in the schema are dropped; then every schema field that still has a scanned counterpart
 * receives its overrides (type, hints, required, ...). The filter runs first because it
 * can shrink `def.fields`.
 */
function applyParsedConfig(
	collections: Record<string, CollectionDefinition>,
	parsed: ParsedConfig,
): void {
	for (const [collectionName, parsedColl] of parsed) {
		const def = collections[collectionName]
		if (!def) continue

		const schemaFields = parsedColl.fields
		if (schemaFields.length > 0) {
			const declaredNames = new Set(schemaFields.map(f => f.name))
			def.fields = def.fields.filter(f => declaredNames.has(f.name))
		}

		const scannedByName = new Map(def.fields.map(f => [f.name, f]))
		for (const schemaField of schemaFields) {
			const scanned = scannedByName.get(schemaField.name)
			if (scanned) applyParsedFieldOverrides(scanned, schemaField)
		}
	}
}
534
+
535
/**
 * Overlay schema information from the content config onto an inferred field, descending
 * into nested object/array sub-fields.
 *
 * At nested levels the schema-declared sub-field list *replaces* the inferred one instead
 * of being merged with it. Sub-fields that were only inferred are not lost: the editor's
 * `ObjectFields` recovers them through its `extraKeys` calculation (value keys minus
 * schema names), renders them via `FrontmatterField` (value-based auto-detect) and offers
 * a remove button. Merging here would defeat that.
 */
function applyParsedFieldOverrides(field: FieldDefinition, pf: ParsedField): void {
	const { type, options, itemType, hints, astroImage, fields: schemaSubFields } = pf

	// Options are only taken from the schema together with an explicit type.
	if (type) {
		field.type = type
		if (options) field.options = options
	}
	if (itemType) field.itemType = itemType
	if (hints) field.hints = hints
	if (astroImage) field.astroImage = true
	field.required = pf.required

	if (!schemaSubFields) return

	const inferredByName = new Map((field.fields ?? []).map(f => [f.name, f]))
	const rebuilt: FieldDefinition[] = []
	for (const subPf of schemaSubFields) {
		const inferred = inferredByName.get(subPf.name)
		if (inferred) {
			// Keep the inferred definition (examples etc.) and overlay the schema on it.
			applyParsedFieldOverrides(inferred, subPf)
			rebuilt.push(inferred)
		} else {
			rebuilt.push(parsedFieldToFieldDefinition(subPf))
		}
	}
	field.fields = rebuilt
}
566
+
567
/**
 * Create a FieldDefinition purely from a parsed schema field, for the case where no
 * inferred counterpart exists (declared in the schema but absent from the data).
 *
 * When the parser could not determine a type, a field with nested children is treated as
 * an `'object'`; anything else becomes `'text'` so it stays visible and editable.
 */
function parsedFieldToFieldDefinition(pf: ParsedField): FieldDefinition {
	const fallbackType = pf.fields ? 'object' : 'text'

	const definition: FieldDefinition = {
		name: pf.name,
		type: pf.type ?? fallbackType,
		required: pf.required,
	}

	// Optional properties are copied only when set, so absent ones stay absent.
	if (pf.options) definition.options = pf.options
	if (pf.itemType) definition.itemType = pf.itemType
	if (pf.hints) definition.hints = pf.hints
	if (pf.astroImage) definition.astroImage = true
	if (pf.fields) {
		definition.fields = pf.fields.map(child => parsedFieldToFieldDefinition(child))
	}
	return definition
}
588
+
589
/**
 * Apply the `orderBy` configuration of each collection: record the ordering field and
 * direction on the definition, then re-sort its entries in place by that field.
 *
 * Entries missing the field always sort last, whatever the direction. Numbers and Date
 * pairs are compared numerically; every other pair is compared as strings.
 */
function applyCollectionOrderBy(
	collections: Record<string, CollectionDefinition>,
	parsed: ParsedConfig,
): void {
	for (const [collectionName, parsedColl] of parsed) {
		// The first schema field carrying an orderBy setting decides the ordering.
		const orderField = parsedColl.fields.find(f => f.orderBy)
		const ordering = orderField?.orderBy
		if (!orderField || !ordering) continue

		const def = collections[collectionName]
		if (!def) continue

		const key = orderField.name
		def.orderBy = key
		def.orderDirection = ordering.direction

		const entries = def.entries
		if (!entries || entries.length <= 1) continue

		const sign = ordering.direction === 'desc' ? -1 : 1
		entries.sort((left, right) => {
			const l = left.data?.[key]
			const r = right.data?.[key]

			const lMissing = l == null
			const rMissing = r == null
			if (lMissing && rMissing) return 0
			if (lMissing) return 1
			if (rMissing) return -1

			if (typeof l === 'number' && typeof r === 'number') return (l - r) * sign
			if (l instanceof Date && r instanceof Date) return (l.getTime() - r.getTime()) * sign
			return String(l).localeCompare(String(r)) * sign
		})
	}
}
619
+
620
/**
 * Mark reference fields. Explicit `reference()` declarations from the content config are
 * applied first; only if not a single one could be applied to a scanned field does the
 * heuristic slug matching run instead.
 */
function detectReferenceFields(
	collections: Record<string, CollectionDefinition>,
	parsed: ParsedConfig,
): void {
	let explicitCount = 0

	for (const [collectionName, parsedColl] of parsed) {
		const def = collections[collectionName]
		if (!def) continue

		for (const pf of parsedColl.fields) {
			const ref = pf.reference
			if (!ref) continue
			const field = def.fields.find(f => f.name === pf.name)
			if (!field) continue

			explicitCount += 1
			if (ref.isArray) {
				field.type = 'array'
				field.itemType = 'reference'
			} else {
				field.type = 'reference'
			}
			field.collection = ref.target
			// Inferred options no longer apply once the field points at another collection.
			field.options = undefined
		}
	}

	if (explicitCount === 0) detectReferenceFieldsBySlugMatch(collections)
}
650
+
651
/**
 * Heuristic reference detection: compare field values with the entry slugs of the other
 * collections and convert matching fields into references. Fields are mutated in place.
 *
 * - text/select fields: a target qualifies when every string example is one of its slugs.
 *   With several qualifying targets, the one matching the most values across all entries wins.
 * - arrays of text with options: a target qualifies when at least half of the options are
 *   its slugs; the target with the most matches wins.
 *
 * A collection is never considered a target for its own fields, and earlier candidates win ties.
 */
function detectReferenceFieldsBySlugMatch(collections: Record<string, CollectionDefinition>): void {
	const slugsByCollection = new Map<string, Set<string>>()
	for (const [name, def] of Object.entries(collections)) {
		if (!def.entries || def.entries.length === 0) continue
		slugsByCollection.set(name, new Set(def.entries.map(e => e.slug)))
	}

	for (const [collectionName, def] of Object.entries(collections)) {
		for (const field of def.fields) {
			if (field.type === 'array') {
				if (field.itemType !== 'text' || !field.options) continue
				const options = field.options

				let target: string | undefined
				let targetMatches = 0
				for (const [candidateName, slugs] of slugsByCollection) {
					if (candidateName === collectionName) continue
					const matches = options.filter(v => slugs.has(v)).length
					const qualifies = matches > 0 && matches >= options.length * 0.5
					if (qualifies && matches > targetMatches) {
						targetMatches = matches
						target = candidateName
					}
				}
				if (target) {
					field.type = 'array'
					field.itemType = 'reference'
					field.collection = target
					field.options = undefined
				}
				continue
			}

			if (field.type !== 'text' && field.type !== 'select') continue
			if (!field.examples) continue

			const stringExamples = field.examples.filter((v): v is string => typeof v === 'string')
			if (stringExamples.length === 0) continue

			// Targets whose slug set contains every example.
			const candidates: Array<{ name: string; slugs: Set<string> }> = []
			for (const [candidateName, slugs] of slugsByCollection) {
				if (candidateName === collectionName) continue
				if (stringExamples.every(v => slugs.has(v))) {
					candidates.push({ name: candidateName, slugs })
				}
			}

			let target: string | undefined
			if (candidates.length === 1) {
				target = candidates[0]!.name
			} else if (candidates.length > 1) {
				// Ambiguous: break the tie with every value of this field, not just the examples.
				const allValues = def.entries?.flatMap((e) => {
					const v = e.data?.[field.name]
					return typeof v === 'string' ? [v] : []
				}) ?? stringExamples
				let highest = 0
				for (const candidate of candidates) {
					const overlap = allValues.filter(v => candidate.slugs.has(v)).length
					if (overlap > highest) {
						highest = overlap
						target = candidate.name
					}
				}
			}

			if (target) {
				field.type = 'reference'
				field.collection = target
				field.options = undefined
			}
		}
	}
}
723
+
724
/**
 * Attach semantic roles to fields so the editor UI can place them without knowing
 * Astro-specific field names; the detection is kept in this layer, which already knows it
 * is parsing Astro content collections.
 *
 * Per collection, at most one `publish-toggle` (first boolean field with a toggle name)
 * and one `publish-date` (first field with a date name, else first date/datetime field)
 * are assigned. Hidden fields and fields that already have a role are left alone.
 */
function assignSemanticRoles(collections: Record<string, CollectionDefinition>): void {
	for (const def of Object.values(collections)) {
		let toggle: FieldDefinition | undefined
		let dateByName: FieldDefinition | undefined
		let dateByType: FieldDefinition | undefined

		for (const field of def.fields) {
			if (field.hidden || field.role) continue
			const key = normalizeFieldName(field.name)

			// Each field can fill at most one slot; the checks run in priority order.
			if (toggle === undefined && field.type === 'boolean' && PUBLISH_TOGGLE_NAMES.has(key)) {
				toggle = field
				continue
			}
			if (dateByName === undefined && PUBLISH_DATE_NAMES.has(key)) {
				dateByName = field
				continue
			}
			if (dateByType === undefined && (field.type === 'date' || field.type === 'datetime')) {
				dateByType = field
			}
		}

		if (toggle) toggle.role = 'publish-toggle'
		// A name match is preferred over a mere type match.
		const dateField = dateByName ?? dateByType
		if (dateField) dateField.role = 'publish-date'
	}
}
750
+
751
/** Name suffixes that mark a field as a possible href/url/slug companion of another field. */
const HREF_SUFFIXES = ['href', 'url', 'link', 'slug', 'path'] as const

/**
 * Find companion fields such as `categoryHref` whose values are derived from a source
 * field (`category`). When every string example of the companion equals the slugified
 * href of some string example of the source, the companion is marked `hidden` and gets
 * `derivedFrom` set to the source field's name.
 *
 * The source field is looked up by the name with the suffix removed: exact match first,
 * then case-insensitively. Fields already hidden or already derived are skipped.
 */
function detectDerivedHrefFields(collections: Record<string, CollectionDefinition>): void {
	const onlyStrings = (values: readonly unknown[]): string[] => values.filter((v): v is string => typeof v === 'string')

	for (const def of Object.values(collections)) {
		const fieldsByName = new Map(def.fields.map(f => [f.name, f]))

		for (const field of def.fields) {
			if (field.hidden || field.derivedFrom) continue

			const lowerName = field.name.toLowerCase()
			for (const suffix of HREF_SUFFIXES) {
				if (!lowerName.endsWith(suffix)) continue

				const baseName = field.name.slice(0, -suffix.length)
				if (!baseName) continue

				const lowerBase = baseName.toLowerCase()
				const sourceField = fieldsByName.get(baseName)
					?? [...fieldsByName.values()].find(f => f.name.toLowerCase() === lowerBase)
				if (!sourceField || !sourceField.examples || !field.examples) continue

				const sourceExamples = onlyStrings(sourceField.examples)
				const derivedExamples = onlyStrings(field.examples)
				if (sourceExamples.length === 0 || derivedExamples.length === 0) continue

				// Order-independent comparison: each derived value must be the href of *some* source value.
				const expectedHrefs = new Set(sourceExamples.map(slugifyHref))
				if (derivedExamples.every(v => expectedHrefs.has(v))) {
					field.hidden = true
					field.derivedFrom = sourceField.name
					break
				}
			}
		}
	}
}
800
+
801
/**
 * Scan a data collection (JSON/YAML files) and infer its schema.
 *
 * Two layouts are recognised inside `collectionPath`:
 * - flat files `<slug>.json` / `<slug>.yaml` / `<slug>.yml`
 * - Hugo-style directories `<slug>/index.{json,yaml,yml}` (directories whose
 *   name starts with `_` or `.` are ignored; a flat file with the same slug wins)
 *
 * Files that cannot be read, fail to parse, or whose top-level value is not a
 * plain mapping (for example a top-level array or a scalar) are skipped.
 *
 * @param fs - File system abstraction used for all I/O.
 * @param collectionPath - Directory of the collection, as passed to `fs.list`.
 * @param collectionName - Name of the collection; used in entry source paths.
 * @param contentDir - Content root; prefixed to every entry's `sourcePath`.
 * @returns The assembled definition, or `null` when the directory is empty or
 *   holds no JSON/YAML sources.
 */
async function scanDataCollection(
	fs: CmsFileSystem,
	collectionPath: string,
	collectionName: string,
	contentDir: string,
): Promise<CollectionDefinition | null> {
	const dirEntries = await fs.list(collectionPath)
	if (dirEntries.length === 0) return null

	const sources: Array<{ slug: string; relPath: string }> = []
	const takenSlugs = new Set<string>()

	for (const entry of dirEntries) {
		if (entry.isDirectory) continue
		if (!entry.name.endsWith('.json') && !entry.name.endsWith('.yaml') && !entry.name.endsWith('.yml')) continue
		const slug = entry.name.replace(/\.(json|ya?ml)$/, '')
		sources.push({ slug, relPath: entry.name })
		takenSlugs.add(slug)
	}

	// Hugo-style layout: <slug>/index.{json,yaml,yml}. Flat files win on slug conflict.
	const subdirs = dirEntries.filter(e => e.isDirectory && !e.name.startsWith('_') && !e.name.startsWith('.'))
	const indexLookups = await Promise.all(subdirs.map(async dir => {
		if (takenSlugs.has(dir.name)) return null
		// First existing index file wins, probed in json -> yaml -> yml order.
		for (const indexExt of ['json', 'yaml', 'yml'] as const) {
			const relPath = path.join(dir.name, `index.${indexExt}`)
			if (await fs.exists(path.join(collectionPath, relPath))) {
				return { slug: dir.name, relPath }
			}
		}
		return null
	}))
	for (const entry of indexLookups) {
		if (entry) sources.push(entry)
	}

	if (sources.length === 0) return null

	const fieldMap = new Map<string, FieldObservation>()
	const entryInfos: CollectionEntryInfo[] = []
	// Reported extension for the collection: json if any source is JSON, else yaml, else yml.
	const ext = sources.some(s => s.relPath.endsWith('.json'))
		? 'json' as const
		: sources.some(s => s.relPath.endsWith('.yaml'))
		? 'yaml' as const
		: 'yml' as const

	// Read everything in parallel; an unreadable file becomes `null` and is skipped below.
	const fileContents = await Promise.all(
		sources.map(s => fs.readFile(path.join(collectionPath, s.relPath)).catch(() => null)),
	)

	for (let i = 0; i < sources.length; i++) {
		const source = sources[i]!
		const raw = fileContents[i]
		if (raw == null) continue

		let parsed: unknown
		try {
			parsed = source.relPath.endsWith('.json') ? JSON.parse(raw) : parseYaml(raw)
		} catch {
			// Malformed file: best-effort scan, ignore it.
			continue
		}
		// Only a key/value mapping can be treated as an entry. `typeof [] === 'object'`,
		// so arrays must be rejected explicitly or their indices would be read as fields.
		if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) continue
		const data = parsed as Record<string, unknown>

		// Display title: `name` takes precedence over `title`; both must be strings.
		const title = typeof data.name === 'string' ? data.name : typeof data.title === 'string' ? data.title : undefined
		entryInfos.push({
			slug: source.slug,
			title,
			sourcePath: path.join(contentDir, collectionName, source.relPath),
			data,
		})

		collectFieldObservations(fieldMap, data, sources.length)
	}

	return assembleCollectionDefinition(collectionName, contentDir, fieldMap, entryInfos, sources.length, {
		type: 'data',
		fileExtension: ext,
	})
}
882
+
883
/**
 * Scan all collections in the content directory.
 *
 * `contentDir` is a root-relative directory (default `src/content`); all I/O is
 * resolved against the injected `CmsFileSystem`'s root. `parseCache` keeps the
 * mtime-keyed content-config parse result across calls within one core instance.
 *
 * Steps:
 * 1. Every sub-directory of `contentDir` whose name does not start with `_` or
 *    `.` is scanned with `scanCollection`, falling back to `scanDataCollection`.
 * 2. Collections declared in the parsed content config that were not found in
 *    step 1 and have both a loader base and a loader pattern are scanned with
 *    `scanGlobCollection`.
 * 3. Post-processing passes run over the combined result (parsed config,
 *    reference fields, derived href fields, semantic roles, ordering).
 *
 * @param fs - File system abstraction used for all I/O.
 * @param contentDir - Directory that contains one sub-directory per collection.
 * @param parseCache - Cache handed to `parseContentConfig`.
 * @returns Collection definitions keyed by collection name. Directory-based
 *   collections are inserted in directory-listing order, followed by
 *   glob-loader collections in parsed-config order.
 */
export async function scanCollections(
	fs: CmsFileSystem,
	contentDir: string = 'src/content',
	parseCache: ParseCache = new Map(),
): Promise<Record<string, CollectionDefinition>> {
	const collections: Record<string, CollectionDefinition> = {}

	const entries = await fs.list(contentDir)
	const collectionDirs = entries.filter(
		entry => entry.isDirectory && !entry.name.startsWith('_') && !entry.name.startsWith('.'),
	)

	// Scan directories concurrently, but do not write into `collections` from inside
	// the callbacks: insertion order would then depend on which scan finishes first.
	const scanned = await Promise.all(collectionDirs.map(async entry => {
		const collectionPath = path.join(contentDir, entry.name)
		return await scanCollection(fs, collectionPath, entry.name, contentDir)
			?? await scanDataCollection(fs, collectionPath, entry.name, contentDir)
	}))

	// Promise.all preserves input order, so registering here keeps the key order stable.
	collectionDirs.forEach((entry, index) => {
		const definition = scanned[index]
		if (definition) {
			collections[entry.name] = definition
		}
	})

	// Pick up collections that exist only in the content config (glob loaders).
	// Kept sequential: each iteration reads `collections`, which earlier iterations extend.
	const parsed = await parseContentConfig(fs, parseCache)
	for (const [collectionName, parsedCollection] of parsed) {
		if (collections[collectionName]) continue
		if (!parsedCollection.loaderBase || !parsedCollection.loaderPattern) continue
		const definition = await scanGlobCollection(fs, collectionName, parsedCollection.loaderBase, parsedCollection.loaderPattern, contentDir)
		if (!definition) continue
		// Nest under the collection that owns the shared base directory (e.g. jsem-otazky -> jsem),
		// so the CMS browser can group it under its parent page instead of listing it flat.
		const baseName = parsedCollection.loaderBase.replace(/[/\\]+$/, '').split(/[/\\]/).pop()
		if (baseName && baseName !== collectionName && collections[baseName]) {
			definition.parentCollection = baseName
		}
		collections[collectionName] = definition
	}

	// Post-scan: apply schema-driven field config, detect references, derived fields, and ordering
	applyParsedConfig(collections, parsed)
	detectReferenceFields(collections, parsed)
	detectDerivedHrefFields(collections)
	assignSemanticRoles(collections)
	applyCollectionOrderBy(collections, parsed)

	return collections
}