@uniweb/build 0.1.27 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,382 @@
1
+ /**
2
+ * Collection Processor
3
+ *
4
+ * Processes content collections from markdown files into JSON data.
5
+ * Collections are defined in site.yml and processed at build time.
6
+ *
7
+ * Features:
8
+ * - Discovers markdown files in content folders
9
+ * - Parses frontmatter for metadata
10
+ * - Converts markdown body to ProseMirror JSON
11
+ * - Supports filtering, sorting, and limiting
12
+ * - Auto-generates excerpts and extracts first images
13
+ *
14
+ * @module @uniweb/build/site/collection-processor
15
+ *
16
+ * @example
17
+ * // site.yml
18
+ * collections:
19
+ * articles:
20
+ * path: content/articles
21
+ * sort: date desc
22
+ *
23
+ * // Usage
24
+ * const collections = await processCollections(siteDir, config.collections)
25
+ * await writeCollectionFiles(siteDir, collections)
26
+ */
27
+
28
+ import { readFile, readdir, stat, writeFile, mkdir } from 'node:fs/promises'
29
+ import { join, basename, extname } from 'node:path'
30
+ import { existsSync } from 'node:fs'
31
+ import yaml from 'js-yaml'
32
+ import { applyFilter, applySort } from './data-fetcher.js'
33
+
/**
 * Minimal markdown → ProseMirror converter used when @uniweb/content-reader
 * is not available. It does not parse markdown: the trimmed source is wrapped
 * in a single paragraph.
 *
 * @param {string} markdown - Raw markdown source
 * @returns {Object} ProseMirror-style `doc` node with one paragraph
 */
const fallbackMarkdownToProseMirror = (markdown) => {
  const text = String(markdown ?? '').trim()
  return {
    type: 'doc',
    content: [
      {
        type: 'paragraph',
        // ProseMirror does not allow empty text nodes, so a blank body
        // becomes an empty paragraph instead of `{ type: 'text', text: '' }`.
        content: text ? [{ type: 'text', text }] : []
      }
    ]
  }
}

// Try to import content-reader for markdown parsing
let markdownToProseMirror = fallbackMarkdownToProseMirror
try {
  const contentReader = await import('@uniweb/content-reader')
  // Only switch to the real parser when the module actually exports it;
  // otherwise later calls would fail on an undefined function.
  if (typeof contentReader.markdownToProseMirror === 'function') {
    markdownToProseMirror = contentReader.markdownToProseMirror
  }
} catch {
  // Package not installed (or failed to load): keep the simplified fallback
}
51
+
/**
 * Parse collection config from site.yml
 *
 * Accepts either the short form (a path string) or the extended form (an
 * object with at least `path`). Both forms produce the same normalized
 * shape, including `excerpt.field`.
 *
 * @param {string} name - Collection name
 * @param {string|Object} config - Simple path string or full config object
 * @returns {Object} Normalized config: { name, path, sort, filter, limit, excerpt }
 * @throws {TypeError} If config is neither a path string nor an object with a string `path`
 *
 * @example
 * // Simple form
 * parseCollectionConfig('articles', 'content/articles')
 *
 * // Extended form
 * parseCollectionConfig('articles', {
 *   path: 'content/articles',
 *   sort: 'date desc',
 *   filter: 'published != false',
 *   limit: 100
 * })
 */
function parseCollectionConfig(name, config) {
  // Treat the short form as an extended config that only sets `path`,
  // so both forms go through the same normalization below.
  const settings = typeof config === 'string' ? { path: config } : config

  // An empty YAML entry (`articles:`) arrives as null and a missing `path`
  // would only fail later inside path.join(); report the collection by name.
  if (!settings || typeof settings !== 'object' || typeof settings.path !== 'string') {
    throw new TypeError(
      `[collection-processor] Collection "${name}" must be a path string or an object with a "path" string`
    )
  }

  return {
    name,
    path: settings.path,
    sort: settings.sort || null,
    filter: settings.filter || null,
    limit: settings.limit || 0,
    excerpt: {
      maxLength: settings.excerpt?.maxLength || 160,
      field: settings.excerpt?.field || null
    }
  }
}
95
+
/**
 * Parse YAML frontmatter from markdown content
 *
 * Frontmatter is a block at the top of the file delimited by two lines that
 * contain only `---`. Both LF and CRLF line endings are accepted, and the
 * closing delimiter may be the last line of the file. When the block is
 * missing, malformed, or does not parse to a YAML mapping, the whole input is
 * returned as the body with empty frontmatter.
 *
 * @param {string} raw - Raw file content
 * @returns {{ frontmatter: Object, body: string }}
 */
function parseFrontmatter(raw) {
  const untouched = { frontmatter: {}, body: raw }

  // Opening fence (optionally preceded by whitespace/BOM), the YAML source,
  // and a closing fence that sits on its own line. Anchoring the closing
  // fence to a line start keeps `---` inside a value or a table row from
  // being mistaken for the delimiter.
  const fence = /^\s*---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)/.exec(raw)
  if (!fence) {
    return untouched
  }

  try {
    // NOTE(review): assumes js-yaml >= 4, where `load` uses the safe default schema — confirm
    const parsed = yaml.load(fence[1] ?? '')

    // A document that merely starts with a thematic break (`---`) can parse
    // as a string or list; spreading that into an item would corrupt it.
    const isMapping = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
    if (parsed != null && !isMapping) {
      return untouched
    }

    return { frontmatter: parsed || {}, body: raw.slice(fence[0].length) }
  } catch (err) {
    console.warn('[collection-processor] YAML parse error:', err.message)
    return untouched
  }
}
121
+
/**
 * Extract plain text from ProseMirror content
 *
 * Text nodes contribute their `text`. Container nodes concatenate the text of
 * their children: inline runs are joined directly, while children that are
 * themselves containers (paragraphs, list items, ...) are separated by a
 * single space so adjacent blocks do not fuse into one word.
 *
 * @param {Object} node - ProseMirror node
 * @returns {string} Plain text
 */
function extractPlainText(node) {
  if (!node) return ''

  if (node.type === 'text') {
    return node.text || ''
  }

  if (!Array.isArray(node.content)) {
    return ''
  }

  const pieces = node.content.map(extractPlainText)

  // Heuristic: a child that has its own `content` array is treated as a
  // block-level node. Without a separator, "First." + "Second." from two
  // paragraphs would read "First.Second." in generated excerpts.
  const hasBlockChildren = node.content.some(child => Array.isArray(child?.content))

  return hasBlockChildren
    ? pieces.filter(Boolean).join(' ')
    : pieces.join('')
}
141
+
/**
 * Extract excerpt from content
 *
 * Sources are tried in order: `frontmatter.excerpt`, then the configured
 * alternative frontmatter field, then text auto-extracted from the content.
 * Frontmatter values are cut to `maxLength` characters; auto-extracted text
 * is whitespace-normalized and truncated with a trailing `...`.
 *
 * @param {Object} frontmatter - Parsed frontmatter
 * @param {Object} content - ProseMirror content
 * @param {Object} excerptConfig - Excerpt configuration ({ maxLength, field })
 * @returns {string} Excerpt text
 */
function extractExcerpt(frontmatter, content, excerptConfig) {
  const { maxLength = 160, field = null } = excerptConfig || {}

  // YAML scalars are not always strings (`excerpt: 2024` parses as a number,
  // `excerpt: [a, b]` as a list). Only strings and numbers are usable as an
  // excerpt; anything else is skipped so the next source gets a chance
  // instead of `.slice` throwing or returning an array.
  const usableText = (value) => {
    if (!value) return ''
    if (typeof value === 'string') return value
    return typeof value === 'number' ? String(value) : ''
  }

  // Check for explicit excerpt in frontmatter
  const explicit = usableText(frontmatter.excerpt)
  if (explicit) {
    return explicit.slice(0, maxLength)
  }

  // Check for alternative field (e.g., 'description')
  const alternative = field ? usableText(frontmatter[field]) : ''
  if (alternative) {
    return alternative.slice(0, maxLength)
  }

  // Auto-extract from content
  const text = extractPlainText(content)
  if (!text) return ''

  // Clean and truncate
  const cleaned = text.replace(/\s+/g, ' ').trim()
  if (cleaned.length <= maxLength) return cleaned

  // Truncate at a word boundary, unless that would drop more than ~30% of
  // the allowed length (e.g. one very long word); then cut mid-word.
  const truncated = cleaned.slice(0, maxLength)
  const lastSpace = truncated.lastIndexOf(' ')
  return lastSpace > maxLength * 0.7
    ? truncated.slice(0, lastSpace) + '...'
    : truncated + '...'
}
178
+
/**
 * Find the first image in a ProseMirror tree
 *
 * Walks the tree depth-first in document order and returns the `src` of the
 * first `image` node that has one.
 *
 * @param {Object} node - ProseMirror node
 * @returns {string|null} Image URL or null
 */
function extractFirstImage(node) {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original order.
  const pending = [node]

  while (pending.length > 0) {
    const current = pending.pop()
    if (!current) continue

    if (current.type === 'image' && current.attrs?.src) {
      return current.attrs.src
    }

    const children = current.content
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i -= 1) {
        pending.push(children[i])
      }
    }
  }

  return null
}
201
+
202
+ // Filter and sort utilities are imported from data-fetcher.js
203
+
/**
 * Turn one markdown file into a collection item
 *
 * Reads the file, separates frontmatter from the body, converts the body to
 * ProseMirror JSON, and derives `excerpt`, `image`, and `lastModified`.
 * Frontmatter keys are spread into the item after `slug` and before the
 * derived fields, so frontmatter may override `slug` but not the derived ones.
 *
 * @param {string} dir - Collection directory path
 * @param {string} filename - Markdown filename
 * @param {Object} config - Collection configuration
 * @returns {Promise<Object|null>} Processed item or null if unpublished
 */
async function processContentItem(dir, filename, config) {
  const sourcePath = join(dir, filename)
  const source = await readFile(sourcePath, 'utf-8')

  const parsed = parseFrontmatter(source)
  const meta = parsed.frontmatter

  // Items explicitly marked `published: false` are left out of the collection
  if (meta.published === false) return null

  const document = markdownToProseMirror(parsed.body)
  const summary = extractExcerpt(meta, document, config.excerpt)

  // An image named in frontmatter wins over the first image found in the body
  const leadImage = meta.image || extractFirstImage(document)

  const { mtime } = await stat(sourcePath)

  return {
    // Slug is the filename without its extension
    slug: basename(filename, extname(filename)),
    ...meta,
    excerpt: summary,
    image: leadImage,
    // Raw markdown body for simple rendering, ProseMirror content for rich rendering
    body: parsed.body.trim(),
    content: document,
    lastModified: mtime.toISOString()
  }
}
249
+
/**
 * Collect and process all items in a collection folder
 *
 * Picks up every `*.md` file directly inside the folder, except names that
 * start with `_`. Files are processed in sorted filename order, then the
 * configured filter, sort, and limit are applied in that order.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} config - Parsed collection config
 * @returns {Promise<Array>} Array of processed items (empty if the folder is missing)
 */
async function collectItems(siteDir, config) {
  const collectionDir = join(siteDir, config.path)

  // Check if collection directory exists
  if (!existsSync(collectionDir)) {
    console.warn(`[collection-processor] Collection folder not found: ${config.path}`)
    return []
  }

  // readdir() returns entries in filesystem-dependent order. Sorting the
  // names makes unsorted collections (and what `limit` keeps) reproducible
  // across machines and builds.
  const mdFiles = (await readdir(collectionDir))
    .filter(f => f.endsWith('.md') && !f.startsWith('_'))
    .sort()

  // Process all markdown files; Promise.all keeps results in mdFiles order
  const processed = await Promise.all(
    mdFiles.map(file => processContentItem(collectionDir, file, config))
  )

  // Filter out nulls (unpublished items)
  let items = processed.filter(Boolean)

  // Apply custom filter
  if (config.filter) {
    items = applyFilter(items, config.filter)
  }

  // Apply sort
  if (config.sort) {
    items = applySort(items, config.sort)
  }

  // Apply limit (0 or negative means "no limit")
  if (config.limit > 0) {
    items = items.slice(0, config.limit)
  }

  return items
}
294
+
/**
 * Build every content collection declared in site.yml
 *
 * Collections are handled one at a time, in declaration order; each one is
 * normalized, collected from disk, and reported on the console.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} collectionsConfig - Collections config from site.yml
 * @returns {Promise<Object>} Map of collection name to items array
 *   (empty object when no collections config is given)
 *
 * @example
 * const collections = await processCollections('/path/to/site', {
 *   articles: { path: 'content/articles', sort: 'date desc' },
 *   products: 'content/products'
 * })
 * // { articles: [...], products: [...] }
 */
export async function processCollections(siteDir, collectionsConfig) {
  const output = {}

  const hasConfig = Boolean(collectionsConfig) && typeof collectionsConfig === 'object'
  if (!hasConfig) {
    return output
  }

  for (const [collectionName, rawConfig] of Object.entries(collectionsConfig)) {
    const normalized = parseCollectionConfig(collectionName, rawConfig)
    const collected = await collectItems(siteDir, normalized)

    output[collectionName] = collected
    console.log(`[collection-processor] Processed ${collectionName}: ${collected.length} items`)
  }

  return output
}
325
+
/**
 * Persist collections as pretty-printed JSON under public/data/
 *
 * One `<name>.json` file is written per collection, sequentially. The output
 * directory is created when needed. Nothing is written (and no directory is
 * created) when there are no collections.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} collections - Map of collection name to items array
 * @returns {Promise<void>}
 *
 * @example
 * await writeCollectionFiles('/path/to/site', {
 *   articles: [{ slug: 'hello', title: 'Hello World', ... }]
 * })
 * // Creates public/data/articles.json
 */
export async function writeCollectionFiles(siteDir, collections) {
  const hasCollections = Boolean(collections) && Object.keys(collections).length > 0
  if (!hasCollections) {
    return
  }

  const outputDir = join(siteDir, 'public', 'data')
  await mkdir(outputDir, { recursive: true })

  for (const [collectionName, entries] of Object.entries(collections)) {
    const target = join(outputDir, `${collectionName}.json`)
    const serialized = JSON.stringify(entries, null, 2)

    await writeFile(target, serialized)
    console.log(`[collection-processor] Generated ${target} (${entries.length} items)`)
  }
}
353
+
/**
 * Get last modified time for a collection
 *
 * Looks at the same files a collection is built from: `*.md` files directly
 * inside the collection folder whose names do not start with `_`.
 *
 * @param {string} siteDir - Site root directory
 * @param {string|Object} config - Collection config (path string or config object)
 * @returns {Promise<Date|null>} Most recent modification time, or null when
 *   the folder is missing or contains no matching files
 */
export async function getCollectionLastModified(siteDir, config) {
  // Only the path is needed; the collection name passed here is a placeholder
  const { path: collectionPath } = parseCollectionConfig('temp', config)
  const collectionDir = join(siteDir, collectionPath)

  if (!existsSync(collectionDir)) {
    return null
  }

  const files = await readdir(collectionDir)
  const mdFiles = files.filter(f => f.endsWith('.md') && !f.startsWith('_'))

  // The stat calls are independent, so issue them together instead of
  // awaiting each one before starting the next.
  const stats = await Promise.all(
    mdFiles.map(file => stat(join(collectionDir, file)))
  )

  return stats.reduce(
    (latest, { mtime }) => (!latest || mtime > latest ? mtime : latest),
    null
  )
}
@@ -12,6 +12,7 @@
12
12
  * - preset: Preset configuration name
13
13
  * - input: Input field mapping
14
14
  * - props: Additional component props (merged with other params)
15
+ * - fetch: Data fetching configuration (path, url, schema, prerender, merge, transform)
15
16
  *
16
17
  * Note: `component` is supported as an alias for `type` (deprecated)
17
18
  *
@@ -26,6 +27,7 @@ import { join, parse } from 'node:path'
26
27
  import { existsSync } from 'node:fs'
27
28
  import yaml from 'js-yaml'
28
29
  import { collectSectionAssets, mergeAssetCollections } from './assets.js'
30
+ import { parseFetchConfig, singularize } from './data-fetcher.js'
29
31
 
30
32
  // Try to import content-reader, fall back to simplified parser
31
33
  let markdownToProseMirror
@@ -45,6 +47,25 @@ try {
45
47
  })
46
48
  }
47
49
 
/**
 * Tell whether a page folder is a dynamic route segment.
 *
 * A dynamic segment is a name made of word characters wrapped in square
 * brackets, e.g. `[slug]` or `[id]`.
 *
 * @param {string} folderName - The folder name to check
 * @returns {boolean} True when the whole name has the `[param]` form
 */
function isDynamicRoute(folderName) {
  const bracketedParam = /^\[(\w+)\]$/
  return bracketedParam.exec(folderName) !== null
}
58
+
/**
 * Read the parameter name out of a dynamic route folder (e.g., [slug] → slug)
 *
 * @param {string} folderName - The folder name (e.g., "[slug]")
 * @returns {string|null} The parameter name, or null when the folder name
 *   does not have the `[param]` form
 */
function extractRouteParam(folderName) {
  // Index 0 of a match is the full text; index 1 is the captured name
  const [, paramName = null] = folderName.match(/^\[(\w+)\]$/) ?? []
  return paramName
}
68
+
48
69
  /**
49
70
  * Parse YAML string using js-yaml
50
71
  */
@@ -140,7 +161,7 @@ async function processMarkdownFile(filePath, id, siteRoot) {
140
161
  }
141
162
  }
142
163
 
143
- const { type, component, preset, input, props, ...params } = frontMatter
164
+ const { type, component, preset, input, props, fetch, ...params } = frontMatter
144
165
 
145
166
  // Convert markdown to ProseMirror
146
167
  const proseMirrorContent = markdownToProseMirror(markdown)
@@ -152,6 +173,7 @@ async function processMarkdownFile(filePath, id, siteRoot) {
152
173
  input,
153
174
  params: { ...params, ...props },
154
175
  content: proseMirrorContent,
176
+ fetch: parseFetchConfig(fetch),
155
177
  subsections: []
156
178
  }
157
179
 
@@ -293,9 +315,10 @@ async function processExplicitSections(sectionsConfig, pagePath, siteRoot, paren
293
315
  * @param {Object} options - Route options
294
316
  * @param {boolean} options.isIndex - Whether this page is the index for its parent route
295
317
  * @param {string} options.parentRoute - The parent route (e.g., '/' or '/docs')
318
+ * @param {Object} options.parentFetch - Parent page's fetch config (for dynamic routes)
296
319
  * @returns {Object} Page data with assets manifest
297
320
  */
298
- async function processPage(pagePath, pageName, siteRoot, { isIndex = false, parentRoute = '/' } = {}) {
321
+ async function processPage(pagePath, pageName, siteRoot, { isIndex = false, parentRoute = '/', parentFetch = null } = {}) {
299
322
  const pageConfig = await readYamlFile(join(pagePath, 'page.yml'))
300
323
 
301
324
  // Note: We no longer skip hidden pages here - they still exist as valid pages,
@@ -354,9 +377,16 @@ async function processPage(pagePath, pageName, siteRoot, { isIndex = false, pare
354
377
  // All pages get their actual folder-based route (no special treatment for index)
355
378
  // The isIndex flag marks which page should also be accessible at the parent route
356
379
  let route
380
+ const isDynamic = isDynamicRoute(pageName)
381
+ const paramName = isDynamic ? extractRouteParam(pageName) : null
382
+
357
383
  if (pageName.startsWith('@')) {
358
384
  // Special pages (layout areas) keep their @ prefix
359
385
  route = parentRoute === '/' ? `/@${pageName.slice(1)}` : `${parentRoute}/@${pageName.slice(1)}`
386
+ } else if (isDynamic) {
387
+ // Dynamic routes: /blog/[slug] → /blog/:slug (for route matching)
388
+ // The actual routes like /blog/my-post are generated at prerender time
389
+ route = parentRoute === '/' ? `/:${paramName}` : `${parentRoute}/:${paramName}`
360
390
  } else {
361
391
  // Normal pages get parent + their name
362
392
  route = parentRoute === '/' ? `/${pageName}` : `${parentRoute}/${pageName}`
@@ -365,6 +395,13 @@ async function processPage(pagePath, pageName, siteRoot, { isIndex = false, pare
365
395
  // Extract configuration
366
396
  const { seo = {}, layout = {}, ...restConfig } = pageConfig
367
397
 
398
+ // For dynamic routes, determine the parent's data schema
399
+ // This tells prerender which data array to iterate over
400
+ let parentSchema = null
401
+ if (isDynamic && parentFetch) {
402
+ parentSchema = parentFetch.schema
403
+ }
404
+
368
405
  return {
369
406
  page: {
370
407
  route,
@@ -375,6 +412,11 @@ async function processPage(pagePath, pageName, siteRoot, { isIndex = false, pare
375
412
  order: pageConfig.order,
376
413
  lastModified: lastModified?.toISOString(),
377
414
 
415
+ // Dynamic route metadata
416
+ isDynamic,
417
+ paramName, // e.g., "slug" from [slug]
418
+ parentSchema, // e.g., "articles" - the data array to iterate over
419
+
378
420
  // Navigation options
379
421
  hidden: pageConfig.hidden || false, // Hide from all navigation
380
422
  hideInHeader: pageConfig.hideInHeader || false, // Hide from header nav
@@ -394,6 +436,10 @@ async function processPage(pagePath, pageName, siteRoot, { isIndex = false, pare
394
436
  changefreq: seo.changefreq || null,
395
437
  priority: seo.priority || null
396
438
  },
439
+
440
+ // Data fetching
441
+ fetch: parseFetchConfig(pageConfig.fetch),
442
+
397
443
  sections: hierarchicalSections
398
444
  },
399
445
  assetCollection: pageAssetCollection
@@ -441,9 +487,10 @@ function determineIndexPage(orderConfig, availableFolders) {
441
487
  * @param {string} parentRoute - Parent route (e.g., '/' or '/docs')
442
488
  * @param {string} siteRoot - Site root directory for asset resolution
443
489
  * @param {Object} orderConfig - { pages: [...], index: 'name' } from parent's config
490
+ * @param {Object} parentFetch - Parent page's fetch config (for dynamic child routes)
444
491
  * @returns {Promise<Object>} { pages, assetCollection, header, footer, left, right }
445
492
  */
446
- async function collectPagesRecursive(dirPath, parentRoute, siteRoot, orderConfig = {}) {
493
+ async function collectPagesRecursive(dirPath, parentRoute, siteRoot, orderConfig = {}, parentFetch = null) {
447
494
  const entries = await readdir(dirPath)
448
495
  const pages = []
449
496
  let assetCollection = {
@@ -487,9 +534,11 @@ async function collectPagesRecursive(dirPath, parentRoute, siteRoot, orderConfig
487
534
  const isSpecial = entry.startsWith('@')
488
535
 
489
536
  // Process this directory as a page
537
+ // Pass parentFetch so dynamic routes can inherit parent's data schema
490
538
  const result = await processPage(entryPath, entry, siteRoot, {
491
539
  isIndex: isIndex && !isSpecial,
492
- parentRoute
540
+ parentRoute,
541
+ parentFetch
493
542
  })
494
543
 
495
544
  if (result) {
@@ -517,7 +566,9 @@ async function collectPagesRecursive(dirPath, parentRoute, siteRoot, orderConfig
517
566
  if (!isSpecial) {
518
567
  // The child route depends on whether this page is the index
519
568
  const childParentRoute = isIndex ? parentRoute : page.route
520
- const subResult = await collectPagesRecursive(entryPath, childParentRoute, siteRoot, childOrderConfig)
569
+ // Pass this page's fetch config to children (for dynamic routes that inherit parent data)
570
+ const childFetch = page.fetch || parentFetch
571
+ const subResult = await collectPagesRecursive(entryPath, childParentRoute, siteRoot, childOrderConfig, childFetch)
521
572
  pages.push(...subResult.pages)
522
573
  assetCollection = mergeAssetCollections(assetCollection, subResult.assetCollection)
523
574
  }
@@ -571,7 +622,10 @@ export async function collectSiteContent(sitePath) {
571
622
  }
572
623
 
573
624
  return {
574
- config: siteConfig,
625
+ config: {
626
+ ...siteConfig,
627
+ fetch: parseFetchConfig(siteConfig.fetch),
628
+ },
575
629
  theme: themeConfig,
576
630
  pages,
577
631
  header,