@uniweb/build 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@
10
10
  * - defaults: param default values
11
11
  * - context: static capabilities for cross-block coordination
12
12
  * - initialState: initial values for mutable block state
13
+ * - inheritData: boolean or array for cascaded data from page/site fetches
13
14
  *
14
15
  * Full metadata (titles, descriptions, hints, etc.) stays in schema.json
15
16
  * for the visual editor.
@@ -35,6 +36,125 @@ function parseDataString(dataString) {
35
36
  }
36
37
  }
37
38
 
39
/**
 * Extract the lean, runtime-only form of a single schema field.
 *
 * Editor-only metadata (label, hint, description, ...) is dropped; only
 * `type`, `default`, `options`, `of` and `schema` are carried over.
 *
 * @param {string|Object} field - Schema field definition
 * @returns {string|Object|null} Lean field definition. Strings and other
 *   non-object values are returned unchanged. An object that only declares
 *   a simple `type` collapses to the shorthand string. `null` is returned
 *   when an object definition has no runtime-relevant keys.
 */
function extractSchemaField(field) {
  // Shorthand ('string', 'number', 'boolean') and non-object values pass through
  if (typeof field === 'string' || !field || typeof field !== 'object') {
    return field
  }

  const lean = {}

  // Keep runtime-relevant fields
  if (field.type) lean.type = field.type
  if (field.default !== undefined) lean.default = field.default
  if (field.options) lean.options = field.options

  // Array item definition: a type/schema name (string) or an inline field map.
  // `typeof null === 'object'`, so null is excluded explicitly; otherwise a
  // null `of` would be emitted as an empty inline schema (`of: {}`).
  if (typeof field.of === 'string') {
    lean.of = field.of
  } else if (field.of !== null && typeof field.of === 'object') {
    lean.of = extractSchemaFields(field.of)
  }

  // Nested object definition
  if (field.schema && typeof field.schema === 'object') {
    lean.schema = extractSchemaFields(field.schema)
  }

  const keys = Object.keys(lean)
  if (keys.length === 0) {
    return null
  }

  // { type: 'string' } carries no more information than 'string'
  const shorthandTypes = ['string', 'number', 'boolean']
  if (keys.length === 1 && keys[0] === 'type' && shorthandTypes.includes(lean.type)) {
    return lean.type
  }

  return lean
}
87
+
88
/**
 * Build the lean version of a whole field map.
 *
 * Each entry is passed through extractSchemaField; entries for which it
 * returns null are left out of the result.
 *
 * @param {Object} schemaFields - Map of fieldName -> field definition
 * @returns {Object} Map of fieldName -> lean field definition (empty when
 *   the input is not an object)
 */
function extractSchemaFields(schemaFields) {
  const isFieldMap = Boolean(schemaFields) && typeof schemaFields === 'object'
  if (!isFieldMap) return {}

  return Object.keys(schemaFields).reduce((result, fieldName) => {
    const stripped = extractSchemaField(schemaFields[fieldName])
    if (stripped !== null) {
      result[fieldName] = stripped
    }
    return result
  }, {})
}
108
+
109
/**
 * Check if a schema value is in the full @uniweb/schemas format.
 * Full format has: { name, version?, description?, fields: {...} }
 *
 * Only the presence of a non-null object under `fields` is checked.
 *
 * @param {Object} schema - Schema value to check
 * @returns {boolean} Always a real boolean, including for null/undefined input
 */
function isFullSchemaFormat(schema) {
  // Boolean() keeps the documented return type: a bare `schema && ...` chain
  // would hand back null/undefined/'' for falsy input.
  return (
    Boolean(schema) &&
    typeof schema === 'object' &&
    typeof schema.fields === 'object' &&
    schema.fields !== null
  )
}
124
+
125
/**
 * Produce the lean, runtime form of the `schemas` object from meta.js.
 *
 * Each schema may be given in one of two shapes:
 *   1. Full @uniweb/schemas format: { name, version, fields: {...} }
 *      (only `fields` is used; the surrounding metadata is discarded)
 *   2. Inline fields format: { fieldName: fieldDef, ... }
 *
 * Schemas that end up with no lean fields are omitted.
 *
 * @param {Object} schemas - The schemas object from meta.js
 * @returns {Object|null} Lean schemas keyed by schema name, or null when
 *   the input is not an object or nothing remains after stripping
 */
function extractSchemas(schemas) {
  if (!schemas || typeof schemas !== 'object') return null

  const result = {}

  Object.keys(schemas).forEach((schemaName) => {
    const definition = schemas[schemaName]

    let fieldMap = definition
    if (isFullSchemaFormat(definition)) {
      fieldMap = definition.fields
    }

    const stripped = extractSchemaFields(fieldMap)
    if (Object.keys(stripped).length === 0) return

    result[schemaName] = stripped
  })

  if (Object.keys(result).length === 0) return null
  return result
}
157
+
38
158
  /**
39
159
  * Extract param defaults from params object
40
160
  *
@@ -102,6 +222,21 @@ export function extractRuntimeSchema(fullMeta) {
102
222
  runtime.initialState = fullMeta.initialState
103
223
  }
104
224
 
225
+ // Schemas - lean version for runtime validation/defaults
226
+ // Strips editor-only fields (label, hint, description)
227
+ if (fullMeta.schemas) {
228
+ const schemas = extractSchemas(fullMeta.schemas)
229
+ if (schemas) {
230
+ runtime.schemas = schemas
231
+ }
232
+ }
233
+
234
+ // Data inheritance - component receives cascaded data from page/site level fetches
235
+ // Can be: true (inherit all), false (inherit none), or ['schema1', 'schema2'] (selective)
236
+ if (fullMeta.inheritData !== undefined) {
237
+ runtime.inheritData = fullMeta.inheritData
238
+ }
239
+
105
240
  return Object.keys(runtime).length > 0 ? runtime : null
106
241
  }
107
242
 
@@ -166,6 +166,33 @@ export async function processAssets(assetManifest, options = {}) {
166
166
  return { pathMapping, results }
167
167
  }
168
168
 
169
/**
 * Recursively rewrite asset paths in a data object.
 *
 * Returns a new structure; the input is not modified. Strings that are keys
 * of `pathMapping` are replaced by the mapped value, everything else is
 * copied through unchanged.
 *
 * @param {any} data - Parsed JSON/YAML data
 * @param {Object} pathMapping - Map of original paths to new paths
 * @returns {any} Data with rewritten paths
 */
function rewriteDataPaths(data, pathMapping) {
  if (typeof data === 'string') {
    // Own-property check: a plain `pathMapping[data]` lookup would resolve
    // inherited members, turning a string such as 'constructor' or
    // 'toString' into a function.
    const isMapped = Object.prototype.hasOwnProperty.call(pathMapping, data)
    return (isMapped && pathMapping[data]) || data
  }

  if (Array.isArray(data)) {
    return data.map((item) => rewriteDataPaths(item, pathMapping))
  }

  if (data && typeof data === 'object') {
    // Object.fromEntries defines own properties, so an own '__proto__' key
    // (possible after JSON.parse) is copied instead of replacing the
    // prototype of the result.
    return Object.fromEntries(
      Object.entries(data).map(([key, value]) => [key, rewriteDataPaths(value, pathMapping)])
    )
  }

  return data
}
195
+
169
196
  /**
170
197
  * Rewrite asset paths in ProseMirror content
171
198
  *
@@ -188,6 +215,11 @@ export function rewriteContentPaths(content, pathMapping) {
188
215
  }
189
216
  }
190
217
 
218
+ // Rewrite paths in data blocks (structured data parsed at build time)
219
+ if (node.type === 'dataBlock' && node.attrs?.data) {
220
+ node.attrs.data = rewriteDataPaths(node.attrs.data, pathMapping)
221
+ }
222
+
191
223
  // Recurse into content
192
224
  if (node.content && Array.isArray(node.content)) {
193
225
  node.content.forEach(walk)
@@ -50,6 +50,72 @@ function isPdfPath(src) {
50
50
  return src.toLowerCase().endsWith(PDF_EXTENSION)
51
51
  }
52
52
 
53
/**
 * Decide whether a value looks like a path to a local media asset.
 *
 * A match is a non-empty string that isExternalUrl does not flag, that
 * starts with './', '../' or '/', and that isImagePath, isVideoPath or
 * isPdfPath accepts.
 *
 * @param {string} value - String to check
 * @returns {boolean} True if it looks like a local asset path
 */
function isLocalAssetPath(value) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0
  if (!isNonEmptyString) return false

  // External URLs are never local assets
  if (isExternalUrl(value)) return false

  // Relative ('./', '../') or absolute site path ('/')
  const hasLocalPrefix = ['./', '../', '/'].some((prefix) => value.startsWith(prefix))
  if (!hasLocalPrefix) return false

  // Must be one of the recognised media kinds
  return isImagePath(value) || isVideoPath(value) || isPdfPath(value)
}
73
+
74
/**
 * Depth-first walk over parsed data, reporting every string that
 * isLocalAssetPath accepts.
 *
 * Arrays are visited element by element and objects value by value;
 * other primitives are ignored.
 *
 * @param {any} data - Parsed JSON/YAML data
 * @param {Function} visitor - Callback for each asset path: (path) => void
 */
function walkDataAssets(data, visitor) {
  if (typeof data === 'string') {
    if (isLocalAssetPath(data)) visitor(data)
    return
  }

  // Nothing to descend into for null, numbers, booleans, ...
  if (!data || typeof data !== 'object') return

  const children = Array.isArray(data) ? data : Object.values(data)
  for (const child of children) {
    walkDataAssets(child, visitor)
  }
}
97
+
98
/**
 * Walk a ProseMirror document tree and collect assets referenced from
 * `dataBlock` nodes.
 *
 * For every node of type 'dataBlock' with a truthy `attrs.data`, that data
 * is handed to walkDataAssets. Child nodes in `content` are then visited
 * recursively, in order.
 *
 * @param {Object} doc - ProseMirror document (or any node of it)
 * @param {Function} visitor - Callback for each asset: (path) => void
 */
function walkDataBlockAssets(doc, visitor) {
  if (!doc) return

  // Structured data lives in attrs.data of dataBlock nodes
  const blockData = doc.type === 'dataBlock' ? doc.attrs?.data : null
  if (blockData) {
    walkDataAssets(blockData, visitor)
  }

  // Descend into child nodes
  if (!Array.isArray(doc.content)) return
  for (const child of doc.content) {
    walkDataBlockAssets(child, visitor)
  }
}
118
+
53
119
  /**
54
120
  * Resolve an asset path to absolute file system path
55
121
  *
@@ -215,6 +281,22 @@ export function collectSectionAssets(section, markdownPath, siteRoot) {
215
281
  }
216
282
  }
217
283
 
284
+ // Collect from tagged code blocks (JSON/YAML data)
285
+ if (section.content) {
286
+ walkDataBlockAssets(section.content, (assetPath) => {
287
+ const result = resolveAssetPath(assetPath, markdownPath, siteRoot)
288
+ if (!result.external && result.resolved) {
289
+ assets[assetPath] = {
290
+ original: assetPath,
291
+ resolved: result.resolved,
292
+ isImage: result.isImage,
293
+ isVideo: result.isVideo,
294
+ isPdf: result.isPdf
295
+ }
296
+ }
297
+ })
298
+ }
299
+
218
300
  return { assets, hasExplicitPoster, hasExplicitPreview }
219
301
  }
220
302
 
@@ -0,0 +1,382 @@
1
+ /**
2
+ * Collection Processor
3
+ *
4
+ * Processes content collections from markdown files into JSON data.
5
+ * Collections are defined in site.yml and processed at build time.
6
+ *
7
+ * Features:
8
+ * - Discovers markdown files in content folders
9
+ * - Parses frontmatter for metadata
10
+ * - Converts markdown body to ProseMirror JSON
11
+ * - Supports filtering, sorting, and limiting
12
+ * - Auto-generates excerpts and extracts first images
13
+ *
14
+ * @module @uniweb/build/site/collection-processor
15
+ *
16
+ * @example
17
+ * // site.yml
18
+ * collections:
19
+ * articles:
20
+ * path: content/articles
21
+ * sort: date desc
22
+ *
23
+ * // Usage
24
+ * const collections = await processCollections(siteDir, config.collections)
25
+ * await writeCollectionFiles(siteDir, collections)
26
+ */
27
+
28
+ import { readFile, readdir, stat, writeFile, mkdir } from 'node:fs/promises'
29
+ import { join, basename, extname } from 'node:path'
30
+ import { existsSync } from 'node:fs'
31
+ import yaml from 'js-yaml'
32
+ import { applyFilter, applySort } from './data-fetcher.js'
33
+
34
// Markdown -> ProseMirror converter.
// Starts as a simplified fallback that wraps the trimmed markdown source in a
// single paragraph, and is replaced by the @uniweb/content-reader
// implementation when that package can be imported.
let markdownToProseMirror = (markdown) => {
  const text = String(markdown ?? '').trim()
  return {
    type: 'doc',
    content: [
      // ProseMirror does not allow empty text nodes, so a blank body becomes
      // an empty paragraph instead of { type: 'text', text: '' }.
      text
        ? { type: 'paragraph', content: [{ type: 'text', text }] }
        : { type: 'paragraph' }
    ]
  }
}

try {
  const contentReader = await import('@uniweb/content-reader')
  // Only switch over when the export is actually callable; otherwise the
  // binding would end up undefined and fail on first use.
  if (typeof contentReader.markdownToProseMirror === 'function') {
    markdownToProseMirror = contentReader.markdownToProseMirror
  }
} catch {
  // content-reader could not be loaded: keep the simplified fallback
}
51
+
52
/**
 * Normalize a collection entry from site.yml.
 *
 * Both accepted forms produce the same shape, so callers never have to check
 * which one was used.
 *
 * @param {string} name - Collection name
 * @param {string|Object} config - Simple path string or full config object
 * @returns {{ name: string, path: string, sort: (string|null),
 *   filter: (string|null), limit: number,
 *   excerpt: { maxLength: number, field: (string|null) } }} Normalized config
 * @throws {TypeError} When `config` is null or undefined (e.g. an empty
 *   value in site.yml)
 *
 * @example
 * // Simple form
 * parseCollectionConfig('articles', 'content/articles')
 *
 * // Extended form
 * parseCollectionConfig('articles', {
 *   path: 'content/articles',
 *   sort: 'date desc',
 *   filter: 'published != false',
 *   limit: 100
 * })
 */
function parseCollectionConfig(name, config) {
  if (config === null || config === undefined) {
    throw new TypeError(
      `[collection-processor] Collection "${name}" has no configuration (expected a path string or a config object)`
    )
  }

  // Treat the shorthand as an object with only `path`, so every default below
  // is applied in one place and `excerpt.field` is always present.
  const options = typeof config === 'string' ? { path: config } : config

  return {
    name,
    path: options.path,
    sort: options.sort || null,
    filter: options.filter || null,
    limit: options.limit || 0,
    excerpt: {
      maxLength: options.excerpt?.maxLength || 160,
      field: options.excerpt?.field || null
    }
  }
}
95
+
96
/**
 * Parse YAML frontmatter from markdown content.
 *
 * Frontmatter is a block opened by a `---` line at the start of the file
 * (leading whitespace allowed) and closed by the next line consisting only
 * of `---`. Both LF and CRLF line endings are accepted, and the closing
 * delimiter may be the last line of the file.
 *
 * @param {string} raw - Raw file content
 * @returns {{ frontmatter: Object, body: string }} Parsed frontmatter (an
 *   empty object when absent, not a mapping, or unparsable) and the markdown
 *   body. When there is no frontmatter block, or the YAML fails to parse,
 *   `body` is the unmodified `raw` input.
 */
function parseFrontmatter(raw) {
  // Delimiters must sit on their own line. Splitting on '---\n' would miss
  // CRLF files and cut YAML lines that merely end in '---'.
  const match = /^\s*---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)([\s\S]*)$/.exec(raw)
  if (!match) {
    return { frontmatter: {}, body: raw }
  }

  const [, yamlSource = '', body] = match

  try {
    // NOTE(review): js-yaml >= 4 uses a safe schema for `load` by default;
    // confirm the installed version if content can come from untrusted authors.
    const parsed = yaml.load(yamlSource)

    // Only a mapping is usable as frontmatter. A scalar or list would later
    // be spread into the item as index keys.
    const isMapping = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
    if (!isMapping) {
      if (parsed !== null && parsed !== undefined) {
        console.warn('[collection-processor] Frontmatter is not a key/value mapping; ignoring it')
      }
      return { frontmatter: {}, body }
    }

    return { frontmatter: parsed, body }
  } catch (err) {
    console.warn('[collection-processor] YAML parse error:', err.message)
    return { frontmatter: {}, body: raw }
  }
}
121
+
122
/**
 * Extract plain text from ProseMirror content.
 *
 * Text inside one textblock (a node that directly contains text nodes) is
 * concatenated as-is. Text from sibling block nodes is separated by a single
 * space so the last word of one paragraph does not run into the first word
 * of the next.
 *
 * @param {Object} node - ProseMirror node
 * @returns {string} Plain text ('' for missing nodes or nodes without text)
 */
function extractPlainText(node) {
  if (!node) return ''

  if (node.type === 'text') {
    return node.text || ''
  }

  if (!Array.isArray(node.content)) {
    return ''
  }

  const parts = node.content.map((child) => extractPlainText(child))

  // Inline content: keep the author's own spacing
  const isTextblock = node.content.some((child) => child?.type === 'text')
  if (isTextblock) {
    return parts.join('')
  }

  // Block content: drop empty blocks and separate the rest
  return parts.filter(Boolean).join(' ')
}
141
+
142
/**
 * Extract excerpt from content.
 *
 * Sources, in order of precedence:
 *   1. `frontmatter.excerpt`
 *   2. `frontmatter[excerptConfig.field]` (e.g. 'description')
 *   3. Plain text of the content, whitespace-collapsed
 *
 * Frontmatter values are cut to `maxLength` characters. Auto-extracted text
 * longer than `maxLength` is cut (at a word boundary when one falls in the
 * last 30% of the allowed length) and suffixed with '...'.
 *
 * @param {Object} frontmatter - Parsed frontmatter
 * @param {Object} content - ProseMirror content
 * @param {Object} excerptConfig - Excerpt configuration
 * @param {number} [excerptConfig.maxLength=160] - Maximum excerpt length
 * @param {string|null} [excerptConfig.field=null] - Alternative frontmatter field
 * @returns {string} Excerpt text
 */
function extractExcerpt(frontmatter, content, excerptConfig) {
  const { maxLength = 160, field = null } = excerptConfig || {}

  // Explicit excerpt first, then the configured alternative field
  const explicit = frontmatter.excerpt || (field ? frontmatter[field] : null)
  if (explicit) {
    // YAML values are not necessarily strings (numbers, dates, lists);
    // coerce so the result is always a string and .slice cannot throw.
    return String(explicit).slice(0, maxLength)
  }

  // Auto-extract from content
  const text = extractPlainText(content)
  if (!text) return ''

  // Clean and truncate
  const cleaned = text.replace(/\s+/g, ' ').trim()
  if (cleaned.length <= maxLength) return cleaned

  // Truncate at word boundary
  const truncated = cleaned.slice(0, maxLength)
  const lastSpace = truncated.lastIndexOf(' ')
  return lastSpace > maxLength * 0.7
    ? truncated.slice(0, lastSpace) + '...'
    : truncated + '...'
}
178
+
179
/**
 * Find the first image in ProseMirror content.
 *
 * Nodes are examined in document order (a node before its children,
 * children left to right); the `src` of the first `image` node that has
 * one is returned.
 *
 * @param {Object} node - ProseMirror node
 * @returns {string|null} Image URL or null
 */
function extractFirstImage(node) {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped in their original order.
  const pending = [node]

  while (pending.length > 0) {
    const current = pending.pop()
    if (!current) continue

    const isImage = current.type === 'image'
    if (isImage && current.attrs?.src) {
      return current.attrs.src
    }

    const children = current.content
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i -= 1) {
        pending.push(children[i])
      }
    }
  }

  return null
}
201
+
202
+ // Filter and sort utilities are imported from data-fetcher.js
203
+
204
/**
 * Turn one markdown file into a collection item.
 *
 * The file is read, split into frontmatter and body, and skipped when the
 * frontmatter sets `published: false`. Otherwise the body is converted with
 * markdownToProseMirror and the item is assembled from the slug (filename
 * without extension), all frontmatter keys, and the computed `excerpt`,
 * `image`, `body`, `content` and `lastModified` values. Computed values win
 * over frontmatter keys of the same name; a frontmatter `slug` wins over the
 * filename-derived one.
 *
 * @param {string} dir - Collection directory path
 * @param {string} filename - Markdown filename
 * @param {Object} config - Collection configuration
 * @returns {Promise<Object|null>} Processed item or null if unpublished
 */
async function processContentItem(dir, filename, config) {
  const sourcePath = join(dir, filename)
  const source = await readFile(sourcePath, 'utf-8')

  const parsed = parseFrontmatter(source)
  const meta = parsed.frontmatter

  // Unpublished items are dropped before any further work
  if (meta.published === false) return null

  const doc = markdownToProseMirror(parsed.body)

  const item = {
    slug: basename(filename, extname(filename)),
    ...meta
  }

  item.excerpt = extractExcerpt(meta, doc, config.excerpt)
  // Frontmatter image takes precedence over the first image in the body
  item.image = meta.image || extractFirstImage(doc)
  // Raw markdown for simple rendering, ProseMirror JSON for rich rendering
  item.body = parsed.body.trim()
  item.content = doc

  const { mtime } = await stat(sourcePath)
  item.lastModified = mtime.toISOString()

  return item
}
249
+
250
/**
 * Collect and process all items in a collection folder.
 *
 * Markdown files (`*.md`, excluding names starting with `_`) directly inside
 * the collection folder are processed in filename order, then the configured
 * filter, sort and limit are applied in that order. A missing folder yields
 * an empty list and a warning.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} config - Parsed collection config
 * @returns {Promise<Array>} Array of processed items
 */
async function collectItems(siteDir, config) {
  const collectionDir = join(siteDir, config.path)

  // Check if collection directory exists
  if (!existsSync(collectionDir)) {
    console.warn(`[collection-processor] Collection folder not found: ${config.path}`)
    return []
  }

  const files = await readdir(collectionDir)

  // Directory listing order depends on the platform/filesystem. Sorting the
  // names makes the generated data reproducible when no `sort` is configured.
  const mdFiles = files
    .filter((file) => file.endsWith('.md') && !file.startsWith('_'))
    .sort()

  // Process all markdown files
  const processed = await Promise.all(
    mdFiles.map((file) => processContentItem(collectionDir, file, config))
  )

  // Filter out nulls (unpublished items)
  let items = processed.filter(Boolean)

  if (config.filter) {
    items = applyFilter(items, config.filter)
  }

  if (config.sort) {
    items = applySort(items, config.sort)
  }

  if (config.limit > 0) {
    items = items.slice(0, config.limit)
  }

  return items
}
294
+
295
/**
 * Process every content collection declared in site.yml.
 *
 * Collections are handled one after another: each entry is normalized with
 * parseCollectionConfig, its items are gathered with collectItems, and a
 * summary line is logged.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} collectionsConfig - Collections config from site.yml
 * @returns {Promise<Object>} Map of collection name to items array (empty
 *   when no config object is given)
 *
 * @example
 * const collections = await processCollections('/path/to/site', {
 *   articles: { path: 'content/articles', sort: 'date desc' },
 *   products: 'content/products'
 * })
 * // { articles: [...], products: [...] }
 */
export async function processCollections(siteDir, collectionsConfig) {
  const collections = {}

  const hasConfig = Boolean(collectionsConfig) && typeof collectionsConfig === 'object'
  if (!hasConfig) return collections

  for (const name of Object.keys(collectionsConfig)) {
    const normalized = parseCollectionConfig(name, collectionsConfig[name])
    const items = await collectItems(siteDir, normalized)
    collections[name] = items
    console.log(`[collection-processor] Processed ${name}: ${items.length} items`)
  }

  return collections
}
325
+
326
/**
 * Write each collection to `<siteDir>/public/data/<name>.json`.
 *
 * The data directory is created when needed. Files are written one after
 * another as 2-space indented JSON, and a line is logged per file. Nothing
 * is created when there are no collections.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} collections - Map of collection name to items array
 * @returns {Promise<void>}
 *
 * @example
 * await writeCollectionFiles('/path/to/site', {
 *   articles: [{ slug: 'hello', title: 'Hello World', ... }]
 * })
 * // Creates public/data/articles.json
 */
export async function writeCollectionFiles(siteDir, collections) {
  const names = collections ? Object.keys(collections) : []
  if (names.length === 0) return

  const outputDir = join(siteDir, 'public', 'data')
  await mkdir(outputDir, { recursive: true })

  for (const name of names) {
    const items = collections[name]
    const filepath = join(outputDir, `${name}.json`)
    const json = JSON.stringify(items, null, 2)
    await writeFile(filepath, json)
    console.log(`[collection-processor] Generated ${filepath} (${items.length} items)`)
  }
}
353
+
354
/**
 * Find the most recent modification time among a collection's markdown files.
 *
 * Considers the same files as the collection itself: `*.md` entries directly
 * inside the collection folder whose names do not start with `_`.
 *
 * @param {string} siteDir - Site root directory
 * @param {Object} config - Collection config
 * @returns {Promise<Date|null>} Most recent modification time, or null when
 *   the folder does not exist or contains no matching files
 */
export async function getCollectionLastModified(siteDir, config) {
  const { path: relativePath } = parseCollectionConfig('temp', config)
  const dir = join(siteDir, relativePath)

  if (!existsSync(dir)) return null

  const entries = await readdir(dir)
  let newest = null

  for (const entry of entries) {
    const isCollectionFile = entry.endsWith('.md') && !entry.startsWith('_')
    if (!isCollectionFile) continue

    const { mtime } = await stat(join(dir, entry))
    if (newest === null || mtime > newest) {
      newest = mtime
    }
  }

  return newest
}