@nuasite/cms-marker 0.0.99 → 0.0.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -14,7 +14,7 @@
14
14
  "directory": "packages/cms-marker"
15
15
  },
16
16
  "license": "Apache-2.0",
17
- "version": "0.0.99",
17
+ "version": "0.0.101",
18
18
  "module": "src/index.ts",
19
19
  "types": "src/index.ts",
20
20
  "type": "module",
@@ -48,6 +48,253 @@ function getPagePath(htmlPath: string, outDir: string): string {
48
48
  return '/' + parts.join('/')
49
49
  }
50
50
 
51
/**
 * Cluster entries from the same source file into separate component instances.
 *
 * When a component is used multiple times on a page, its entries live in
 * different subtrees. Entries are partitioned by the direct child of their
 * lowest common ancestor (LCA) that contains them; each such child subtree
 * becomes one instance.
 *
 * A single cluster holding every entry is returned whenever splitting would
 * be unsafe or meaningless:
 * - there is at most one element,
 * - `elements` and `entryIds` are not the same length (index-based pairing
 *   would attach ids to the wrong instance),
 * - `findLCA` yields no ancestor,
 * - some element is a direct child of the LCA (the LCA is then treated as the
 *   component root itself),
 * - some element cannot be traced up to a direct child of the LCA,
 * - every element falls under the same direct child.
 *
 * @param elements - DOM-like nodes; only their `parentNode` property is read.
 * @param entryIds - Entry id of each element, in the same order as `elements`.
 * @param findLCA - Returns the lowest common ancestor of the given nodes, or null.
 * @returns One `{ clusterEntryIds, clusterElements }` record per instance, in
 *   order of first appearance. The arrays are fresh copies, never the inputs.
 */
export function clusterComponentEntries<T>(
  elements: T[],
  entryIds: string[],
  findLCA: (els: T[]) => T | null,
): Array<{ clusterEntryIds: string[]; clusterElements: T[] }> {
  type Cluster = { clusterEntryIds: string[]; clusterElements: T[] }

  // Fallback used by every "do not split" path; never loses an entry.
  const singleCluster = (): Cluster[] => [
    { clusterEntryIds: [...entryIds], clusterElements: [...elements] },
  ]

  // Reads `parentNode` structurally so the function stays DOM-library agnostic.
  const parentOf = (node: unknown): unknown =>
    typeof node === 'object' && node !== null
      ? (node as { parentNode?: unknown }).parentNode
      : undefined

  // Splitting pairs elements[i] with entryIds[i]; refuse to split when the
  // arrays are not parallel rather than mis-assign or drop ids.
  if (elements.length <= 1 || elements.length !== entryIds.length) {
    return singleCluster()
  }

  const lca = findLCA(elements)
  if (!lca) return singleCluster()

  // If any entry is a direct child of the LCA, the LCA is the component root
  // itself. Only split when ALL entries sit behind intermediate wrappers.
  if (elements.some(el => parentOf(el) === lca)) {
    return singleCluster()
  }

  // Group entries by the direct child of the LCA they fall under.
  const childGroups = new Map<unknown, Cluster>()
  for (const [i, element] of elements.entries()) {
    let branch: unknown = element
    while (branch != null && parentOf(branch) !== lca) {
      branch = parentOf(branch)
    }

    const entryId = entryIds[i]
    // An element outside the LCA subtree means the partition is unreliable;
    // keep everything together instead of silently dropping the entry.
    if (branch == null || entryId === undefined) return singleCluster()

    const group = childGroups.get(branch)
    if (group) {
      group.clusterEntryIds.push(entryId)
      group.clusterElements.push(element)
    } else {
      childGroups.set(branch, { clusterEntryIds: [entryId], clusterElements: [element] })
    }
  }

  // Multiple subtrees → separate instances; one subtree → single instance.
  return childGroups.size > 1 ? Array.from(childGroups.values()) : singleCluster()
}
111
+
112
/**
 * One usage of an imported component inside a page template.
 * A component used several times yields several records.
 */
interface PageComponentInvocation {
  /** Identifier the component was imported under in the page frontmatter. */
  componentName: string
  /** Path of the component's source file, relative to the project root. */
  sourceFile: string
  /** Character offset of the opening tag within the template; used to order invocations. */
  offset: number
}
118
+
119
/**
 * Locate the `.astro` file under `src/pages` that backs a given URL path.
 *
 * The root path (`'/'` or `''`) maps to `src/pages/index.astro`. Any other
 * path is tried first as `<path>.astro` and then as `<path>/index.astro`.
 *
 * @param pagePath - URL path of the page; one leading slash is stripped.
 * @returns The first candidate that is accessible on disk, or null if none is.
 */
async function findPageSource(pagePath: string): Promise<string | null> {
  const projectRoot = getProjectRoot()
  const isRootPage = pagePath === '/' || pagePath === ''

  let candidates: string[]
  if (isRootPage) {
    candidates = [path.join(projectRoot, 'src/pages/index.astro')]
  } else {
    const cleanPath = pagePath.replace(/^\//, '')
    candidates = [
      path.join(projectRoot, `src/pages/${cleanPath}.astro`),
      path.join(projectRoot, `src/pages/${cleanPath}/index.astro`),
    ]
  }

  for (const filePath of candidates) {
    try {
      await fs.access(filePath)
    } catch {
      // Not accessible: fall through to the next candidate.
      continue
    }
    return filePath
  }

  return null
}
146
+
147
/**
 * Parse an .astro page source file to find component invocations.
 *
 * Steps:
 * 1. Split the leading `---` frontmatter from the template. Files without
 *    frontmatter yield an empty list.
 * 2. Collect default imports (`import Name from '...'`) whose path, resolved
 *    relative to the page file, lies inside one of `componentDirs`.
 * 3. Scan the template for `<Name` followed by whitespace, `/` or `>`, which
 *    covers both self-closing and paired tags.
 *
 * Directory matching is done on forward-slash-normalised paths so that
 * multi-segment dirs such as `src/components` also match on platforms whose
 * `path.sep` is not `/`. The stored `sourceFile` is still the value returned
 * by `path.relative`, unchanged.
 *
 * Limitations: named imports and import specifiers that are not filesystem
 * paths relative to the page (e.g. aliases) are not recognised; tags inside
 * comments or strings in the template are matched like any other.
 *
 * @param pageSourcePath - Path of the page file to read.
 * @param componentDirs - Component directories, relative to the project root.
 * @returns Invocations in template order, including duplicates.
 */
async function parseComponentInvocations(
  pageSourcePath: string,
  componentDirs: string[],
): Promise<PageComponentInvocation[]> {
  const content = await fs.readFile(pageSourcePath, 'utf-8')
  const projectRoot = getProjectRoot()
  const pageDir = path.dirname(pageSourcePath)

  // Split frontmatter from template
  const fmMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/)
  if (!fmMatch) return []
  const frontmatter = fmMatch[1] ?? ''
  const template = content.slice(fmMatch[0].length)

  // Only the platform separator is rewritten, so this is a no-op on POSIX.
  const toForwardSlashes = (p: string): string => p.split(path.sep).join('/')

  const dirPrefixes = componentDirs.map(
    dir => `${toForwardSlashes(dir).replace(/^\/+|\/+$/g, '')}/`,
  )

  // Map component name -> source path relative to the project root
  const imports = new Map<string, string>()
  for (const importMatch of frontmatter.matchAll(/import\s+(\w+)\s+from\s+['"]([^'"]+)['"]/g)) {
    const name = importMatch[1]
    const importPath = importMatch[2]
    if (!name || !importPath) continue

    // Resolve the import path relative to the page file
    const relToProject = path.relative(projectRoot, path.resolve(pageDir, importPath))
    const comparable = toForwardSlashes(relToProject)

    if (dirPrefixes.some(prefix => comparable.startsWith(prefix))) {
      imports.set(name, relToProject)
    }
  }

  if (imports.size === 0) return []

  // One pass over the template for all component names. Names come from
  // `\w+`, so they contain no regex metacharacters. Matches are produced in
  // ascending offset order, i.e. invocation order.
  const tagRegex = new RegExp(`<(${[...imports.keys()].join('|')})[\\s/>]`, 'g')
  const invocations: PageComponentInvocation[] = []
  for (const tagMatch of template.matchAll(tagRegex)) {
    const componentName = tagMatch[1]
    const sourceFile = componentName === undefined ? undefined : imports.get(componentName)
    if (componentName === undefined || sourceFile === undefined) continue
    invocations.push({ componentName, sourceFile, offset: tagMatch.index ?? 0 })
  }

  return invocations
}
209
+
210
/**
 * Detect components that have no text entries by parsing the page source file.
 *
 * After entry-based components have been marked in the DOM, this reads the
 * page's component invocations and assigns the remaining ones to unmarked
 * sibling elements, using invocation order and the already-marked elements
 * ("anchors") to stay in sync.
 *
 * The function does nothing when:
 * - the page source cannot be found or contains no component invocations,
 * - no component has been detected yet (no container can be determined),
 * - the detected component roots do not all share the same parent.
 *
 * An unmarked element is assigned to the current invocation only if that
 * invocation is not needed by a later anchor of the same component. Without
 * this, a plain element preceding a detected component would take that
 * component's invocation and be labelled as it.
 *
 * @param pagePath - URL path of the page, used to locate its source file.
 * @param root - Parsed HTML document; queried for `[data-cms-component-id]`.
 * @param components - Registered component instances keyed by component id.
 * @param componentDirs - Component directories, passed to the invocation parser.
 * @param relPath - Not used by this function; kept for call-site compatibility.
 * @param idGenerator - Not used by this function; kept for call-site compatibility.
 * @param markComponentRoot - Callback that marks an element as a component
 *   root; called here with an empty entry-id list.
 */
async function detectEntrylessComponents(
  pagePath: string,
  root: ReturnType<typeof parse>,
  components: Record<string, import('./types').ComponentInstance>,
  componentDirs: string[],
  relPath: string,
  idGenerator: () => string,
  markComponentRoot: (el: any, sourceFile: string, entryIds: string[]) => void,
): Promise<void> {
  const pageSourcePath = await findPageSource(pagePath)
  if (!pageSourcePath) return

  const invocations = await parseComponentInvocations(pageSourcePath, componentDirs)
  if (invocations.length === 0) return

  // Elements already marked as component roots and known to the registry.
  const detectedRoots: any[] = []
  for (const el of root.querySelectorAll('[data-cms-component-id]')) {
    const compId = el.getAttribute('data-cms-component-id')
    if (compId && components[compId]) detectedRoots.push(el)
  }

  // Without at least one detected component the container is unknown.
  if (detectedRoots.length === 0) return

  const container = detectedRoots[0].parentNode
  if (!container || !container.childNodes) return

  // All detected roots must be siblings inside that container.
  if (!detectedRoots.every(el => el.parentNode === container)) return

  // Element children (nodeType 1) of the container, in DOM order.
  const containerChildren: any[] = []
  for (const child of container.childNodes) {
    if (child.nodeType === 1) containerChildren.push(child)
  }

  // Anchor points: child index → component name of an already-detected child.
  const anchorMap = new Map<number, string>()
  containerChildren.forEach((child, ci) => {
    const compId: unknown = child.getAttribute?.('data-cms-component-id')
    const known = typeof compId === 'string' && compId ? components[compId] : undefined
    if (known) anchorMap.set(ci, known.componentName)
  })

  // Running per-component counts of anchors still ahead in the child list and
  // invocations not yet consumed. They decide whether an invocation is free
  // to be given to an unmarked element.
  const adjust = (counts: Map<string, number>, key: string, delta: number): void => {
    counts.set(key, (counts.get(key) ?? 0) + delta)
  }
  const anchorsAhead = new Map<string, number>()
  for (const name of anchorMap.values()) adjust(anchorsAhead, name, 1)
  const invocationsLeft = new Map<string, number>()
  for (const inv of invocations) adjust(invocationsLeft, inv.componentName, 1)

  let invIdx = 0
  const consumeInvocation = (): void => {
    const inv = invocations[invIdx]
    if (inv) adjust(invocationsLeft, inv.componentName, -1)
    invIdx++
  }

  // Walk both lists, using anchors to stay in sync.
  for (let ci = 0; ci < containerChildren.length && invIdx < invocations.length; ci++) {
    const anchorName = anchorMap.get(ci)

    if (anchorName !== undefined) {
      // Detected component: skip ahead to its invocation and consume it.
      adjust(anchorsAhead, anchorName, -1)
      while (invIdx < invocations.length && invocations[invIdx]?.componentName !== anchorName) {
        consumeInvocation()
      }
      if (invIdx < invocations.length) consumeInvocation()
      continue
    }

    const inv = invocations[invIdx]
    if (!inv) break

    // Only assign if the invocation's component isn't already detected at a
    // later anchor: when every remaining invocation of this component is
    // needed by an anchor ahead, this element is not that component.
    const reservedForAnchor =
      (anchorsAhead.get(inv.componentName) ?? 0) >= (invocationsLeft.get(inv.componentName) ?? 0)
    if (reservedForAnchor) continue

    markComponentRoot(containerChildren[ci], inv.sourceFile, [])
    consumeInvocation()
  }
}
297
+
51
298
  /**
52
299
  * Process a single HTML file
53
300
  */
@@ -263,84 +510,113 @@ async function processFile(
263
510
  }
264
511
  }
265
512
 
266
- // For each component source file, find the outermost common ancestor in the DOM
267
- if (entriesBySourceFile.size > 0) {
268
- const root = parse(result.html, {
269
- lowerCaseTagName: false,
270
- comment: true,
271
- })
272
-
273
- for (const [sourceFile, entryIds] of entriesBySourceFile) {
274
- // Find DOM elements for these entries
275
- const elements = entryIds
276
- .map(id => root.querySelector(`[${config.attributeName}="${id}"]`))
277
- .filter((el): el is NonNullable<typeof el> => el !== null)
278
-
279
- if (elements.length === 0) continue
513
+ const root = parse(result.html, {
514
+ lowerCaseTagName: false,
515
+ comment: true,
516
+ })
280
517
 
281
- // Find the lowest common ancestor of all elements
282
- const getAncestors = (el: ReturnType<typeof root.querySelector>): typeof el[] => {
283
- const ancestors: typeof el[] = []
284
- let current = el?.parentNode as typeof el
285
- while (current) {
286
- ancestors.unshift(current)
287
- current = current.parentNode as typeof el
288
- }
289
- return ancestors
518
+ // Helper: find lowest common ancestor of DOM elements
519
+ type HTMLNode = ReturnType<typeof root.querySelector>
520
+ const findLCA = (elements: NonNullable<HTMLNode>[]): HTMLNode => {
521
+ if (elements.length === 0) return null
522
+ if (elements.length === 1) return elements[0]!
523
+
524
+ const getAncestors = (el: HTMLNode): HTMLNode[] => {
525
+ const ancestors: HTMLNode[] = []
526
+ let current = el?.parentNode as HTMLNode
527
+ while (current) {
528
+ ancestors.unshift(current)
529
+ current = current.parentNode as HTMLNode
290
530
  }
531
+ return ancestors
532
+ }
291
533
 
292
- const ancestorChains = elements.map(el => getAncestors(el))
293
- let lca = ancestorChains[0]?.[0]
294
- if (ancestorChains.length > 1) {
295
- const minLen = Math.min(...ancestorChains.map(c => c.length))
296
- let lcaIdx = 0
297
- for (let i = 0; i < minLen; i++) {
298
- if (ancestorChains.every(chain => chain[i] === ancestorChains[0]![i])) {
299
- lcaIdx = i
300
- } else {
301
- break
302
- }
303
- }
304
- lca = ancestorChains[0]![lcaIdx]
534
+ const chains = elements.map(el => getAncestors(el))
535
+ const minLen = Math.min(...chains.map(c => c.length))
536
+ let lcaIdx = 0
537
+ for (let i = 0; i < minLen; i++) {
538
+ if (chains.every(chain => chain[i] === chains[0]![i])) {
539
+ lcaIdx = i
540
+ } else {
541
+ break
305
542
  }
543
+ }
544
+ return chains[0]![lcaIdx] ?? null
545
+ }
546
+
547
+ // Helper: mark an element as a component root and register the instance
548
+ const markComponentRoot = (
549
+ lca: NonNullable<HTMLNode>,
550
+ sourceFile: string,
551
+ instanceEntryIds: string[],
552
+ ) => {
553
+ if (!('setAttribute' in lca) || !('getAttribute' in lca)) return
554
+ if (lca.getAttribute?.('data-cms-component-id')) return
555
+
556
+ const compId = idGenerator()
557
+ lca.setAttribute('data-cms-component-id', compId)
558
+
559
+ const componentName = extractComponentName(sourceFile)
560
+ const firstEntry = instanceEntryIds.length > 0 ? result.entries[instanceEntryIds[0]!] : undefined
561
+
562
+ result.components[compId] = {
563
+ id: compId,
564
+ componentName,
565
+ file: relPath,
566
+ sourcePath: sourceFile,
567
+ sourceLine: firstEntry?.sourceLine ?? 1,
568
+ props: {},
569
+ }
306
570
 
307
- // If the LCA is a text element itself (only one entry from this component),
308
- // use its parent instead so the component wraps the element
309
- if (lca && elements.length === 1 && lca === elements[0]) {
310
- lca = lca.parentNode as typeof lca
571
+ for (const eid of instanceEntryIds) {
572
+ const entry = result.entries[eid]
573
+ if (entry) {
574
+ entry.parentComponentId = compId
311
575
  }
576
+ }
577
+ }
312
578
 
313
- if (!lca || !('setAttribute' in lca) || !('getAttribute' in lca)) continue
314
- // Skip if already marked as a component
315
- if (lca.getAttribute?.('data-cms-component-id')) continue
579
+ // For each component source file, cluster entries into separate instances
580
+ // by partitioning them based on which subtree of their common ancestor they belong to
581
+ if (entriesBySourceFile.size > 0) {
582
+ for (const [sourceFile, entryIds] of entriesBySourceFile) {
583
+ const elements = entryIds
584
+ .map(id => root.querySelector(`[${config.attributeName}="${id}"]`))
585
+ .filter((el): el is NonNullable<HTMLNode> => el !== null)
316
586
 
317
- const compId = idGenerator()
318
- lca.setAttribute('data-cms-component-id', compId)
587
+ if (elements.length === 0) continue
319
588
 
320
- const componentName = extractComponentName(sourceFile)
321
- const firstEntry = result.entries[entryIds[0]!]!
589
+ // Cluster entries into separate component instances
590
+ const clusters = clusterComponentEntries(elements, entryIds, findLCA)
322
591
 
323
- result.components[compId] = {
324
- id: compId,
325
- componentName,
326
- file: relPath,
327
- sourcePath: sourceFile,
328
- sourceLine: firstEntry.sourceLine ?? 1,
329
- props: {},
330
- }
592
+ for (const { clusterEntryIds, clusterElements } of clusters) {
593
+ let lca = findLCA(clusterElements)
331
594
 
332
- // Set parentComponentId on entries
333
- for (const eid of entryIds) {
334
- const entry = result.entries[eid]
335
- if (entry) {
336
- entry.parentComponentId = compId
595
+ // If the LCA is a text element itself (only one entry),
596
+ // use its parent so the component wraps the element
597
+ if (lca && clusterElements.length === 1 && lca === clusterElements[0]) {
598
+ lca = lca.parentNode as HTMLNode
337
599
  }
600
+
601
+ if (!lca) continue
602
+ markComponentRoot(lca, sourceFile, clusterEntryIds)
338
603
  }
339
604
  }
340
-
341
- // Re-serialize HTML with component markers
342
- result.html = root.toString()
343
605
  }
606
+
607
+ // Detect components without text entries by parsing the page source file
608
+ await detectEntrylessComponents(
609
+ pagePath,
610
+ root,
611
+ result.components,
612
+ componentDirs,
613
+ relPath,
614
+ idGenerator,
615
+ markComponentRoot,
616
+ )
617
+
618
+ // Re-serialize HTML with component markers
619
+ result.html = root.toString()
344
620
  }
345
621
 
346
622
  // Remove CMS ID attributes from HTML for entries that were filtered out