@setzkasten-cms/astro-admin 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/LICENSE +37 -0
  2. package/package.json +70 -0
  3. package/src/admin-page.astro +148 -0
  4. package/src/api-routes/__tests__/add-section-helpers.test.ts +383 -0
  5. package/src/api-routes/__tests__/catalog-api.test.ts +115 -0
  6. package/src/api-routes/__tests__/deferred-operations.test.ts +232 -0
  7. package/src/api-routes/__tests__/deploy-hook.test.ts +134 -0
  8. package/src/api-routes/__tests__/patch-page-file.test.ts +193 -0
  9. package/src/api-routes/__tests__/scan-page-helpers.test.ts +162 -0
  10. package/src/api-routes/__tests__/section-management.test.ts +284 -0
  11. package/src/api-routes/_storage-config.ts +54 -0
  12. package/src/api-routes/asset-proxy.ts +76 -0
  13. package/src/api-routes/auth-callback.ts +105 -0
  14. package/src/api-routes/auth-login.ts +87 -0
  15. package/src/api-routes/auth-logout.ts +9 -0
  16. package/src/api-routes/auth-session.ts +36 -0
  17. package/src/api-routes/catalog-add.ts +151 -0
  18. package/src/api-routes/catalog-export.ts +86 -0
  19. package/src/api-routes/catalog-helpers.ts +83 -0
  20. package/src/api-routes/catalog-list.ts +12 -0
  21. package/src/api-routes/config.ts +30 -0
  22. package/src/api-routes/deploy-hook.ts +69 -0
  23. package/src/api-routes/github-proxy.ts +111 -0
  24. package/src/api-routes/init-add-section.ts +511 -0
  25. package/src/api-routes/init-apply.ts +270 -0
  26. package/src/api-routes/init-migrate.ts +262 -0
  27. package/src/api-routes/init-scan-page.ts +336 -0
  28. package/src/api-routes/init-scan.ts +162 -0
  29. package/src/api-routes/pages.ts +17 -0
  30. package/src/api-routes/section-add.ts +189 -0
  31. package/src/api-routes/section-commit-pending.ts +147 -0
  32. package/src/api-routes/section-delete.ts +141 -0
  33. package/src/api-routes/section-duplicate.ts +144 -0
  34. package/src/api-routes/section-management.ts +95 -0
  35. package/src/api-routes/section-prepare-copy.ts +93 -0
  36. package/src/api-routes/section-prepare.ts +121 -0
  37. package/src/env.d.ts +7 -0
  38. package/src/init/__tests__/page-level.test.ts +1033 -0
  39. package/src/init/__tests__/page-list-coverage.test.ts +474 -0
  40. package/src/init/__tests__/patcher-edge-cases.test.ts +434 -0
  41. package/src/init/__tests__/patcher-page-mode.test.ts +272 -0
  42. package/src/init/__tests__/section-pipeline.test.ts +393 -0
  43. package/src/init/analyzer-types.ts +92 -0
  44. package/src/init/astro-config-patcher.ts +98 -0
  45. package/src/init/astro-detector.ts +207 -0
  46. package/src/init/astro-section-analyzer-v2.ts +1663 -0
  47. package/src/init/field-label-enricher.ts +72 -0
  48. package/src/init/template-patcher-v2.ts +1957 -0
  49. package/tsconfig.json +9 -0
@@ -0,0 +1,1663 @@
1
+ /**
2
+ * Astro-specific section component analysis (v2).
3
+ *
4
+ * Two-phase architecture:
5
+ * Phase 1 — Structural detection (AST-based, position-tracked, no CSS heuristics)
6
+ * Phase 2 — Label enrichment (delegated to field-label-enricher.ts)
7
+ *
8
+ * The analyzer produces an AnalyzerResult that is consumed by:
9
+ * - field-label-enricher.ts (cosmetic labels)
10
+ * - template-patcher-v2.ts (template transformation)
11
+ */
12
+
13
+ import { parse } from '@astrojs/compiler'
14
+ import { inferFields, type InferredSection, type InferredField } from '@setzkasten-cms/core/init'
15
+ import type {
16
+ AnalyzerResult,
17
+ RepeatedGroup,
18
+ RepeatedGroupInstance,
19
+ InnerFieldInfo,
20
+ FieldPosition,
21
+ } from './analyzer-types.js'
22
+ import { enrichFieldLabels } from './field-label-enricher.js'
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // AST types
26
+ // ---------------------------------------------------------------------------
27
+
28
/** Attribute attached to an element or component node of the parsed template AST. */
interface AstAttr {
  type: 'attribute'
  name: string
  value: string
  /** Syntactic form in which the attribute was written in the template. */
  kind: 'quoted' | 'empty' | 'expression' | 'spread' | 'shorthand' | 'template-literal'
  raw?: string
  position?: { start: { offset: number } }
}

/**
 * Structural view of an AST node as this analyzer consumes it.
 * Only the properties read by the helpers in this file are declared.
 */
interface AstNode {
  type: string
  name?: string
  value?: string
  attributes?: AstAttr[]
  children?: AstNode[]
  position?: {
    start: { offset: number; line: number; column: number }
    end?: { offset: number }
  }
}
45
+
46
/**
 * Depth-first, pre-order traversal of an AST subtree.
 *
 * `callback` is invoked for `node` first (with the supplied `parent`), then
 * for each descendant together with its direct parent.
 */
function walkAst(node: AstNode, callback: (node: AstNode, parent: AstNode | null) => void, parent: AstNode | null = null): void {
  const visit = (current: AstNode, owner: AstNode | null): void => {
    callback(current, owner)
    for (const child of current.children ?? []) {
      visit(child, current)
    }
  }
  visit(node, parent)
}
54
+
55
+ // ---------------------------------------------------------------------------
56
+ // Public API
57
+ // ---------------------------------------------------------------------------
58
+
59
/**
 * Analyze an Astro section component and infer its fields.
 *
 * The file is split into frontmatter and template; fields are inferred from
 * the frontmatter variables that survive filtering, template fields and
 * repeated groups are extracted, every recorded position is shifted from
 * template-relative to full-source offsets, and all fields are finally
 * ordered by where they first appear in the template.
 *
 * @param source Full text of the `.astro` file.
 * @param sectionKey Key echoed back on the result.
 * @param componentName Component name echoed back on the result.
 * @param componentPath Component path echoed back on the result.
 * @param options `mode: 'page'` keeps arrays that are only used via `.map()` as top-level fields.
 * @returns The inferred section, with the full AnalyzerResult for the patcher under `_analyzerResult`.
 */
export async function analyzeAstroSection(
  source: string,
  sectionKey: string,
  componentName: string,
  componentPath: string,
  options?: { mode?: 'component' | 'page' },
): Promise<InferredSection & { _analyzerResult: AnalyzerResult }> {
  const pageMode = options?.mode === 'page'
  const integrationMarkers = ['data-sk-field', 'getSection(', 'setzkasten:content']
  const alreadyIntegrated = integrationMarkers.some((marker) => source.includes(marker))

  const { frontmatter, template, templateOffset } = splitAstroFile(source)

  // Decide whether a frontmatter variable is content (true) or plumbing (false).
  const isContentVariable = (name: string): boolean => {
    // Passed on as a prop value to another component.
    const propUsageRegex = new RegExp(`\\w+={${name}}`)
    if (propUsageRegex.test(template)) return false
    // Used as the right-hand side of a ?? / || fallback.
    const fallbackRegex = new RegExp(`(?:\\?\\?|\\|\\|)\\s*${name}\\b`)
    if (fallbackRegex.test(frontmatter) || fallbackRegex.test(template)) return false
    // In page mode, standalone .map() arrays ARE the content, so keep them.
    if (pageMode) return true
    // In component mode, .map()-only arrays become repeatedGroup inner fields
    // and must not also appear as top-level fields.
    const usedInMap = new RegExp(`\\b${name}\\.map\\s*\\(`).test(template)
    const usedElsewhere = new RegExp(`\\b${name}\\b(?!\\.map)`).test(template)
    return !usedInMap || usedElsewhere
  }
  const variables = extractFrontmatterVariables(frontmatter).filter((name) => isContentVariable(name))

  const variableFields = inferFields(variables, template)
  for (const field of variableFields) {
    if (field.defaultValue) continue
    field.defaultValue = extractFrontmatterValue(frontmatter, field.key)
  }

  // Phase 1: template fields + repeated groups.
  const extracted = await extractTemplateFields(template, frontmatter)
  const templateFields = extracted.fields
  const repeatedGroups = extracted.repeatedGroups

  // extractTemplateFields reports positions relative to the template string;
  // adding templateOffset makes them relative to the full source.
  for (const group of repeatedGroups) {
    for (const instance of group.instances) {
      instance.start += templateOffset
      instance.end += templateOffset
    }
    for (const field of group.fields) {
      for (const pos of field.positions) {
        if (pos) pos.offset += templateOffset
      }
    }
    for (const instanceAttrs of group.classAttrs ?? []) {
      for (const attr of instanceAttrs) {
        attr.sourceOffset += templateOffset
      }
    }
  }

  // Phase 2: cosmetic labels for the inner fields of repeated groups.
  for (const group of repeatedGroups) {
    enrichFieldLabels(group.fields)
  }

  // Tag each variable field with the template position of its first usage.
  for (const field of variableFields) {
    const usage = new RegExp(`\\{\\s*(?:\\w+\\.)?${field.key}(?:[.\\s}(])`, 's').exec(template)
    if (usage) {
      ;(field as any)._pos = usage.index
      continue
    }
    const mapIdx = template.indexOf(`${field.key}.map(`)
    ;(field as any)._pos = mapIdx === -1 ? Infinity : mapIdx
  }

  // Variable fields win over template fields with the same key.
  const variableKeys = new Set(variableFields.map((f) => f.key))
  const merged = [
    ...variableFields,
    ...templateFields.filter((f) => !variableKeys.has(f.key)),
  ]
  const rank = (f: InferredField): number => (f as any)._pos ?? Infinity
  merged.sort((a, b) => rank(a) - rank(b))

  // Hand out copies without the internal _pos marker.
  const fields = merged.map((entry) => {
    const copy = { ...entry }
    delete (copy as any)._pos
    return copy
  })

  const analyzerResult: AnalyzerResult = {
    sectionKey,
    componentName,
    componentPath,
    alreadyIntegrated,
    fields,
    repeatedGroups,
    frontmatter,
    template,
  }

  return {
    key: sectionKey,
    componentName,
    componentPath,
    fields,
    alreadyIntegrated,
    _analyzerResult: analyzerResult,
  }
}
186
+
187
+ // ---------------------------------------------------------------------------
188
+ // Frontmatter utilities (unchanged from v1)
189
+ // ---------------------------------------------------------------------------
190
+
191
/**
 * Split an `.astro` source into its frontmatter body and its template.
 *
 * The file must start with a `---` fence and contain a closing `---` at the
 * beginning of a later line; otherwise the whole source is the template.
 * Both LF and CRLF line endings are handled, so neither part carries a stray
 * `\r` from the fence lines.
 *
 * @returns `frontmatter` (text between the fences), `template` (everything
 *   after the closing fence line) and `templateOffset` (index in `source`
 *   where `template` starts).
 */
function splitAstroFile(source: string): { frontmatter: string; template: string; templateOffset: number } {
  const noFrontmatter = { frontmatter: '', template: source, templateOffset: 0 }
  if (!source.startsWith('---')) return noFrontmatter
  // Closing fence: a `---` at the start of a line after the opening fence.
  const endIdx = source.indexOf('\n---', 3)
  if (endIdx === -1) return noFrontmatter

  // Body starts after the opening fence's line break ("---\n" or "---\r\n") …
  const bodyStart = source.startsWith('\r\n', 3) ? 5 : 4
  // … and ends before the line break that precedes the closing fence.
  const bodyEnd = endIdx > bodyStart && source[endIdx - 1] === '\r' ? endIdx - 1 : endIdx
  const frontmatter = source.slice(bodyStart, bodyEnd)

  let templateStart = endIdx + 4 // skip "\n---"
  // Skip trailing blanks and the line break on the closing fence line.
  while (templateStart < source.length && (source[templateStart] === ' ' || source[templateStart] === '\t')) {
    templateStart++
  }
  if (source[templateStart] === '\r' && source[templateStart + 1] === '\n') templateStart++
  if (source[templateStart] === '\n') templateStart++

  return { frontmatter, template: source.slice(templateStart), templateOffset: templateStart }
}
203
+
204
/**
 * Collect candidate content variable names from a frontmatter block.
 *
 * Three sources are scanned:
 *  1. `const` / `let` declarations (minus exports, helpers and derived data),
 *  2. names destructured from `Astro.props`,
 *  3. members of an `interface Props { … }` declaration.
 *
 * Names rejected by `isInternalVariable` are dropped everywhere, and rest
 * elements (`...rest`) in the props destructuring are ignored because they
 * are not individual content fields.
 *
 * @returns De-duplicated names in discovery order.
 */
function extractFrontmatterVariables(frontmatter: string): string[] {
  const variables: string[] = []

  // 1. const/let declarations
  const constRegex = /(?:const|let)\s+(\w+)\s*=\s*(.*)/g
  let match: RegExpExecArray | null
  while ((match = constRegex.exec(frontmatter)) !== null) {
    const name = match[1]!
    const rhs = match[2]?.trim() ?? ''
    if (isInternalVariable(name)) continue
    // Skip exported declarations (e.g. "export const prerender = true")
    const charBefore = match.index > 0 ? frontmatter.slice(Math.max(0, match.index - 10), match.index) : ''
    if (/export\s*$/.test(charBefore)) continue
    // Skip values derived via .map() or optional chaining
    if (/\.\s*map\s*\(/.test(rhs) || /\w+\?\.\w+/.test(rhs)) continue
    // Skip array literals assigned to "default…" names
    if (/^\[/.test(rhs) && /^default/i.test(name)) continue
    // Skip arrow function declarations (internal helpers, not content fields)
    if (/^\(|^\w+\s*=>/.test(rhs) && /=>/.test(rhs)) continue
    // Skip setzkasten API calls and other framework calls that return data objects
    if (/\bget(?:Page|Pages|Section|CollectionEntry)\s*\(/.test(rhs)) continue
    variables.push(name)
  }

  // 2. const { a, b: alias, c = 'x', ...rest } = Astro.props
  const propsRegex = /const\s+\{\s*([^}]+)\}\s*=\s*Astro\.props/
  const propsMatch = frontmatter.match(propsRegex)
  if (propsMatch) {
    const props = propsMatch[1]!
      .split(',')
      // keep the prop name: strip a rename (": alias") and a default ("= value")
      .map((p) => p.trim().split(':')[0]!.split('=')[0]!.trim())
      // drop empties, rest elements and internal names
      .filter((p) => p && !p.startsWith('...') && !isInternalVariable(p))
    variables.push(...props)
  }

  // 3. interface Props { name?: type }
  const interfaceRegex = /interface\s+Props\s*\{([^}]+)\}/s
  const interfaceMatch = frontmatter.match(interfaceRegex)
  if (interfaceMatch) {
    const fields = interfaceMatch[1]!
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line && !line.startsWith('//'))
      .map((line) => line.match(/^(\w+)\??:/)?.[1])
      .filter((name): name is string => !!name && !isInternalVariable(name))
    variables.push(...fields)
  }

  return [...new Set(variables)]
}
245
+
246
/**
 * Read the literal default of a `const` / `let` declaration in the frontmatter.
 *
 * Supported initialisers, tried in this order:
 *  - array literal → parsed by `extractInlineArrayValues`,
 *  - non-empty single- or double-quoted string → its text (escaped quotes unescaped),
 *  - decimal number (optionally negative) → a `number`.
 *
 * @param frontmatter Frontmatter source text.
 * @param varName Name of the declared variable.
 * @returns The literal value, or `undefined` when no supported initialiser is found.
 */
function extractFrontmatterValue(frontmatter: string, varName: string): unknown | undefined {
  // varName is interpolated into regexes, so neutralise any metacharacters.
  const safeName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const declaration = `(?:const|let)\\s+${safeName}\\s*=\\s*`

  const arrayStartMatch = new RegExp(`${declaration}\\[`).exec(frontmatter)
  if (arrayStartMatch) {
    const openIdx = arrayStartMatch.index + arrayStartMatch[0].length - 1
    // Quote-aware matching: brackets inside string items do not affect nesting depth.
    const closeIdx = findMatchingBracket(frontmatter, openIdx, '[', ']')
    const content = frontmatter.slice(openIdx + 1, closeIdx)
    const isObjectArray = /\{/.test(content)
    const isStringArray = /^\s*['"]/.test(content.trim())
    return extractInlineArrayValues(content, isObjectArray, isStringArray)
  }

  // The closing quote must be the same character as the opening one, so an
  // apostrophe inside a double-quoted string no longer truncates the value.
  const strMatch = new RegExp(`${declaration}(['"])((?:\\\\[\\s\\S]|(?!\\1)[^\\\\])+)\\1`).exec(frontmatter)
  if (strMatch) return strMatch[2]!.replace(/\\(['"\\])/g, '$1')

  const numMatch = new RegExp(`${declaration}(-?\\d+(?:\\.\\d+)?)`).exec(frontmatter)
  if (numMatch) return Number(numMatch[1]!)

  return undefined
}
271
+
272
/** Names that belong to Astro / component plumbing and are never content fields. */
const INTERNAL_VARIABLE_NAMES: ReadonlySet<string> = new Set([
  'Astro', 'props', 'data', 'class', 'className', 'style', 'id',
  'slot', 'Fragment', 'Component', 'frontmatter', 'url', 'site',
  'generator', 'redirect', 'response', 'request', 'cookies',
  'params', 'slots',
])

/**
 * Whether `name` should be ignored as a content variable: either it starts
 * with an underscore or it is one of the reserved plumbing names.
 */
function isInternalVariable(name: string): boolean {
  if (name.startsWith('_')) return true
  return INTERNAL_VARIABLE_NAMES.has(name)
}
281
+
282
+ // ---------------------------------------------------------------------------
283
+ // Byte → Char offset conversion
284
+ // ---------------------------------------------------------------------------
285
+
286
/**
 * Build a converter from UTF-8 byte offsets to string (UTF-16 code unit) indices.
 *
 * For pure-ASCII input both numbering schemes coincide and the identity
 * function is returned. Otherwise every byte of a character maps to the index
 * at which that character starts; offsets at or below 0 map to 0 and offsets
 * at or past the end of the table map to `source.length`.
 */
function buildByteToCharMap(source: string): (byteOffset: number) => number {
  const totalBytes = Buffer.byteLength(source, 'utf-8')
  if (totalBytes === source.length) return (offset) => offset

  const lookup = new Array<number>(totalBytes + 1)
  let bytePos = 0
  let charPos = 0
  // Iterating the string yields whole code points (surrogate pairs stay together).
  for (const ch of source) {
    const codePoint = ch.codePointAt(0)!
    let width = 4
    if (codePoint <= 0x7f) width = 1
    else if (codePoint <= 0x7ff) width = 2
    else if (codePoint <= 0xffff) width = 3
    lookup.fill(charPos, bytePos, bytePos + width)
    bytePos += width
    charPos += ch.length
  }
  lookup[bytePos] = source.length

  return (offset) => {
    if (offset <= 0) return 0
    if (offset >= lookup.length) return source.length
    return lookup[offset]!
  }
}
301
+
302
/**
 * Rewrite, in place, every offset in an AST subtree through `b2c`.
 *
 * Node start/end offsets and attribute start offsets are converted; the
 * function then recurses into all children.
 */
function convertAstPositions(node: AstNode, b2c: (offset: number) => number): void {
  const { position, attributes, children } = node

  if (position?.start) position.start.offset = b2c(position.start.offset)
  if (position?.end) position.end.offset = b2c(position.end.offset)

  for (const attr of attributes ?? []) {
    const attrStart = attr.position?.start
    if (attrStart) attrStart.offset = b2c(attrStart.offset)
  }

  for (const child of children ?? []) {
    convertAstPositions(child, b2c)
  }
}
314
+
315
+ // ---------------------------------------------------------------------------
316
+ // AST helper utilities
317
+ // ---------------------------------------------------------------------------
318
+
319
/**
 * Amount subtracted from AST offsets to make them template-relative.
 * The template is parsed behind a `---\n---\n` wrapper, which is 8 characters long.
 */
const WRAPPER_OFFSET = 8

/** Template-relative start offset of `node`; `Infinity` when the node has no start position. */
function nodeOffset(node: AstNode): number {
  const rawStart = node.position?.start?.offset
  const start = rawStart == null ? Infinity : rawStart
  return start - WRAPPER_OFFSET
}

/** Template-relative end offset of `node`; `Infinity` when the node has no end position. */
function nodeEnd(node: AstNode): number {
  const rawEnd = node.position?.end?.offset
  const end = rawEnd == null ? Infinity : rawEnd
  return end - WRAPPER_OFFSET
}
328
+
329
/** Value of the first `class` or `className` attribute on `node`, or `''` when there is none. */
function getClassValue(node: AstNode): string {
  for (const attr of node.attributes ?? []) {
    if (attr.name === 'class' || attr.name === 'className') {
      return attr.value ?? ''
    }
  }
  return ''
}

/** First attribute of `node` called `name`, or `undefined`. */
function getAttr(node: AstNode, name: string): AstAttr | undefined {
  const attrs = node.attributes
  return attrs ? attrs.find((candidate) => candidate.name === name) : undefined
}

/** True only when `node` carries `aria-hidden` with the exact value `"true"`. */
function isAriaHidden(node: AstNode): boolean {
  const hidden = getAttr(node, 'aria-hidden')
  return hidden !== undefined && hidden.value === 'true'
}
343
+
344
/**
 * Concatenate the human-readable text found in an AST subtree.
 *
 * - Text nodes contribute their value.
 * - Expression nodes contribute only the quoted string after a `??` fallback,
 *   if there is one. With `stripCmsBound`, expressions that start with an
 *   optional-chained access (`x?.y`) contribute nothing.
 * - Any other node contributes the text of its children, in order.
 */
function extractTextContent(node: AstNode, stripCmsBound: boolean = false): string {
  if (node.type === 'text') {
    return node.value ?? ''
  }

  if (node.type === 'expression') {
    const exprCode = (node.children ?? []).map((part) => part.value ?? '').join('')
    if (stripCmsBound && /^\s*\(?\s*\w+\?\.\s*\w+/.test(exprCode)) return ''
    const fallback = exprCode.match(/\?\?\s*['"]([^'"]+)['"]/)
    return fallback ? fallback[1]! : ''
  }

  return (node.children ?? [])
    .map((child) => extractTextContent(child, stripCmsBound))
    .join('')
}
360
+
361
/** Tags that count as inline formatting (bold, italic, links, code, color spans, etc.). */
const INLINE_FORMATTING_TAGS = new Set([
  'strong', 'b', 'em', 'i', 'mark', 'code', 'del', 'ins',
  'sub', 'sup', 'a', 'abbr', 'cite', 'u', 's', 'small',
  'span', // color/style spans inside text content
])

/**
 * Whether any descendant of `node` is an element whose tag is in
 * INLINE_FORMATTING_TAGS. The node itself is not tested.
 */
function hasInlineFormatting(node: AstNode): boolean {
  return (node.children ?? []).some((child) => {
    const isFormattingTag = child.type === 'element' && INLINE_FORMATTING_TAGS.has(child.name ?? '')
    return isFormattingTag || hasInlineFormatting(child)
  })
}
377
+
378
/**
 * Collect every quoted `class="…"` attribute in an element subtree.
 *
 * Each hit records a structural path (`tag:nth` segments joined by `/`) so the
 * same element can be matched across instances, plus the offset and length of
 * the `class="…"` text inside `source`. Nothing is recorded for a node when
 * `source` is not supplied, but its children are still visited.
 *
 * @param node Subtree root.
 * @param path Structural path of `node` (empty for the root).
 * @param source Text in which the attribute offsets are looked up.
 */
function collectClassAttrs(node: AstNode, path: string = '', source?: string): import('./analyzer-types.js').ClassAttrInfo[] {
  const collected: import('./analyzer-types.js').ClassAttrInfo[] = []

  const ownClass = node.type === 'element' && node.attributes && source
    ? node.attributes.find((a) => a.name === 'class' && a.kind === 'quoted')
    : undefined
  if (ownClass && ownClass.value && source) {
    // The AST position points to the element start, not the attribute,
    // so locate the literal `class="` inside the opening tag.
    const elemStart = nodeOffset(node)
    const tagEndGuess = source.indexOf('>', elemStart)
    const classIdx = tagEndGuess === -1 ? -1 : source.indexOf('class="', elemStart)
    if (classIdx !== -1 && classIdx < tagEndGuess) {
      collected.push({
        path,
        value: ownClass.value,
        sourceOffset: classIdx,
        // class="value" → 'class="'.length + value.length + '"'.length
        sourceLength: 7 + ownClass.value.length + 1,
      })
    }
  }

  if (node.children) {
    // tag:nth is used as path key (more robust than child index for optional elements)
    const tagCounts: Record<string, number> = {}
    const descend = (element: AstNode, tag: string): void => {
      const nth = tagCounts[tag] ?? 0
      tagCounts[tag] = nth + 1
      const segment = `${tag}:${nth}`
      const childPath = path ? `${path}/${segment}` : segment
      collected.push(...collectClassAttrs(element, childPath, source))
    }

    for (const child of node.children) {
      if (child.type === 'element' && child.name) {
        descend(child, child.name)
      } else if (child.type === 'expression' && child.children) {
        // Elements directly inside an expression (e.g. {items.map(() => (<li class="...">))})
        // share the tag counters of the surrounding element.
        for (const inner of child.children) {
          if (inner.type === 'element' && inner.name) descend(inner, inner.name)
        }
      }
    }
  }

  return collected
}
432
+
433
/** Whether `node` itself, or any of its descendants, is an element named `tagName`. */
function containsElement(node: AstNode, tagName: string): boolean {
  const isMatch = node.type === 'element' && node.name === tagName
  if (isMatch) return true
  return (node.children ?? []).some((child) => containsElement(child, tagName))
}
442
+
443
/**
 * Serialize an AST subtree back to Astro template text.
 *
 * - Text nodes yield their value.
 * - Expression nodes yield `{…}` built from the values of their direct children.
 * - Elements and components yield an opening tag with attributes, the
 *   serialized children and a closing tag.
 * - Any other node type yields just its serialized children.
 *
 * Each attribute is written in the syntax of its `kind`: shorthand (`{name}`),
 * spread (`{...name}`) and template-literal attributes keep their own form
 * instead of being turned into `name="value"`.
 */
function serializeNode(node: AstNode): string {
  if (node.type === 'text') return node.value ?? ''
  if (node.type === 'expression') {
    const inner = (node.children ?? []).map((c) => c.value ?? '').join('')
    return `{${inner}}`
  }

  const isTag = node.type === 'element' || node.type === 'component'
  let result = ''

  if (isTag) {
    result += `<${node.name ?? ''}`
    for (const attr of node.attributes ?? []) {
      switch (attr.kind) {
        case 'expression':
          result += ` ${attr.name}={${attr.value}}`
          break
        case 'empty':
          result += ` ${attr.name}`
          break
        case 'shorthand':
          result += ` {${attr.name}}`
          break
        case 'spread':
          // NOTE(review): assumes the spread expression is carried in `name`,
          // as in @astrojs/compiler's own serializer — confirm against the compiler version in use.
          result += ` {...${attr.name}}`
          break
        case 'template-literal':
          result += ` ${attr.name}=\`${attr.value}\``
          break
        default:
          // 'quoted'
          result += ` ${attr.name}="${attr.value}"`
      }
    }
    result += '>'
  }

  for (const child of node.children ?? []) {
    result += serializeNode(child)
  }

  if (isTag && node.name) {
    result += `</${node.name}>`
  }
  return result
}
468
+
469
/**
 * Turn a camelCase key into a display label: a space is inserted before every
 * ASCII capital, the first character is upper-cased and the result is trimmed.
 */
function camelToLabel(str: string): string {
  const spaced = str.replace(/([A-Z])/g, ' $1')
  const capitalised = spaced.replace(/^./, (first) => first.toUpperCase())
  return capitalised.trim()
}
472
+
473
/**
 * Guess a field type from the field's name alone.
 *
 * Rules are tried in order and the first hit wins: icon, image, color, number,
 * boolean. Names matching none of them are `'text'`. All rules test the
 * lower-cased name, except the `is…` / `has…` prefixes, which require the
 * original casing.
 */
function inferInnerFieldType(name: string): InferredField['type'] {
  const lowered = name.toLowerCase()

  const substringRules: Array<{ type: InferredField['type']; pattern: RegExp }> = [
    { type: 'icon', pattern: /icon/ },
    { type: 'image', pattern: /image|img|photo|avatar|logo|thumbnail|src/ },
    { type: 'color', pattern: /color|colour/ },
    { type: 'number', pattern: /count|amount|number|quantity|total|rating|score|percent|order|index|size|width|height/ },
  ]
  for (const rule of substringRules) {
    if (rule.pattern.test(lowered)) return rule.type
  }

  const hasBooleanPrefix = /^is[A-Z]/.test(name) || /^has[A-Z]/.test(name)
  const hasBooleanWord = /enabled|disabled|visible|hidden|active|checked|selected|highlight|accent|featured/.test(lowered)
  return hasBooleanPrefix || hasBooleanWord ? 'boolean' : 'text'
}
482
+
483
+ // ---------------------------------------------------------------------------
484
+ // Inline array extraction (unchanged from v1)
485
+ // ---------------------------------------------------------------------------
486
+
487
/** Undo backslash-escaping of quotes and backslashes inside a string literal body. */
function unescapeQuotedText(raw: string): string {
  return raw.replace(/\\(['"`\\])/g, '$1')
}

/**
 * Index of the quote that closes the literal opened at `openIdx`, honouring
 * backslash escapes. Returns `source.length` when the literal is unterminated.
 */
function findClosingQuote(source: string, openIdx: number): number {
  const quote = source[openIdx]
  let i = openIdx + 1
  while (i < source.length && source[i] !== quote) {
    // A backslash escapes the following character, which can never close the literal.
    i += source[i] === '\\' ? 2 : 1
  }
  return Math.min(i, source.length)
}

/**
 * Collect the non-empty string literals found in `text`, in order.
 * A literal ends at the same quote character that opened it, so `"It's"` is
 * one item. Empty literals are skipped.
 */
function collectStringLiterals(text: string): string[] {
  const literals: string[] = []
  const literalRegex = /(['"`])((?:\\[\s\S]|(?!\1)[^\\])*)\1/g
  let found: RegExpExecArray | null
  while ((found = literalRegex.exec(text)) !== null) {
    const body = found[2] ?? ''
    if (body.length > 0) literals.push(unescapeQuotedText(body))
  }
  return literals
}

/**
 * Parse the inside of an inline array literal (the text between `[` and `]`).
 *
 * @param arrayContent Source text between the array brackets.
 * @param isObjectArray Whether the content contains object literals.
 * @param isStringArray Whether the content starts with a quoted string.
 * @returns The string items when `isStringArray` is set or `isObjectArray` is
 *   not; otherwise one parsed record per top-level `{ … }`, skipping records
 *   that end up without any key.
 */
function extractInlineArrayValues(arrayContent: string, isObjectArray: boolean, isStringArray: boolean): unknown[] {
  if (isStringArray || !isObjectArray) {
    return collectStringLiterals(arrayContent)
  }

  const objects: Array<Record<string, unknown>> = []
  let i = 0
  while (i < arrayContent.length) {
    if (arrayContent[i] !== '{') {
      i++
      continue
    }
    const end = findMatchingBracket(arrayContent, i, '{', '}')
    const obj = parseObjectLiteral(arrayContent.slice(i + 1, end))
    if (Object.keys(obj).length > 0) objects.push(obj)
    i = end + 1
  }
  return objects
}

/**
 * Find the bracket that closes the one at `start`.
 *
 * String literals (single, double or backtick quoted, with backslash escapes)
 * are skipped, so brackets inside them do not affect nesting.
 *
 * @param source Text to scan.
 * @param start Index of the opening bracket.
 * @param open Opening bracket character.
 * @param close Closing bracket character.
 * @returns Index of the matching closing bracket. When the bracket is never
 *   closed, the last index of `source`, or `source.length` if the scan ended
 *   inside an unterminated string.
 */
function findMatchingBracket(source: string, start: number, open: string, close: string): number {
  let depth = 1
  let i = start + 1
  while (i < source.length && depth > 0) {
    const ch = source[i]!
    if (ch === "'" || ch === '"' || ch === '`') {
      i = findClosingQuote(source, i)
    } else if (ch === open) {
      depth++
    } else if (ch === close) {
      depth--
    }
    i++
  }
  return i - 1
}

/**
 * Parse the inside of an object literal (the text between `{` and `}`) into a
 * plain record.
 *
 * Only `identifier: value` pairs are recognised. Supported values are quoted
 * strings, arrays (of strings or of objects), nested objects, decimal numbers
 * (optionally negative) and `true` / `false`. Any other value is skipped up to
 * the next comma and its key is omitted.
 */
function parseObjectLiteral(objStr: string): Record<string, unknown> {
  const obj: Record<string, unknown> = {}
  let i = 0
  while (i < objStr.length) {
    // Skip separators between pairs.
    while (i < objStr.length && /[\s,]/.test(objStr[i]!)) i++
    if (i >= objStr.length) break

    const keyMatch = objStr.slice(i).match(/^(\w+)\s*:\s*/)
    if (!keyMatch) { i++; continue }
    const key = keyMatch[1]!
    i += keyMatch[0].length

    const ch = objStr[i]
    if (ch === "'" || ch === '"' || ch === '`') {
      const closeIdx = findClosingQuote(objStr, i)
      obj[key] = unescapeQuotedText(objStr.slice(i + 1, closeIdx))
      i = closeIdx + 1
    } else if (ch === '[') {
      const end = findMatchingBracket(objStr, i, '[', ']')
      const innerContent = objStr.slice(i + 1, end)
      obj[key] = /\{/.test(innerContent)
        ? extractInlineArrayValues(innerContent, true, false)
        : collectStringLiterals(innerContent)
      i = end + 1
    } else if (ch === '{') {
      const end = findMatchingBracket(objStr, i, '{', '}')
      obj[key] = parseObjectLiteral(objStr.slice(i + 1, end))
      i = end + 1
    } else {
      const rest = objStr.slice(i)
      const numMatch = rest.match(/^-?\d+(?:\.\d+)?/)
      if (numMatch) {
        obj[key] = Number(numMatch[0])
        i += numMatch[0].length
      } else if (/^true\b/.test(rest)) {
        obj[key] = true
        i += 4
      } else if (/^false\b/.test(rest)) {
        obj[key] = false
        i += 5
      } else {
        // Unsupported value (identifier, call, …): skip to the next pair.
        while (i < objStr.length && objStr[i] !== ',') i++
      }
    }
  }
  return obj
}
574
+
575
+ // ---------------------------------------------------------------------------
576
+ // Template content extraction — Steps 0-11 unchanged, Step 12 rewritten
577
+ // ---------------------------------------------------------------------------
578
+
579
/** What `extractTemplateFields` resolves to. */
interface ExtractResult {
  /** Repeated groups detected in the template. */
  repeatedGroups: RepeatedGroup[]
  /** Fields inferred from the template. */
  fields: InferredField[]
}
583
+
584
+ async function extractTemplateFields(template: string, frontmatter: string = ''): Promise<ExtractResult> {
585
+ const fields: Array<InferredField & { _pos: number }> = []
586
+ const usedKeys = new Set<string>()
587
+ const repeatedGroups: RepeatedGroup[] = []
588
+
589
+ let ast: AstNode
590
+ try {
591
+ const wrappedSource = `---\n---\n${template}`
592
+ const result = await parse(wrappedSource)
593
+ ast = result.ast as unknown as AstNode
594
+ convertAstPositions(ast, buildByteToCharMap(wrappedSource))
595
+ } catch (err) {
596
+ console.error('[setzkasten] section-analyzer: parse() failed:', err)
597
+ return { fields, repeatedGroups }
598
+ }
599
+
600
+ function addField(field: InferredField, pos?: number) {
601
+ if (!usedKeys.has(field.key)) {
602
+ usedKeys.add(field.key)
603
+ let finalPos = pos ?? -1
604
+ if (finalPos === -1) {
605
+ if (typeof field.defaultValue === 'string' && field.defaultValue.length >= 3) {
606
+ finalPos = template.indexOf(field.defaultValue)
607
+ } else if (Array.isArray(field.defaultValue) && field.defaultValue.length > 0) {
608
+ const firstItem = typeof field.defaultValue[0] === 'string'
609
+ ? field.defaultValue[0]
610
+ : typeof field.defaultValue[0] === 'object' && field.defaultValue[0]
611
+ ? Object.values(field.defaultValue[0])[0]
612
+ : null
613
+ if (typeof firstItem === 'string' && firstItem.length >= 2) {
614
+ finalPos = template.indexOf(firstItem)
615
+ }
616
+ }
617
+ }
618
+ if (finalPos === -1) finalPos = template.indexOf(`?.${field.key}`)
619
+ if (finalPos === -1) {
620
+ const keyRegex = new RegExp(`(?<![\\w-])${field.key}(?![\\w-])`)
621
+ const keyMatch = keyRegex.exec(template)
622
+ if (keyMatch) finalPos = keyMatch.index
623
+ }
624
+ fields.push({ ...field, _pos: finalPos === -1 ? Infinity : finalPos })
625
+ }
626
+ }
627
+
628
+ function numberedKey(base: string, count: number): string {
629
+ return count === 1 ? base : `${base}${count}`
630
+ }
631
+
632
+ function numberedLabel(base: string, count: number): string {
633
+ return count === 1 ? base : `${base} ${count}`
634
+ }
635
+
636
+ // Collect aria-hidden ranges
637
+ const ariaHiddenRanges: Array<{ start: number; end: number }> = []
638
+ walkAst(ast, (node) => {
639
+ if (node.type !== 'element' && node.type !== 'component') return
640
+ const isHidden = isAriaHidden(node) || getAttr(node, 'role')?.value === 'img'
641
+ if (isHidden) {
642
+ const start = nodeOffset(node)
643
+ const end = node.position?.end?.offset ? node.position.end.offset - WRAPPER_OFFSET : start + 1
644
+ ariaHiddenRanges.push({ start, end })
645
+ }
646
+ })
647
+
648
+ // Collect .map() expression ranges
649
+ const mapExpressionRanges: Array<{ start: number; end: number }> = []
650
+ walkAst(ast, (node) => {
651
+ if (node.type !== 'expression') return
652
+ const exprCode = (node.children ?? []).map((c) => c.value ?? '').join('')
653
+ if (/\.map\s*\(/.test(exprCode)) {
654
+ const start = nodeOffset(node)
655
+ const end = node.position?.end?.offset ? node.position.end.offset - WRAPPER_OFFSET : start + 1
656
+ mapExpressionRanges.push({ start, end })
657
+ }
658
+ })
659
+
660
+ // Collect repeated sibling element groups
661
+ const SEMANTIC_REPEATED_TAGS = new Set(['article', 'aside', 'figure', 'details', 'blockquote'])
662
+ const repeatedElementGroups: Array<{ tag: string; instances: AstNode[] }> = []
663
+ const repeatedElementRanges: Array<{ start: number; end: number }> = []
664
+
665
+ walkAst(ast, (node) => {
666
+ if (node.type !== 'element' && node.type !== 'fragment') return
667
+ if (!node.children || node.children.length < 2) return
668
+ const childrenByTag = new Map<string, AstNode[]>()
669
+ for (const child of node.children) {
670
+ if (child.type !== 'element') continue
671
+ const tag = child.name ?? ''
672
+ if (!tag) continue
673
+ if (!childrenByTag.has(tag)) childrenByTag.set(tag, [])
674
+ childrenByTag.get(tag)!.push(child)
675
+ }
676
+ for (const [tag, siblings] of childrenByTag) {
677
+ if (siblings.length < 2) continue
678
+ if (!SEMANTIC_REPEATED_TAGS.has(tag)) continue
679
+ repeatedElementGroups.push({ tag, instances: siblings })
680
+ for (const inst of siblings) {
681
+ const start = nodeOffset(inst)
682
+ const end = inst.position?.end?.offset ? inst.position.end.offset - WRAPPER_OFFSET : start + 1
683
+ repeatedElementRanges.push({ start, end })
684
+ }
685
+ }
686
+ })
687
+
688
+ // Table rows: <tbody><tr> rows → repeatedElementGroup
689
+ // Detects only tbody rows (not thead) so column headers are excluded.
690
+ walkAst(ast, (node) => {
691
+ if (node.type !== 'element' || node.name !== 'tbody') return
692
+ const trRows = (node.children ?? []).filter(c => c.type === 'element' && c.name === 'tr')
693
+ if (trRows.length < 2) return
694
+ repeatedElementGroups.push({ tag: 'tr', instances: trRows })
695
+ for (const row of trRows) {
696
+ const start = nodeOffset(row)
697
+ const end = row.position?.end?.offset ? row.position.end.offset - WRAPPER_OFFSET : start + 1
698
+ repeatedElementRanges.push({ start, end })
699
+ }
700
+ })
701
+
702
+ function isInAriaHidden(offset: number): boolean {
703
+ return ariaHiddenRanges.some((r) => offset >= r.start && offset < r.end)
704
+ }
705
+ function isInMapExpression(offset: number): boolean {
706
+ return mapExpressionRanges.some((r) => offset >= r.start && offset < r.end)
707
+ }
708
/**
 * Reports whether a template offset lies inside any instance of a detected
 * repeated-element group (repeated semantic sibling tags and `<tbody>` rows,
 * as pushed into `repeatedElementRanges` above).
 *
 * Each range is treated as half-open: `start` is inclusive, `end` is exclusive.
 *
 * @param offset Offset to test, in the same coordinate space as the stored ranges.
 * @returns `true` if at least one range contains `offset`, otherwise `false`.
 */
function isInRepeatedElement(offset: number): boolean {
  for (const { start, end } of repeatedElementRanges) {
    if (start <= offset && offset < end) return true
  }
  return false
}
711
+
712
+ const cmsBoundOffsets = new Set<number>()
713
+
714
+ // ── 0. ALREADY-BOUND CMS FIELDS ────────────────────────────────────────
715
+ walkAst(ast, (node) => {
716
+ if (node.type !== 'expression') return
717
+ const exprCode = (node.children ?? []).map((c) => c.value ?? '').join('')
718
+ const cmsVarMatch = exprCode.match(/^\s*\(?\s*\w+\?\.\s*(\w+)/)
719
+ if (cmsVarMatch) {
720
+ const fieldKey = cmsVarMatch[1]!
721
+ // Array fallback: (skData?.items ?? ['a', 'b']).map(
722
+ const arrayFallbackMatch = exprCode.match(/\?\?\s*\[([^\]]*)\]\s*\)\.map\(/)
723
+ if (arrayFallbackMatch) {
724
+ const items: string[] = []
725
+ const itemRe = /`([^`]*)`|'([^']*)'|"([^"]*)"/g
726
+ let m: RegExpExecArray | null
727
+ while ((m = itemRe.exec(arrayFallbackMatch[1]!)) !== null) {
728
+ items.push(m[1] ?? m[2] ?? m[3] ?? '')
729
+ }
730
+ const hasFormatting = items.some(s => /<[a-z]/.test(s))
731
+ addField({
732
+ key: fieldKey, type: 'array', label: camelToLabel(fieldKey), confidence: 'high',
733
+ defaultValue: items.length > 0 ? items : undefined,
734
+ options: { arrayItem: { type: 'text', ...(hasFormatting ? { formatting: true } : {}) } },
735
+ }, nodeOffset(node))
736
+ } else {
737
+ // String fallback or no fallback
738
+ const strFallback = exprCode.match(/\?\?\s*(?:`([\s\S]*?)`|'([^']*)'|"([^"]*)")/)
739
+ const defaultValue = strFallback ? (strFallback[1] ?? strFallback[2] ?? strFallback[3] ?? '').trim() : undefined
740
+ const hasHtml = defaultValue ? /<[a-z]/.test(defaultValue) : false
741
+ addField({
742
+ key: fieldKey, type: 'text', label: camelToLabel(fieldKey), confidence: 'high',
743
+ ...(defaultValue ? { defaultValue } : {}),
744
+ ...(hasHtml ? { options: { formatting: true } } : {}),
745
+ }, nodeOffset(node))
746
+ }
747
+ cmsBoundOffsets.add(nodeOffset(node))
748
+ }
749
+ })
750
+
751
+ walkAst(ast, (node) => {
752
+ if (node.type !== 'element' && node.type !== 'component') return
753
+ for (const attr of node.attributes ?? []) {
754
+ if (attr.kind !== 'expression') continue
755
+ const cmsAttrMatch = attr.value.match(/^\s*\w+\?\.\s*(\w+)\s*\?\?\s*(?:`([\s\S]*?)`|'([^']*)'|"([^"]*)")/)
756
+ if (cmsAttrMatch) {
757
+ const fieldKey = cmsAttrMatch[1]!
758
+ const fallback = (cmsAttrMatch[2] ?? cmsAttrMatch[3] ?? cmsAttrMatch[4] ?? '').trim()
759
+ const pos = attr.position?.start?.offset ? attr.position.start.offset - WRAPPER_OFFSET : nodeOffset(node)
760
+ // formatting: true when fallback contains HTML tags,
761
+ // OR when the attribute is set:html without a fallback (content is always HTML then)
762
+ const isSetHtml = attr.name === 'set:html'
763
+ const fallbackHasHtml = fallback ? /<[a-z]/.test(fallback) : false
764
+ const hasHtml = fallbackHasHtml || (isSetHtml && !fallback)
765
+ addField({
766
+ key: fieldKey, type: 'text', label: camelToLabel(fieldKey), confidence: 'high',
767
+ ...(fallback ? { defaultValue: fallback } : {}),
768
+ ...(hasHtml ? { options: { formatting: true } } : {}),
769
+ }, pos)
770
+ cmsBoundOffsets.add(nodeOffset(node))
771
+ }
772
+ }
773
+ })
774
+
775
/**
 * Decides whether a node must be ignored by the content-detection passes.
 *
 * A node is skipped when its offset
 *  - falls inside an aria-hidden range,
 *  - was already registered as a CMS-bound field (`cmsBoundOffsets`), or
 *  - falls inside an instance of a repeated-element group.
 *
 * @param node AST node under consideration.
 * @returns `true` if the node should be skipped, otherwise `false`.
 */
function shouldSkipForContent(node: AstNode): boolean {
  const pos = nodeOffset(node)
  // Checks run in the same order as the original short-circuit chain.
  if (isInAriaHidden(pos)) return true
  if (cmsBoundOffsets.has(pos)) return true
  return isInRepeatedElement(pos)
}
779
+
780
/**
 * Decides whether a node must be ignored because its offset lies inside a
 * recorded `.map()` expression range.
 *
 * @param node AST node under consideration.
 * @returns `true` if the node's offset is within a map-expression range.
 */
function shouldSkipMapChild(node: AstNode): boolean {
  const pos = nodeOffset(node)
  return isInMapExpression(pos)
}
783
+
784
+ // ── 1. OVERLINE / EYEBROW ──────────────────────────────────────────────
785
+ walkAst(ast, (node) => {
786
+ if (node.type !== 'element') return
787
+ if (node.name !== 'p' && node.name !== 'span') return
788
+ if (shouldSkipForContent(node)) return
789
+ const classVal = getClassValue(node)
790
+ if (!/uppercase|tracking-widest/.test(classVal)) return
791
+ const text = extractTextContent(node, true).trim()
792
+ if (text.length >= 2 && text.length <= 80) {
793
+ addField({ key: 'overline', type: 'text', label: 'Overline', confidence: 'medium', defaultValue: text }, nodeOffset(node))
794
+ }
795
+ })
796
+
797
+ // ── 2. HEADINGS (h1-h6) ───────────────────────────────────────────────
798
+ // Start from the count of already-registered heading* keys (from Section 0)
799
+ // so new headings get non-colliding keys (heading3, heading4, ...).
800
+ let headingCount = Array.from(usedKeys).filter(k => k === 'heading' || /^heading\d+$/.test(k)).length
801
+ walkAst(ast, (node) => {
802
+ if (node.type !== 'element') return
803
+ if (!/^h[1-6]$/.test(node.name ?? '')) return
804
+ if (shouldSkipForContent(node)) return
805
+ const text = extractTextContent(node, true).replace(/\s+/g, ' ').trim()
806
+ if (text.length >= 2) {
807
+ headingCount++
808
+ const headingOpts: Record<string, unknown> = { required: true }
809
+ if (hasInlineFormatting(node)) headingOpts.formatting = true
810
+ addField({
811
+ key: numberedKey('heading', headingCount), type: 'text',
812
+ label: numberedLabel('Heading', headingCount), confidence: 'high',
813
+ defaultValue: text, options: headingOpts,
814
+ }, nodeOffset(node))
815
+ }
816
+ })
817
+
818
+ // ── 3. PARAGRAPHS / DESCRIPTION TEXT ───────────────────────────────────
819
+ // Start from the count of already-registered description* keys (from Section 0)
820
+ // so new description fields get non-colliding keys (description2, description3, ...).
821
+ let descCount = Array.from(usedKeys).filter(k => k === 'description' || /^description\d+$/.test(k)).length
822
+ walkAst(ast, (node) => {
823
+ if (node.type !== 'element') return
824
+ if (node.name !== 'p' && node.name !== 'div') return
825
+ if (shouldSkipForContent(node)) return
826
+ const classVal = getClassValue(node)
827
+ const serialized = serializeNode(node)
828
+ if (/uppercase|tracking-widest/.test(classVal) && serialized.length < 250) return
829
+ if (/text-2xl|text-3xl|text-\[11px\]|text-\[10px\]|text-\[9px\]|text-\[8px\]/.test(classVal)) return
830
+ if (node.name === 'div' && (containsElement(node, 'a') || containsElement(node, 'button'))) {
831
+ // Allow mixed-text divs (callouts with inline links) — skip only pure-element
832
+ // containers like nav/card wrappers that have no direct text nodes.
833
+ const hasMixedText = (node.children ?? []).some(
834
+ c => c.type === 'text' && (c.value ?? '').trim().length > 0,
835
+ )
836
+ if (!hasMixedText) return
837
+ }
838
+ if (node.name === 'div' && ['h1','h2','h3','h4','h5','h6'].some(h => containsElement(node, h))) return
839
+ if (node.name === 'div') {
840
+ const contentChildren = (node.children ?? []).filter(c => c.type !== 'text' || (c.value ?? '').trim().length > 0)
841
+ const hasOnlyElementChildren = contentChildren.length > 0 && contentChildren.every(c => c.type === 'element' || c.type === 'component')
842
+ if (hasOnlyElementChildren) return
843
+ }
844
+ const text = extractTextContent(node, true).replace(/\s+/g, ' ').trim()
845
+ if (text.length < 15) return
846
+ descCount++
847
+ const descOpts: Record<string, unknown> = { multiline: true }
848
+ if (hasInlineFormatting(node)) descOpts.formatting = true
849
+ addField({
850
+ key: numberedKey('description', descCount), type: 'text',
851
+ label: numberedLabel('Beschreibung', descCount), confidence: 'medium',
852
+ defaultValue: text, options: descOpts,
853
+ }, nodeOffset(node))
854
+ })
855
+
856
+ // ── 4. RICH TEXT (set:html) ────────────────────────────────────────────
857
+ let richCount = 0
858
+ walkAst(ast, (node) => {
859
+ if (node.type !== 'element' && node.type !== 'component') return
860
+ const setHtmlAttr = node.attributes?.find((a) => a.name === 'set:html')
861
+ if (!setHtmlAttr || setHtmlAttr.kind !== 'expression') return
862
+ const expr = setHtmlAttr.value.trim()
863
+ if (/^\w+\?\.\w+/.test(expr)) return
864
+ if (expr.startsWith("'") || expr.startsWith('"') || expr.includes('??')) {
865
+ richCount++
866
+ const strMatch = expr.match(/['"]([^'"]+)['"]/)
867
+ const fallbackMatch = expr.match(/\?\?\s*['"]([^'"]+)['"]/)
868
+ const value = fallbackMatch?.[1] ?? strMatch?.[1] ?? ''
869
+ addField({
870
+ key: numberedKey('richText', richCount), type: 'text',
871
+ label: numberedLabel('Rich Text', richCount), confidence: 'high',
872
+ defaultValue: value, options: { multiline: true, formatting: true },
873
+ }, nodeOffset(node))
874
+ }
875
+ })
876
+
877
+ // ── 5. BUTTONS & CTA TEXT ──────────────────────────────────────────────
878
+ let ctaCount = 0
879
+ walkAst(ast, (node) => {
880
+ if (node.type !== 'element') return
881
+ if (node.name !== 'a' && node.name !== 'button') return
882
+ if (shouldSkipForContent(node)) return
883
+ const classVal = getClassValue(node)
884
+ if (!/rounded|px-|py-|font-semibold|bg-/.test(classVal)) return
885
+ const text = extractTextContent(node, true).replace(/\s+/g, ' ').trim()
886
+ if (text.length >= 2 && text.length <= 60) {
887
+ ctaCount++
888
+ addField({
889
+ key: numberedKey('ctaText', ctaCount), type: 'text',
890
+ label: numberedLabel('Button Text', ctaCount), confidence: 'medium',
891
+ defaultValue: text,
892
+ }, nodeOffset(node))
893
+ }
894
+ })
895
+
896
+ // ── 6. CTA LINKS (href values) ─────────────────────────────────────────
897
+ let linkCount = 0
898
+ walkAst(ast, (node) => {
899
+ if (node.type !== 'element' || node.name !== 'a') return
900
+ if (shouldSkipForContent(node)) return
901
+ const classVal = getClassValue(node)
902
+ if (!/rounded|px-|py-|font-semibold|bg-/.test(classVal)) return
903
+ const hrefAttr = getAttr(node, 'href')
904
+ if (!hrefAttr) return
905
+ let href: string | undefined
906
+ if (hrefAttr.kind === 'quoted') {
907
+ href = hrefAttr.value
908
+ } else if (hrefAttr.kind === 'expression') {
909
+ if (/^\s*\w+\?\.\w+/.test(hrefAttr.value)) return
910
+ const fallback = hrefAttr.value.match(/\?\?\s*['"]([^'"]+)['"]/)
911
+ if (fallback) href = fallback[1]
912
+ }
913
+ if (!href) return
914
+ if (href.startsWith('#') || href.startsWith('javascript:')) return
915
+ linkCount++
916
+ addField({
917
+ key: numberedKey('ctaLink', linkCount), type: 'text',
918
+ label: numberedLabel('Button Link', linkCount), confidence: 'medium',
919
+ defaultValue: href,
920
+ }, nodeOffset(node))
921
+ })
922
+
923
+ // ── 7. IMAGES ──────────────────────────────────────────────────────────
924
+ let imgCount = 0
925
+ let altCount = 0
926
+ walkAst(ast, (node) => {
927
+ if (node.type !== 'element' && node.type !== 'component') return
928
+ const tagName = node.name ?? ''
929
+ if (tagName !== 'img' && tagName !== 'Image' && tagName !== 'picture') return
930
+ if (shouldSkipForContent(node)) return
931
+ const srcAttr = getAttr(node, 'src')
932
+ if (srcAttr) {
933
+ let src: string | undefined
934
+ if (srcAttr.kind === 'quoted') src = srcAttr.value
935
+ else if (srcAttr.kind === 'expression') {
936
+ const strMatch = srcAttr.value.match(/['"]([^'"]+)['"]/)
937
+ if (strMatch) src = strMatch[1]
938
+ }
939
+ if (src) {
940
+ imgCount++
941
+ addField({
942
+ key: numberedKey('image', imgCount), type: 'image',
943
+ label: numberedLabel('Bild', imgCount), confidence: 'high',
944
+ defaultValue: { path: src.trim(), alt: '' },
945
+ }, nodeOffset(node))
946
+ }
947
+ }
948
+ if (tagName === 'img' || tagName === 'Image') {
949
+ const altAttr = getAttr(node, 'alt')
950
+ if (altAttr && altAttr.kind === 'quoted' && altAttr.value.trim().length >= 2) {
951
+ altCount++
952
+ addField({
953
+ key: numberedKey('imageAlt', altCount), type: 'text',
954
+ label: numberedLabel('Bild Alt-Text', altCount), confidence: 'medium',
955
+ defaultValue: altAttr.value.trim(),
956
+ }, nodeOffset(node))
957
+ }
958
+ }
959
+ })
960
+
961
+ // ── 8. ICONS ───────────────────────────────────────────────────────────
962
+ let iconCount = 0
963
+ walkAst(ast, (node, parentNode) => {
964
+ if (node.type !== 'element' || node.name !== 'svg') return
965
+ if (shouldSkipForContent(node) || shouldSkipMapChild(node)) return
966
+ const svgSource = serializeNode(node)
967
+ if (svgSource.length < 100) return
968
+ if (parentNode && (parentNode.name === 'a' || parentNode.name === 'button')) return
969
+ iconCount++
970
+ addField({
971
+ key: numberedKey('icon', iconCount), type: 'icon',
972
+ label: numberedLabel('Icon', iconCount), confidence: 'low',
973
+ }, nodeOffset(node))
974
+ })
975
+ let foundIconProp = false
976
+ walkAst(ast, (node) => {
977
+ if (foundIconProp) return
978
+ if (node.type !== 'component') return
979
+ if (shouldSkipMapChild(node)) return
980
+ const iconAttr = getAttr(node, 'icon')
981
+ if (iconAttr) {
982
+ addField({ key: 'icon', type: 'icon', label: 'Icon', confidence: 'high' }, nodeOffset(node))
983
+ foundIconProp = true
984
+ }
985
+ })
986
+
987
+ // ── 9. INLINE ARRAYS: {[...].map()} ───────────────────────────────────
988
+ let arrayCount = 0
989
+ walkAst(ast, (node) => {
990
+ if (node.type !== 'expression') return
991
+ const exprCode = (node.children ?? []).map((c) => c.value ?? '').join('')
992
+ const inlineArrayMatch = exprCode.match(/^\s*\[([\s\S]*?)\]\s*\.map\s*\(\s*\(?\s*(\{[^}]*\}|\w+)/)
993
+ if (!inlineArrayMatch) return
994
+ arrayCount++
995
+ const arrayContent = inlineArrayMatch[1]!
996
+ const callbackParam = inlineArrayMatch[2]!
997
+ let objectKeys: string[] = []
998
+ if (callbackParam.startsWith('{')) {
999
+ objectKeys = callbackParam.replace(/[{}]/g, '').split(',')
1000
+ .map((p: string) => p.trim().split(':')[0]!.trim())
1001
+ .filter((p: string) => p && !p.startsWith('...'))
1002
+ } else {
1003
+ const firstObjMatch = arrayContent.match(/\{\s*([\s\S]*?)\}/)
1004
+ if (firstObjMatch) {
1005
+ const keyRegex = /(\w+)\s*:/g
1006
+ let km: RegExpExecArray | null
1007
+ while ((km = keyRegex.exec(firstObjMatch[1]!)) !== null) objectKeys.push(km[1]!)
1008
+ }
1009
+ if (objectKeys.length === 0) {
1010
+ const accessRegex = new RegExp(`${callbackParam}\\.(\\w+)`, 'g')
1011
+ let am: RegExpExecArray | null
1012
+ const accessedProps = new Set<string>()
1013
+ while ((am = accessRegex.exec(exprCode)) !== null) accessedProps.add(am[1]!)
1014
+ objectKeys = [...accessedProps]
1015
+ }
1016
+ }
1017
+ const isObjectArray = objectKeys.length > 0
1018
+ const isStringArray = /^\s*'[^']*'\s*,/.test(arrayContent) || /^\s*"[^"]*"\s*,/.test(arrayContent)
1019
+ const arrayDefaultValue = extractInlineArrayValues(arrayContent, isObjectArray, isStringArray)
1020
+ if (isObjectArray && !isStringArray) {
1021
+ const innerFields: InferredField[] = objectKeys.map((prop: string) => ({
1022
+ key: prop, type: inferInnerFieldType(prop), label: camelToLabel(prop), confidence: 'medium' as const,
1023
+ }))
1024
+ addField({
1025
+ key: numberedKey('items', arrayCount), type: 'array',
1026
+ label: numberedLabel('Liste', arrayCount), confidence: 'high',
1027
+ defaultValue: arrayDefaultValue,
1028
+ options: { arrayItem: { type: 'object', fields: innerFields } },
1029
+ }, nodeOffset(node))
1030
+ } else {
1031
+ addField({
1032
+ key: numberedKey('items', arrayCount), type: 'array',
1033
+ label: numberedLabel('Liste', arrayCount), confidence: 'high',
1034
+ defaultValue: arrayDefaultValue,
1035
+ options: { arrayItem: { type: 'text' } },
1036
+ }, nodeOffset(node))
1037
+ }
1038
+ })
1039
+
1040
+ // ── 9b. STATIC LISTS (<ul>/<ol> with no .map()) → ARRAY ─────────────────
1041
+ // Only processes simple flat lists (no nested <ul>/<ol> inside <li> items).
1042
+ // Complex nested structures (like phase lists with sub-lists) are handled
1043
+ // by section 12 (repeated groups) via walkContentNodes.
1044
+ let staticListCount = 0
1045
+ walkAst(ast, (node) => {
1046
+ if (node.type !== 'element') return
1047
+ if (node.name !== 'ul' && node.name !== 'ol') return
1048
+ if (shouldSkipForContent(node)) return
1049
+ if (shouldSkipMapChild(node)) return
1050
+ // Skip if any .map() expression exists inside (already dynamic)
1051
+ let hasMap = false
1052
+ walkAst(node, (n) => {
1053
+ if (hasMap) return
1054
+ if (n.type === 'expression') {
1055
+ const code = (n.children ?? []).map(c => c.value ?? '').join('')
1056
+ if (/\.map\s*\(/.test(code)) hasMap = true
1057
+ }
1058
+ })
1059
+ if (hasMap) return
1060
+ // Skip complex lists: <li> items that themselves contain nested <ul>/<ol>
1061
+ let hasNestedList = false
1062
+ for (const li of node.children ?? []) {
1063
+ if (li.type !== 'element' || li.name !== 'li') continue
1064
+ for (const child of li.children ?? []) {
1065
+ if (child.type === 'element' && (child.name === 'ul' || child.name === 'ol')) {
1066
+ hasNestedList = true
1067
+ break
1068
+ }
1069
+ }
1070
+ if (hasNestedList) break
1071
+ }
1072
+ if (hasNestedList) return
1073
+ // Collect <li> items — detect formatting elements (<strong>, <em>, etc.)
1074
+ // If present, store innerHTML of the content span so the MiniRTE can handle it.
1075
+ const FORMATTING_TAGS = new Set(['strong', 'em', 'b', 'i', 'code', 'a', 's', 'u'])
1076
+ let listHasFormatting = false
1077
+ const listItems: string[] = []
1078
+ for (const li of node.children ?? []) {
1079
+ if (li.type !== 'element' || li.name !== 'li') continue
1080
+ // Check for formatting elements inside this <li>
1081
+ let liHasFormatting = false
1082
+ walkAst(li, (n) => {
1083
+ if (liHasFormatting) return
1084
+ if (n.type === 'element' && FORMATTING_TAGS.has(n.name ?? '')) liHasFormatting = true
1085
+ })
1086
+ if (liHasFormatting) {
1087
+ listHasFormatting = true
1088
+ // Extract innerHTML of the content span via <li> source + regex.
1089
+ // Child span positions from @astrojs/compiler point after the opening tag,
1090
+ // so we slice the full <li> source and use regex to find the content span.
1091
+ let contentHTML: string | null = null
1092
+ const liSrcStart = li.position?.start?.offset
1093
+ const liSrcEnd = li.position?.end?.offset
1094
+ if (liSrcStart != null && liSrcEnd != null) {
1095
+ const liSrc = template.slice(liSrcStart, liSrcEnd)
1096
+ const spanRegex = /<span[^>]*>([\s\S]*?)<\/span>/g
1097
+ let spanMatch: RegExpExecArray | null
1098
+ while ((spanMatch = spanRegex.exec(liSrc)) !== null) {
1099
+ const inner = spanMatch[1]!
1100
+ if (!inner.trim()) continue // empty bullet-dot span — skip
1101
+ contentHTML = inner.replace(/\s+/g, ' ').trim()
1102
+ break
1103
+ }
1104
+ }
1105
+ listItems.push(contentHTML ?? extractTextContent(li, true).replace(/\s+/g, ' ').trim())
1106
+ } else {
1107
+ const t = extractTextContent(li, true).replace(/\s+/g, ' ').trim()
1108
+ if (t.length >= 1) listItems.push(t)
1109
+ }
1110
+ }
1111
+ if (listItems.length < 1) return
1112
+ staticListCount++
1113
+ addField({
1114
+ key: numberedKey('items', arrayCount + staticListCount), type: 'array',
1115
+ label: numberedLabel('Liste', arrayCount + staticListCount), confidence: 'high',
1116
+ defaultValue: listItems,
1117
+ options: { arrayItem: { type: 'text', ...(listHasFormatting ? { formatting: true } : {}) } },
1118
+ }, nodeOffset(node))
1119
+ })
1120
+
1121
+ // ── 10. COMPONENT PROPS (content-bearing) ─────────────────────────────
1122
+ const componentCounts = new Map<string, number>()
1123
+ walkAst(ast, (node) => {
1124
+ if (node.type !== 'component' || shouldSkipMapChild(node)) return
1125
+ componentCounts.set(node.name ?? '', (componentCounts.get(node.name ?? '') ?? 0) + 1)
1126
+ })
1127
+
1128
+ walkAst(ast, (node) => {
1129
+ if (node.type !== 'component') return
1130
+ if (shouldSkipMapChild(node)) return
1131
+ if ((componentCounts.get(node.name ?? '') ?? 0) >= 2) return
1132
+ for (const attr of node.attributes ?? []) {
1133
+ if (attr.kind === 'quoted') {
1134
+ const propName = attr.name
1135
+ const propValue = attr.value
1136
+ if (/^(class|className|id|style|type|role|width|height|viewBox|fill|stroke|xmlns|d|cx|cy|r|rx|ry|x|y|x1|y1|x2|y2)$/.test(propName)) continue
1137
+ if (/^(lang|language|filename|file|format|variant|size|loading|decoding|transition|client:.*)$/.test(propName)) continue
1138
+ if (/^(aria-|data-)/.test(propName)) continue
1139
+ if (propValue.length < 2) continue
1140
+ if (propValue === 'true' || propValue === 'false' || /^\d+$/.test(propValue)) continue
1141
+ if (propValue.includes('/') && !propValue.includes(' ')) continue
1142
+ addField({
1143
+ key: propName,
1144
+ type: propName === 'icon' ? 'icon' : propName === 'src' ? 'image' : 'text',
1145
+ label: camelToLabel(propName), confidence: 'medium', defaultValue: propValue,
1146
+ }, nodeOffset(node))
1147
+ } else if (attr.kind === 'expression') {
1148
+ const propName = attr.name
1149
+ if (/^(class|className|id|style|type|role|lang|language|filename|file|format|variant|size|loading|decoding)$/.test(propName)) continue
1150
+ if (/^\s*\w+\s*$/.test(attr.value)) continue
1151
+ const fallbackMatch = attr.value.match(/\?\?\s*['"]([^'"]+)['"]/)
1152
+ if (fallbackMatch) {
1153
+ addField({
1154
+ key: propName,
1155
+ type: propName === 'icon' ? 'icon' : propName === 'src' ? 'image' : 'text',
1156
+ label: camelToLabel(propName), confidence: 'medium', defaultValue: fallbackMatch[1]!,
1157
+ }, nodeOffset(node))
1158
+ }
1159
+ }
1160
+ }
1161
+ })
1162
+
1163
+ // ── 11. REPEATED COMPONENTS → ARRAY ───────────────────────────────────
1164
+ const componentInstances = new Map<string, AstNode[]>()
1165
+ walkAst(ast, (node) => {
1166
+ if (node.type !== 'component') return
1167
+ if (shouldSkipMapChild(node)) return
1168
+ const name = node.name ?? ''
1169
+ if (!name) return
1170
+ if (!componentInstances.has(name)) componentInstances.set(name, [])
1171
+ componentInstances.get(name)!.push(node)
1172
+ })
1173
+
1174
+ for (const [compName, instances] of componentInstances) {
1175
+ if (instances.length < 2) continue
1176
+ const allProps = new Set<string>()
1177
+ const instanceValues: Array<Record<string, unknown>> = []
1178
+ for (const inst of instances) {
1179
+ const item: Record<string, unknown> = {}
1180
+ for (const attr of inst.attributes ?? []) {
1181
+ const name = attr.name
1182
+ if (/^(class|className|id|style|type|role)$/.test(name)) continue
1183
+ if (/^(aria-|data-)/.test(name)) continue
1184
+ if (attr.kind === 'quoted') {
1185
+ allProps.add(name); item[name] = attr.value
1186
+ } else if (attr.kind === 'expression') {
1187
+ allProps.add(name)
1188
+ const expr = attr.value.trim()
1189
+ if (expr === 'true') item[name] = true
1190
+ else if (expr === 'false') item[name] = false
1191
+ else if (/^\w+$/.test(expr) && frontmatter) {
1192
+ const extractedValue = extractFrontmatterValue(frontmatter, expr)
1193
+ if (extractedValue !== undefined) item[name] = extractedValue
1194
+ }
1195
+ }
1196
+ }
1197
+ if (Object.keys(item).length > 0) instanceValues.push(item)
1198
+ }
1199
+ if (allProps.size > 0) {
1200
+ const cardKey = compName.replace(/([A-Z])/g, (_m: string, c: string, i: number) => i === 0 ? c.toLowerCase() : '_' + c.toLowerCase()).replace(/_/g, '') + 's'
1201
+ if (!usedKeys.has(cardKey)) {
1202
+ const innerFields: InferredField[] = [...allProps].map((p) => {
1203
+ const isArray = instanceValues.some(iv => Array.isArray(iv[p]))
1204
+ if (isArray) {
1205
+ return { key: p, type: 'array' as const, label: camelToLabel(p), confidence: 'medium' as const, options: { arrayItem: { type: 'text' as const } } }
1206
+ }
1207
+ return { key: p, type: inferInnerFieldType(p), label: camelToLabel(p), confidence: 'medium' as const }
1208
+ })
1209
+ addField({
1210
+ key: cardKey, type: 'array', label: `${compName} Liste`, confidence: 'medium',
1211
+ defaultValue: instanceValues.length > 0 ? instanceValues : undefined,
1212
+ options: { arrayItem: { type: 'object', fields: innerFields } },
1213
+ }, nodeOffset(instances[0]!))
1214
+ }
1215
+ }
1216
+ }
1217
+
1218
+ // ── 12. REPEATED HTML ELEMENTS → ARRAY (v2: position-based) ───────────
1219
+ // Phase 1: Pure structural detection — no CSS heuristics.
1220
+ // Produces RepeatedGroup objects with position info for the patcher.
1221
+ for (const group of repeatedElementGroups) {
1222
+ const instanceCount = group.instances.length
1223
+
1224
+ // Build instance bounds
1225
+ const instanceBounds: RepeatedGroupInstance[] = group.instances.map(inst => ({
1226
+ start: nodeOffset(inst),
1227
+ end: nodeEnd(inst),
1228
+ }))
1229
+
1230
+ // Phase 1a: Extract structural fingerprint from each instance
1231
+ const instanceFingerprints: Array<ContentItem[]> = []
1232
+
1233
+ for (const inst of group.instances) {
1234
+ const items: ContentItem[] = []
1235
+ walkContentNodes(inst, 0, items, frontmatter)
1236
+ instanceFingerprints.push(items)
1237
+ }
1238
+
1239
+ if (instanceFingerprints.length === 0) continue
1240
+
1241
+ // Phase 1b: Use MAJORITY structure as canonical (most common tag sequence)
1242
+ // This avoids using an instance with optional elements as the template.
1243
+ const signatures = instanceFingerprints.map(fp => fp.map(i => i.tag).join(','))
1244
+ const sigCounts = new Map<string, number>()
1245
+ for (const sig of signatures) sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1)
1246
+ const mostCommonSig = [...sigCounts.entries()].sort((a, b) => b[1] - a[1])[0]![0]
1247
+ const canonicalIdx = signatures.indexOf(mostCommonSig)
1248
+ const canonical = instanceFingerprints[canonicalIdx]!
1249
+
1250
+ // Track which items in each instance have been consumed (prevents double-matching)
1251
+ const consumedPerInstance: Array<Set<number>> = instanceFingerprints.map(() => new Set())
1252
+
1253
+ // For each canonical item, check if it exists in all other instances
1254
+ const innerFields: InnerFieldInfo[] = []
1255
+ const typeCounts: Record<string, number> = {}
1256
+
1257
+ for (let ci = 0; ci < canonical.length; ci++) {
1258
+ const cItem = canonical[ci]!
1259
+ const tag = cItem.tag
1260
+ const isHeading = /^h[1-6]$/.test(tag)
1261
+ const isLink = tag === 'a'
1262
+ const isArray = tag === '__array__'
1263
+
1264
+ // Determine field type and key
1265
+ let fieldType: 'text' | 'array' | 'link'
1266
+ let keyBase: string
1267
+
1268
+ if (isHeading) {
1269
+ fieldType = 'text'
1270
+ keyBase = 'heading'
1271
+ } else if (isLink) {
1272
+ fieldType = 'link'
1273
+ keyBase = 'link'
1274
+ } else if (isArray) {
1275
+ fieldType = 'array'
1276
+ keyBase = 'list'
1277
+ } else {
1278
+ fieldType = 'text'
1279
+ keyBase = 'text'
1280
+ }
1281
+
1282
+ typeCounts[keyBase] = (typeCounts[keyBase] ?? 0) + 1
1283
+ const key = typeCounts[keyBase] === 1 ? keyBase : `${keyBase}${typeCounts[keyBase]}`
1284
+
1285
+ // Find matching item in each instance (track consumed items per instance)
1286
+ const positions: Array<FieldPosition | null> = []
1287
+ const defaultValues: unknown[] = []
1288
+ let presentCount = 0
1289
+
1290
+ for (let ii = 0; ii < instanceCount; ii++) {
1291
+ const fp = instanceFingerprints[ii]!
1292
+ const consumed = consumedPerInstance[ii]!
1293
+ const match = findMatchingItem(fp, cItem, ci, canonical, consumed)
1294
+ if (match) {
1295
+ presentCount++
1296
+ positions.push({
1297
+ offset: nodeOffset(match.node),
1298
+ length: nodeEnd(match.node) - nodeOffset(match.node),
1299
+ source: match.exprSource,
1300
+ })
1301
+ if (isArray && match.exprSource) {
1302
+ defaultValues.push(extractFrontmatterValue(frontmatter, match.exprSource) ?? [])
1303
+ } else if (isLink) {
1304
+ // For link fields, use the href value (text is extracted separately as linkText)
1305
+ defaultValues.push(match.hrefValue || null)
1306
+ } else {
1307
+ defaultValues.push(match.text || match.hrefValue || null)
1308
+ }
1309
+ } else {
1310
+ positions.push(null)
1311
+ defaultValues.push(null)
1312
+ }
1313
+ }
1314
+
1315
+ innerFields.push({
1316
+ key,
1317
+ type: fieldType,
1318
+ tag,
1319
+ required: presentCount === instanceCount,
1320
+ positions,
1321
+ defaultValues,
1322
+ })
1323
+
1324
+ // For links, also extract link text as a separate field
1325
+ if (isLink) {
1326
+ typeCounts['linkText'] = (typeCounts['linkText'] ?? 0) + 1
1327
+ const ltKey = typeCounts['linkText'] === 1 ? 'linkText' : `linkText${typeCounts['linkText']}`
1328
+ const ltPositions: Array<FieldPosition | null> = []
1329
+ const ltDefaults: unknown[] = []
1330
+
1331
+ for (let ii = 0; ii < instanceCount; ii++) {
1332
+ // Reuse the same match (link node) — text is a sub-field, not a separate consumed item
1333
+ const fp = instanceFingerprints[ii]!
1334
+ const consumed = consumedPerInstance[ii]!
1335
+ // Don't consume again — find same link node via tag+position
1336
+ const match = findMatchingItemPeek(fp, cItem, ci, canonical, consumed)
1337
+ if (match) {
1338
+ ltPositions.push({
1339
+ offset: nodeOffset(match.node),
1340
+ length: nodeEnd(match.node) - nodeOffset(match.node),
1341
+ })
1342
+ ltDefaults.push(match.text || null)
1343
+ } else {
1344
+ ltPositions.push(null)
1345
+ ltDefaults.push(null)
1346
+ }
1347
+ }
1348
+
1349
+ innerFields.push({
1350
+ key: ltKey,
1351
+ type: 'text',
1352
+ tag: 'a',
1353
+ required: presentCount === instanceCount,
1354
+ positions: ltPositions,
1355
+ defaultValues: ltDefaults,
1356
+ })
1357
+ }
1358
+ }
1359
+
1360
+ // Phase 1c: Detect optional elements from longer instances
1361
+ // Items in non-canonical instances that weren't consumed are optional fields.
1362
+ for (let ii = 0; ii < instanceCount; ii++) {
1363
+ if (ii === canonicalIdx) continue
1364
+ const fp = instanceFingerprints[ii]!
1365
+ const consumed = consumedPerInstance[ii]!
1366
+ for (let fi = 0; fi < fp.length; fi++) {
1367
+ if (consumed.has(fi)) continue
1368
+ const item = fp[fi]!
1369
+ // This is an optional element only present in this instance
1370
+ const tag = item.tag
1371
+ const isArray = tag === '__array__'
1372
+ const isLink = tag === 'a'
1373
+ let fieldType: 'text' | 'array' | 'link' = isArray ? 'array' : isLink ? 'link' : 'text'
1374
+ let keyBase = /^h[1-6]$/.test(tag) ? 'heading' : isLink ? 'link' : isArray ? 'list' : 'text'
1375
+ typeCounts[keyBase] = (typeCounts[keyBase] ?? 0) + 1
1376
+ const key = typeCounts[keyBase] === 1 ? keyBase : `${keyBase}${typeCounts[keyBase]}`
1377
+
1378
+ const positions: Array<FieldPosition | null> = new Array(instanceCount).fill(null)
1379
+ const defaultValues: unknown[] = new Array(instanceCount).fill(null)
1380
+ positions[ii] = {
1381
+ offset: nodeOffset(item.node),
1382
+ length: nodeEnd(item.node) - nodeOffset(item.node),
1383
+ source: item.exprSource,
1384
+ }
1385
+ if (isArray && item.exprSource) {
1386
+ defaultValues[ii] = extractFrontmatterValue(frontmatter, item.exprSource) ?? []
1387
+ } else {
1388
+ defaultValues[ii] = item.text || item.hrefValue || null
1389
+ }
1390
+
1391
+ innerFields.push({ key, type: fieldType, tag, required: false, positions, defaultValues })
1392
+
1393
+ if (isLink) {
1394
+ typeCounts['linkText'] = (typeCounts['linkText'] ?? 0) + 1
1395
+ const ltKey = typeCounts['linkText'] === 1 ? 'linkText' : `linkText${typeCounts['linkText']}`
1396
+ const ltPositions: Array<FieldPosition | null> = new Array(instanceCount).fill(null)
1397
+ const ltDefaults: unknown[] = new Array(instanceCount).fill(null)
1398
+ ltPositions[ii] = positions[ii] ?? null
1399
+ ltDefaults[ii] = item.text || null
1400
+ innerFields.push({ key: ltKey, type: 'text', tag: 'a', required: false, positions: ltPositions, defaultValues: ltDefaults })
1401
+ }
1402
+ }
1403
+ }
1404
+
1405
+ if (innerFields.length === 0) continue
1406
+
1407
+ arrayCount++
1408
+ let fieldKey = numberedKey('items', arrayCount)
1409
+ while (usedKeys.has(fieldKey)) {
1410
+ arrayCount++
1411
+ fieldKey = numberedKey('items', arrayCount)
1412
+ }
1413
+
1414
+ // Build defaultValue array for backwards compatibility
1415
+ const defaultValue: Array<Record<string, unknown>> = []
1416
+ for (let ii = 0; ii < instanceCount; ii++) {
1417
+ const item: Record<string, unknown> = {}
1418
+ for (const f of innerFields) {
1419
+ if (f.defaultValues[ii] != null) {
1420
+ item[f.key] = f.defaultValues[ii]
1421
+ }
1422
+ }
1423
+ defaultValue.push(item)
1424
+ }
1425
+
1426
+ // Build inner field definitions for the field schema
1427
+ const innerFieldDefs: InferredField[] = innerFields.map(f => {
1428
+ if (f.type === 'array') {
1429
+ return {
1430
+ key: f.key,
1431
+ type: 'array' as const,
1432
+ label: f.label ?? camelToLabel(f.key),
1433
+ confidence: 'medium' as const,
1434
+ options: { arrayItem: { type: 'text' as const } },
1435
+ }
1436
+ }
1437
+ return {
1438
+ key: f.key,
1439
+ type: f.type === 'link' ? 'text' as const : 'text' as const,
1440
+ label: f.label ?? camelToLabel(f.key),
1441
+ confidence: 'medium' as const,
1442
+ }
1443
+ })
1444
+
1445
+ // Add as top-level field
1446
+ const groupLabel = group.tag === 'tr'
1447
+ ? 'Tabellen-Zeilen'
1448
+ : `${group.tag.charAt(0).toUpperCase() + group.tag.slice(1)} Liste`
1449
+ addField({
1450
+ key: fieldKey,
1451
+ type: 'array',
1452
+ label: groupLabel,
1453
+ confidence: 'high',
1454
+ defaultValue,
1455
+ options: {
1456
+ arrayItem: { type: 'object', fields: innerFieldDefs },
1457
+ _repeatedTag: group.tag,
1458
+ _instanceCount: instanceCount,
1459
+ } as any,
1460
+ }, instanceBounds[0]!.start)
1461
+
1462
+ // Collect class attributes per instance (AST-based, for dynamic class detection in patcher)
1463
+ // Positions are relative to `template` here; posAdjust is applied after extractTemplateFields returns
1464
+ const classAttrs = group.instances.map(inst => collectClassAttrs(inst, '', template))
1465
+
1466
+ // Store RepeatedGroup for the patcher
1467
+ repeatedGroups.push({
1468
+ tag: group.tag,
1469
+ fieldKey,
1470
+ instances: instanceBounds,
1471
+ templateIndex: 0,
1472
+ fields: innerFields,
1473
+ classAttrs,
1474
+ })
1475
+ }
1476
+
1477
+ fields.sort((a, b) => a._pos - b._pos)
1478
+ return { fields: fields.map(({ _pos, ...field }) => field), repeatedGroups }
1479
+ }
1480
+
1481
+ // ---------------------------------------------------------------------------
1482
+ // Phase 1: Structural content node walker (no CSS heuristics)
1483
+ // ---------------------------------------------------------------------------
1484
+
1485
/**
 * One content-bearing element recorded by the structural walker.
 */
interface ContentItem {
  /** Element name (`h1`–`h6`, `p`, `span`, `a`, `td`) or `__array__` for a `<ul>`/`<ol>` list. */
  tag: string
  /** Depth value of the walk level on which the element was found. */
  depth: number
  /** AST node of the recorded element itself. */
  node: AstNode
  /**
   * Extracted text of the element. For static lists this is the `<li>` texts
   * joined with `\n`; for `.map()`-driven lists it is the empty string.
   */
  text: string
  /** Identifier found directly before `.map(` inside a list, when present. */
  exprSource?: string
  /** Link target taken from the `href` attribute of an `<a>`, when one could be read. */
  hrefValue?: string
}
1493
+
1494
/**
 * Walk an element's subtree and collect content nodes structurally.
 * No CSS class checking — purely based on HTML tags and position.
 *
 * Matches are appended to `items` in document order. Headings, paragraphs,
 * spans, links, table cells and lists are treated as leaves: they are
 * recorded (when they carry content) and never descended into. Every other
 * element is treated as a container and walked recursively with `depth + 1`.
 * Elements for which `isAriaHidden` returns true are skipped together with
 * their subtree.
 *
 * @param root - Node whose children are inspected.
 * @param depth - Depth value stored on every item found directly under `root`.
 * @param items - Output accumulator; mutated in place.
 * @param frontmatter - Not read here; only forwarded to recursive calls.
 */
function walkContentNodes(
  root: AstNode,
  depth: number,
  items: ContentItem[],
  frontmatter: string,
): void {
  // Leaf tags that carry plain text only: headings, paragraphs, spans, table cells.
  const textLeaf = /^(?:h[1-6]|p|span|td)$/

  // Text of an element with every whitespace run collapsed to a single space.
  const readText = (node: AstNode): string =>
    extractTextContent(node, true).replace(/\s+/g, ' ').trim()

  for (const child of root.children ?? []) {
    // Only elements can produce items; hidden elements are ignored entirely.
    if (child.type !== 'element' || isAriaHidden(child)) continue
    const name = child.name ?? ''

    // Heading / paragraph / span / table cell: record when non-empty, never recurse.
    if (textLeaf.test(name)) {
      const text = readText(child)
      if (text.length >= 1) {
        items.push({ tag: name, depth, node: child, text })
      }
      continue
    }

    // Link: recorded when it has visible text or a readable href.
    if (name === 'a') {
      const text = readText(child)
      const href = getAttr(child, 'href')
      let hrefValue: string | undefined
      if (href?.kind === 'quoted') {
        hrefValue = href.value
      } else if (href?.kind === 'expression') {
        // Fall back to the first quoted string literal inside the expression.
        const literal = href.value.match(/['"]([^'"]+)['"]/)
        if (literal) hrefValue = literal[1]
      }
      if (text.length >= 1 || hrefValue) {
        items.push({ tag: 'a', depth, node: child, text, hrefValue })
      }
      continue
    }

    // List: either driven by a `.map()` expression or made of static <li> items.
    if (name === 'ul' || name === 'ol') {
      let mapSource: string | undefined
      walkAst(child, (expr) => {
        if (mapSource) return // keep the first `.map()` source found
        if (expr.type !== 'expression') return
        const code = (expr.children ?? []).map(c => c.value ?? '').join('')
        const mapMatch = code.match(/(\w+)\.map\s*\(/)
        if (mapMatch) mapSource = mapMatch[1]
      })

      if (mapSource) {
        items.push({ tag: '__array__', depth, node: child, text: '', exprSource: mapSource })
      } else {
        const listTexts: string[] = []
        for (const li of child.children ?? []) {
          if (li.type !== 'element' || li.name !== 'li') continue
          // Collapse whitespace like every other tag does. The texts are joined
          // with '\n' below, so a raw line break inside one <li> would otherwise
          // be indistinguishable from an item boundary.
          const itemText = readText(li)
          if (itemText.length >= 1) listTexts.push(itemText)
        }
        if (listTexts.length > 0) {
          items.push({ tag: '__array__', depth, node: child, text: listTexts.join('\n') })
        }
      }
      continue
    }

    // Any other element is a container (div, section, ...): descend one level.
    walkContentNodes(child, depth + 1, items, frontmatter)
  }
}
1594
+
1595
/**
 * Find the item in another instance's fingerprint that corresponds to
 * `canonicalItem`, and mark it as consumed so it won't be reused.
 * Matching uses the tag plus the relative position among same-tag items.
 *
 * @param targetFp - Fingerprint (content items) of the instance being searched.
 * @param canonicalItem - Item from the canonical instance to find a counterpart for.
 * @param canonicalIndex - Index of `canonicalItem` within `allCanonical`.
 * @param allCanonical - Full fingerprint of the canonical instance.
 * @param consumed - Indices into `targetFp` already claimed; the index of a
 *   returned match is added to this set.
 * @returns The matched item, or `null` when no unconsumed candidate exists.
 */
function findMatchingItem(
  targetFp: ContentItem[],
  canonicalItem: ContentItem,
  canonicalIndex: number,
  allCanonical: ContentItem[],
  consumed: Set<number>,
): ContentItem | null {
  const hit = findMatchingItemCore(targetFp, canonicalItem, canonicalIndex, allCanonical, consumed)
  if (!hit) return null

  // Claim the slot so a later lookup cannot hand out the same item again.
  consumed.add(hit.idx)
  return hit.item
}
1613
+
1614
/**
 * Find a matching content item WITHOUT consuming it (for sub-fields like link text).
 *
 * The lookup deliberately ignores the consumed set: the node being looked for
 * has typically already been claimed by the link field itself. Among all items
 * in `targetFp` with the same tag, the one whose relative position is closest
 * to the canonical item's relative position wins; on a tie the earliest wins.
 *
 * @param targetFp - Fingerprint (content items) of the instance being searched.
 * @param canonicalItem - Item from the canonical instance to find a counterpart for.
 * @param canonicalIndex - Index of `canonicalItem` within `allCanonical`.
 * @param allCanonical - Full fingerprint of the canonical instance.
 * @param consumed - Accepted for signature parity with the consuming lookup; not read.
 * @returns The best same-tag item, or `null` when `targetFp` has none.
 */
function findMatchingItemPeek(
  targetFp: ContentItem[],
  canonicalItem: ContentItem,
  canonicalIndex: number,
  allCanonical: ContentItem[],
  consumed: Set<number>,
): ContentItem | null {
  const wantedRel = canonicalIndex / Math.max(allCanonical.length - 1, 1)
  const span = Math.max(targetFp.length - 1, 1)

  // Single pass keeping the closest same-tag item; strict `<` keeps the
  // earliest candidate when two are equally close.
  const winner = targetFp.reduce<{ item: ContentItem; dist: number } | null>(
    (best, item, idx) => {
      if (item.tag !== canonicalItem.tag) return best
      const dist = Math.abs(idx / span - wantedRel)
      return best === null || dist < best.dist ? { item, dist } : best
    },
    null,
  )

  return winner === null ? null : winner.item
}
1640
+
1641
/**
 * Shared lookup: pick the unconsumed item in `targetFp` that has the same tag
 * as `canonicalItem` and whose relative position is closest to the canonical
 * item's relative position. On a tie the earliest candidate wins.
 * The `consumed` set is only read here, never modified.
 *
 * @param targetFp - Fingerprint (content items) of the instance being searched.
 * @param canonicalItem - Item from the canonical instance to find a counterpart for.
 * @param canonicalIndex - Index of `canonicalItem` within `allCanonical`.
 * @param allCanonical - Full fingerprint of the canonical instance.
 * @param consumed - Indices into `targetFp` that must not be returned.
 * @returns The match together with its index in `targetFp`, or `null`.
 */
function findMatchingItemCore(
  targetFp: ContentItem[],
  canonicalItem: ContentItem,
  canonicalIndex: number,
  allCanonical: ContentItem[],
  consumed: Set<number>,
): { item: ContentItem; idx: number } | null {
  // Gather every still-available item carrying the wanted tag.
  const pool: Array<{ item: ContentItem; idx: number }> = []
  targetFp.forEach((item, idx) => {
    if (item.tag === canonicalItem.tag && !consumed.has(idx)) {
      pool.push({ item, idx })
    }
  })

  const first = pool[0]
  if (first === undefined) return null
  if (pool.length === 1) return first

  const wantedRel = canonicalIndex / Math.max(allCanonical.length - 1, 1)
  const span = Math.max(targetFp.length - 1, 1)
  const distanceOf = (idx: number): number => Math.abs(idx / span - wantedRel)

  // Linear minimum scan; strict `<` keeps the earliest candidate on ties.
  let winner = first
  let winnerDist = distanceOf(first.idx)
  for (const candidate of pool.slice(1)) {
    const dist = distanceOf(candidate.idx)
    if (dist < winnerDist) {
      winner = candidate
      winnerDist = dist
    }
  }
  return winner
}