@setzkasten-cms/astro-admin 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +37 -0
- package/package.json +70 -0
- package/src/admin-page.astro +148 -0
- package/src/api-routes/__tests__/add-section-helpers.test.ts +383 -0
- package/src/api-routes/__tests__/catalog-api.test.ts +115 -0
- package/src/api-routes/__tests__/deferred-operations.test.ts +232 -0
- package/src/api-routes/__tests__/deploy-hook.test.ts +134 -0
- package/src/api-routes/__tests__/patch-page-file.test.ts +193 -0
- package/src/api-routes/__tests__/scan-page-helpers.test.ts +162 -0
- package/src/api-routes/__tests__/section-management.test.ts +284 -0
- package/src/api-routes/_storage-config.ts +54 -0
- package/src/api-routes/asset-proxy.ts +76 -0
- package/src/api-routes/auth-callback.ts +105 -0
- package/src/api-routes/auth-login.ts +87 -0
- package/src/api-routes/auth-logout.ts +9 -0
- package/src/api-routes/auth-session.ts +36 -0
- package/src/api-routes/catalog-add.ts +151 -0
- package/src/api-routes/catalog-export.ts +86 -0
- package/src/api-routes/catalog-helpers.ts +83 -0
- package/src/api-routes/catalog-list.ts +12 -0
- package/src/api-routes/config.ts +30 -0
- package/src/api-routes/deploy-hook.ts +69 -0
- package/src/api-routes/github-proxy.ts +111 -0
- package/src/api-routes/init-add-section.ts +511 -0
- package/src/api-routes/init-apply.ts +270 -0
- package/src/api-routes/init-migrate.ts +262 -0
- package/src/api-routes/init-scan-page.ts +336 -0
- package/src/api-routes/init-scan.ts +162 -0
- package/src/api-routes/pages.ts +17 -0
- package/src/api-routes/section-add.ts +189 -0
- package/src/api-routes/section-commit-pending.ts +147 -0
- package/src/api-routes/section-delete.ts +141 -0
- package/src/api-routes/section-duplicate.ts +144 -0
- package/src/api-routes/section-management.ts +95 -0
- package/src/api-routes/section-prepare-copy.ts +93 -0
- package/src/api-routes/section-prepare.ts +121 -0
- package/src/env.d.ts +7 -0
- package/src/init/__tests__/page-level.test.ts +1033 -0
- package/src/init/__tests__/page-list-coverage.test.ts +474 -0
- package/src/init/__tests__/patcher-edge-cases.test.ts +434 -0
- package/src/init/__tests__/patcher-page-mode.test.ts +272 -0
- package/src/init/__tests__/section-pipeline.test.ts +393 -0
- package/src/init/analyzer-types.ts +92 -0
- package/src/init/astro-config-patcher.ts +98 -0
- package/src/init/astro-detector.ts +207 -0
- package/src/init/astro-section-analyzer-v2.ts +1663 -0
- package/src/init/field-label-enricher.ts +72 -0
- package/src/init/template-patcher-v2.ts +1957 -0
- package/tsconfig.json +9 -0
|
@@ -0,0 +1,1663 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Astro-specific section component analysis (v2).
|
|
3
|
+
*
|
|
4
|
+
* Two-phase architecture:
|
|
5
|
+
* Phase 1 — Structural detection (AST-based, position-tracked, no CSS heuristics)
|
|
6
|
+
* Phase 2 — Label enrichment (delegated to field-label-enricher.ts)
|
|
7
|
+
*
|
|
8
|
+
* The analyzer produces an AnalyzerResult that is consumed by:
|
|
9
|
+
* - field-label-enricher.ts (cosmetic labels)
|
|
10
|
+
* - template-patcher-v2.ts (template transformation)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { parse } from '@astrojs/compiler'
|
|
14
|
+
import { inferFields, type InferredSection, type InferredField } from '@setzkasten-cms/core/init'
|
|
15
|
+
import type {
|
|
16
|
+
AnalyzerResult,
|
|
17
|
+
RepeatedGroup,
|
|
18
|
+
RepeatedGroupInstance,
|
|
19
|
+
InnerFieldInfo,
|
|
20
|
+
FieldPosition,
|
|
21
|
+
} from './analyzer-types.js'
|
|
22
|
+
import { enrichFieldLabels } from './field-label-enricher.js'
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// AST types
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
/**
 * Minimal structural view of a node in the AST returned by the Astro compiler's `parse()`.
 * Only the properties this analyzer reads are declared.
 */
interface AstNode {
  /** Node discriminator; this file checks for 'element', 'component', 'expression', 'text' and 'fragment'. */
  type: string
  /** Tag or component name (read on element/component nodes). */
  name?: string
  /** Raw text (read on text nodes and on the children of expression nodes). */
  value?: string
  attributes?: AstAttr[]
  children?: AstNode[]
  /** Source location; `end` is not always present. */
  position?: {
    start: { offset: number; line: number; column: number }
    end?: { offset: number }
  }
}
|
|
36
|
+
|
|
37
|
+
/** Attribute entry attached to an element/component AST node. */
interface AstAttr {
  type: 'attribute'
  name: string
  value: string
  /** How the attribute was written in the source (e.g. `a="x"`, `a`, `a={x}`). */
  kind:
    | 'quoted'
    | 'empty'
    | 'expression'
    | 'spread'
    | 'shorthand'
    | 'template-literal'
  raw?: string
  position?: { start: { offset: number } }
}
|
|
45
|
+
|
|
46
|
+
/**
 * Depth-first, pre-order traversal of an AST.
 *
 * `callback` is invoked for `node` first and then for every descendant, each time
 * together with its direct parent (`null` for the node the walk started on).
 */
function walkAst(
  node: AstNode,
  callback: (node: AstNode, parent: AstNode | null) => void,
  parent: AstNode | null = null,
): void {
  callback(node, parent)

  const kids = node.children
  if (!kids) return

  for (let idx = 0; idx < kids.length; idx++) {
    walkAst(kids[idx]!, callback, node)
  }
}
|
|
54
|
+
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// Public API
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/**
 * Analyze an Astro section component and infer its fields.
 * Returns an AnalyzerResult with structural field info + repeated groups for the patcher.
 *
 * Pipeline:
 *  1. Split the file into frontmatter and template.
 *  2. Collect candidate frontmatter variables and drop those that are used as an
 *     attribute value (`attr={name}`), used as a `??` / `||` fallback, or — in
 *     component mode — only ever iterated with `.map()`.
 *  3. Infer fields for the remaining variables; fields without a default get the
 *     literal found in the frontmatter, if any.
 *  4. Extract template fields and repeated groups, then shift all repeated-group
 *     offsets from template-relative to source-relative.
 *  5. Enrich labels of the repeated groups' inner fields.
 *  6. Merge variable and template fields, ordered by first appearance in the template.
 *
 * @param source - Full text of the `.astro` file.
 * @param sectionKey - Stored as `key` on the section and `sectionKey` on the analyzer result.
 * @param componentName - Passed through to the result.
 * @param componentPath - Passed through to the result.
 * @param options - `mode: 'page'` keeps arrays that are only used via `.map()` as top-level fields.
 * @returns The inferred section plus the full analyzer result under `_analyzerResult`.
 */
export async function analyzeAstroSection(
  source: string,
  sectionKey: string,
  componentName: string,
  componentPath: string,
  options?: { mode?: 'component' | 'page' },
): Promise<InferredSection & { _analyzerResult: AnalyzerResult }> {
  // Template offset carried temporarily on a field as sort key; stripped before returning.
  type PositionedField = InferredField & { _pos?: number }

  const isPageMode = options?.mode === 'page'
  const alreadyIntegrated =
    source.includes('data-sk-field') ||
    source.includes('getSection(') ||
    source.includes('setzkasten:content')

  const { frontmatter, template, templateOffset } = splitAstroFile(source)

  // Filter variables: skip prop values, fallbacks, and .map()-only sources
  const variables = extractFrontmatterVariables(frontmatter).filter((name) => {
    const propUsageRegex = new RegExp(`\\w+={${name}}`)
    if (propUsageRegex.test(template)) return false
    const fallbackRegex = new RegExp(`(?:\\?\\?|\\|\\|)\\s*${name}\\b`)
    if (fallbackRegex.test(frontmatter) || fallbackRegex.test(template)) return false
    // In component mode, .map()-only arrays become repeatedGroup inner fields
    // and should not also appear as top-level fields.
    // In page mode, standalone .map() arrays ARE the content → keep them.
    if (!isPageMode) {
      const hasMapUsage = new RegExp(`\\b${name}\\.map\\s*\\(`).test(template)
      const hasNonMapUsage = new RegExp(`\\b${name}\\b(?!\\.map)`).test(template)
      if (hasMapUsage && !hasNonMapUsage) return false
    }
    return true
  })

  const variableFields: PositionedField[] = inferFields(variables, template)

  for (const field of variableFields) {
    if (field.defaultValue) continue
    // Only replace the inferred default when a literal was actually found;
    // otherwise a valid falsy default ('' / 0 / false) would be wiped to undefined.
    const extracted = extractFrontmatterValue(frontmatter, field.key)
    if (extracted !== undefined) field.defaultValue = extracted
  }

  // Extract template fields + repeated groups (Phase 1)
  const { fields: templateFields, repeatedGroups } = await extractTemplateFields(template, frontmatter)

  // Fix positions: extractTemplateFields already subtracts WRAPPER_OFFSET internally,
  // so positions are relative to the template string. Add templateOffset for full source.
  const posAdjust = templateOffset

  for (const group of repeatedGroups) {
    for (const inst of group.instances) {
      inst.start += posAdjust
      inst.end += posAdjust
    }
    for (const field of group.fields) {
      for (const pos of field.positions) {
        // Entries may be missing for instances that lack the field.
        if (pos) pos.offset += posAdjust
      }
    }
    for (const instAttrs of group.classAttrs ?? []) {
      for (const attr of instAttrs) {
        attr.sourceOffset += posAdjust
      }
    }
  }

  // Phase 2: Enrich labels on repeated group inner fields
  for (const group of repeatedGroups) {
    enrichFieldLabels(group.fields)
  }

  // Assign template positions to variable fields
  for (const field of variableFields) {
    const usageRegex = new RegExp(`\\{\\s*(?:\\w+\\.)?${field.key}(?:[.\\s}(])`, 's')
    const usageMatch = usageRegex.exec(template)
    if (usageMatch) {
      field._pos = usageMatch.index
      continue
    }
    const mapUsage = template.indexOf(`${field.key}.map(`)
    field._pos = mapUsage !== -1 ? mapUsage : Infinity
  }

  // Merge all fields and sort by template position (variable fields win on key clashes)
  const existingKeys = new Set(variableFields.map((f) => f.key))
  const allFields: PositionedField[] = [
    ...variableFields,
    ...templateFields.filter((f) => !existingKeys.has(f.key)),
  ]
  allFields.sort((a, b) => (a._pos ?? Infinity) - (b._pos ?? Infinity))

  // Return shallow copies without the temporary sort key.
  const fields = allFields.map((field): InferredField => {
    const copy: PositionedField = { ...field }
    delete copy._pos
    return copy
  })

  const analyzerResult: AnalyzerResult = {
    sectionKey,
    componentName,
    componentPath,
    alreadyIntegrated,
    fields,
    repeatedGroups,
    frontmatter,
    template,
  }

  return {
    key: sectionKey,
    componentName,
    componentPath,
    fields,
    alreadyIntegrated,
    _analyzerResult: analyzerResult,
  }
}
|
|
186
|
+
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
// Frontmatter utilities (unchanged from v1)
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
/**
 * Split an `.astro` source into its frontmatter and template parts.
 *
 * The file must start with `---`; the closing fence is the first later line that
 * starts with `---`. Without an opening or closing fence the whole source is
 * treated as template. Handles both LF and CRLF line endings.
 *
 * @returns `frontmatter` (text between the fences, without the fence line breaks),
 *          `template` (everything after the closing fence line) and `templateOffset`,
 *          the index in `source` at which `template` starts.
 */
function splitAstroFile(source: string): { frontmatter: string; template: string; templateOffset: number } {
  if (!source.startsWith('---')) return { frontmatter: '', template: source, templateOffset: 0 }

  // Find closing --- marker (starts at beginning of a line after the opening ---)
  const closeIdx = source.indexOf('\n---', 3)
  if (closeIdx === -1) return { frontmatter: '', template: source, templateOffset: 0 }

  // Body starts after "---\n" (or "---\r\n") and ends before the line break preceding the closing fence.
  const bodyStart = source[3] === '\r' ? 5 : 4
  const bodyEnd = source[closeIdx - 1] === '\r' ? closeIdx - 1 : closeIdx
  const frontmatter = source.slice(bodyStart, bodyEnd)

  let templateStart = closeIdx + 4 // skip "\n---"
  // Skip trailing blanks (and the "\r" of a CRLF) on the closing --- line, then its newline.
  while (templateStart < source.length) {
    const ch = source[templateStart]
    if (ch !== ' ' && ch !== '\t' && ch !== '\r') break
    templateStart++
  }
  if (source[templateStart] === '\n') templateStart++

  return { frontmatter, template: source.slice(templateStart), templateOffset: templateStart }
}
|
|
203
|
+
|
|
204
|
+
/**
 * Collect the names of frontmatter variables that are candidates for content fields.
 *
 * Three sources are scanned with regular expressions (no real parsing):
 *  - `const` / `let` declarations, minus internal names, exported declarations,
 *    values derived via `.map(` or optional chaining, `default*` arrays,
 *    arrow functions and `getPage(s)` / `getSection` / `getCollectionEntry` calls;
 *  - names destructured from `Astro.props`;
 *  - members of an `interface Props { ... }` block.
 *
 * @returns De-duplicated names in discovery order.
 */
function extractFrontmatterVariables(frontmatter: string): string[] {
  const variables: string[] = []

  const declRegex = /(?:const|let)\s+(\w+)\s*=\s*(.*)/g
  let match: RegExpExecArray | null
  while ((match = declRegex.exec(frontmatter)) !== null) {
    const name = match[1]!
    const rhs = match[2]?.trim() ?? ''
    if (isInternalVariable(name)) continue
    // Skip exported declarations (e.g. "export const prerender = true")
    const textBefore = frontmatter.slice(Math.max(0, match.index - 10), match.index)
    if (/export\s*$/.test(textBefore)) continue
    // Skip values derived from other data (mapped arrays, optional-chained lookups)
    if (/\.\s*map\s*\(/.test(rhs) || /\w+\?\.\w+/.test(rhs)) continue
    if (/^\[/.test(rhs) && /^default/i.test(name)) continue
    // Skip arrow function declarations (internal helpers, not content fields)
    if (/^\(|^\w+\s*=>/.test(rhs) && /=>/.test(rhs)) continue
    // Skip setzkasten API calls and other framework calls that return data objects
    if (/\bget(?:Page|Pages|Section|CollectionEntry)\s*\(/.test(rhs)) continue
    variables.push(name)
  }

  const propsMatch = frontmatter.match(/const\s+\{\s*([^}]+)\}\s*=\s*Astro\.props/)
  if (propsMatch) {
    // The comma split is naive: rest elements (`...rest`) and pieces of default
    // values that contain commas (`b)` from `fn(a, b)`) end up as entries.
    // Keep only plain identifiers so such fragments never reach the RegExp
    // constructors that interpolate these names later on.
    const identifier = /^[A-Za-z_$][\w$]*$/
    const props = propsMatch[1]!
      .split(',')
      .map((entry) => entry.trim().split(':')[0]!.split('=')[0]!.trim())
      .filter((entry) => identifier.test(entry) && !isInternalVariable(entry))
    variables.push(...props)
  }

  const interfaceMatch = frontmatter.match(/interface\s+Props\s*\{([^}]+)\}/s)
  if (interfaceMatch) {
    const members = interfaceMatch[1]!
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line && !line.startsWith('//'))
      .map((line) => line.match(/^(\w+)\??:/)?.[1])
      .filter((name): name is string => !!name && !isInternalVariable(name))
    variables.push(...members)
  }

  return [...new Set(variables)]
}
|
|
245
|
+
|
|
246
|
+
/**
 * Read the literal assigned to `varName` in a `const` / `let` frontmatter declaration.
 *
 * Supported literals, tried in this order:
 *  - array literal → parsed by `extractInlineArrayValues`;
 *  - single- or double-quoted, non-empty, single-line string (the closing quote must
 *    be the same kind as the opening one; escape sequences are not interpreted);
 *  - integer or decimal number, optionally negative.
 *
 * @returns The literal value, or `undefined` when no supported literal is found.
 */
function extractFrontmatterValue(frontmatter: string, varName: string): unknown | undefined {
  const declaration = `(?:const|let)\\s+${varName}\\s*=\\s*`

  const arrayStartMatch = new RegExp(`${declaration}\\[`).exec(frontmatter)
  if (arrayStartMatch) {
    const startIdx = arrayStartMatch.index + arrayStartMatch[0].length
    // Walk to the bracket that closes the array; endIdx ends up on that bracket
    // (or on the last character if the array is never closed).
    let depth = 1
    let endIdx = startIdx
    for (let i = startIdx; i < frontmatter.length && depth > 0; i++) {
      if (frontmatter[i] === '[') depth++
      else if (frontmatter[i] === ']') depth--
      endIdx = i
    }
    const content = frontmatter.slice(startIdx, endIdx)
    const isObjectArray = /\{/.test(content)
    const isStringArray = /^\s*['"]/.test(content.trim())
    return extractInlineArrayValues(content, isObjectArray, isStringArray)
  }

  // Match each quote kind separately so an apostrophe inside "..." (or a double
  // quote inside '...') does not terminate the string early.
  const strMatch = new RegExp(`${declaration}(?:'([^'\\n]+)'|"([^"\\n]+)")`).exec(frontmatter)
  if (strMatch) return strMatch[1] ?? strMatch[2]!

  const numMatch = new RegExp(`${declaration}(-?\\d+(?:\\.\\d+)?)`).exec(frontmatter)
  if (numMatch) return Number(numMatch[1]!)

  return undefined
}
|
|
271
|
+
|
|
272
|
+
/**
 * Tell whether a variable name should never become a content field:
 * either it starts with an underscore or it is one of a fixed list of
 * framework / generic names.
 */
function isInternalVariable(name: string): boolean {
  if (name.startsWith('_')) return true

  const reserved = [
    'Astro', 'props', 'data', 'class', 'className', 'style', 'id',
    'slot', 'Fragment', 'Component', 'frontmatter', 'url', 'site',
    'generator', 'redirect', 'response', 'request', 'cookies',
    'params', 'slots',
  ]
  return reserved.includes(name)
}
|
|
281
|
+
|
|
282
|
+
// ---------------------------------------------------------------------------
|
|
283
|
+
// Byte → Char offset conversion
|
|
284
|
+
// ---------------------------------------------------------------------------
|
|
285
|
+
|
|
286
|
+
/**
 * Build a converter from UTF-8 byte offsets to JavaScript string (UTF-16 code unit) offsets.
 *
 * For sources whose UTF-8 length equals their string length the identity function
 * is returned. Otherwise a lookup table maps every byte of a character to the string
 * index where that character starts; offsets <= 0 map to 0 and offsets beyond the
 * table map to `source.length`.
 */
function buildByteToCharMap(source: string): (byteOffset: number) => number {
  const totalBytes = Buffer.from(source, 'utf-8').length
  if (totalBytes === source.length) return (offset) => offset

  const table = new Array<number>(totalBytes + 1)
  let bytePos = 0
  let charPos = 0
  // Iterating the string yields whole code points (surrogate pairs stay together).
  for (const glyph of source) {
    const codePoint = glyph.codePointAt(0)!
    let width: number
    if (codePoint <= 0x7f) width = 1
    else if (codePoint <= 0x7ff) width = 2
    else if (codePoint <= 0xffff) width = 3
    else width = 4

    table.fill(charPos, bytePos, bytePos + width)
    bytePos += width
    charPos += glyph.length
  }
  table[bytePos] = source.length

  return (offset) => {
    if (offset <= 0) return 0
    if (offset >= table.length) return source.length
    return table[offset]!
  }
}
|
|
301
|
+
|
|
302
|
+
/**
 * Rewrite, in place, every start/end offset in the subtree rooted at `node`
 * (including attribute start offsets) by passing it through `b2c`.
 */
function convertAstPositions(node: AstNode, b2c: (offset: number) => number): void {
  const location = node.position
  if (location?.start) location.start.offset = b2c(location.start.offset)
  if (location?.end) location.end.offset = b2c(location.end.offset)

  for (const attr of node.attributes ?? []) {
    const attrStart = attr.position?.start
    if (attrStart) attrStart.offset = b2c(attrStart.offset)
  }

  for (const child of node.children ?? []) {
    convertAstPositions(child, b2c)
  }
}
|
|
314
|
+
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
// AST helper utilities
|
|
317
|
+
// ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
/**
 * Number of characters subtracted from AST offsets to make them relative to the
 * template string (the template is parsed behind an empty `---\n---\n` frontmatter
 * wrapper, which is 8 characters long).
 */
const WRAPPER_OFFSET = 8

/** Template-relative start offset of `node`; `Infinity` when the node has no start position. */
function nodeOffset(node: AstNode): number {
  const rawStart = node.position?.start?.offset
  return (rawStart ?? Infinity) - WRAPPER_OFFSET
}

/** Template-relative end offset of `node`; `Infinity` when the node has no end position. */
function nodeEnd(node: AstNode): number {
  const rawEnd = node.position?.end?.offset
  return (rawEnd ?? Infinity) - WRAPPER_OFFSET
}
|
|
328
|
+
|
|
329
|
+
/** Value of the node's first `class` or `className` attribute, or `''` when there is none. */
function getClassValue(node: AstNode): string {
  const classAttr = (node.attributes ?? []).find(
    (attr) => attr.name === 'class' || attr.name === 'className',
  )
  return classAttr?.value ?? ''
}
|
|
334
|
+
|
|
335
|
+
/** First attribute of `node` called `name`, or `undefined` when the node has no such attribute. */
function getAttr(node: AstNode, name: string): AstAttr | undefined {
  for (const attr of node.attributes ?? []) {
    if (attr.name === name) return attr
  }
  return undefined
}
|
|
338
|
+
|
|
339
|
+
/** True when the node's first `aria-hidden` attribute has the value `'true'`. */
function isAriaHidden(node: AstNode): boolean {
  const hiddenAttr = node.attributes?.find((attr) => attr.name === 'aria-hidden')
  return hiddenAttr?.value === 'true'
}
|
|
343
|
+
|
|
344
|
+
/**
 * Concatenate the visible text below `node`.
 *
 * - Text nodes contribute their value.
 * - Expression nodes contribute only the string literal that follows a `??`
 *   (e.g. `{x ?? 'Default'}` → `Default`); with `stripCmsBound`, expressions that
 *   start with an optional-chained access (`foo?.bar`) contribute nothing.
 * - Any other node contributes the text of its children.
 */
function extractTextContent(node: AstNode, stripCmsBound: boolean = false): string {
  if (node.type === 'text') return node.value ?? ''

  if (node.type === 'expression') {
    const code = (node.children ?? []).map((part) => part.value ?? '').join('')
    if (stripCmsBound && /^\s*\(?\s*\w+\?\.\s*\w+/.test(code)) return ''
    const literalFallback = /\?\?\s*['"]([^'"]+)['"]/.exec(code)
    return literalFallback ? literalFallback[1]! : ''
  }

  return (node.children ?? []).reduce(
    (collected, child) => collected + extractTextContent(child, stripCmsBound),
    '',
  )
}
|
|
360
|
+
|
|
361
|
+
/** Tags that indicate inline formatting (bold, italic, links, code, color spans, etc.) */
const INLINE_FORMATTING_TAGS = new Set([
  'strong', 'b', 'em', 'i', 'mark', 'code', 'del', 'ins',
  'sub', 'sup', 'a', 'abbr', 'cite', 'u', 's', 'small',
  'span', // color/style spans inside text content
])

/**
 * Check whether any descendant of `node` (at any depth) is an element whose tag
 * is listed in INLINE_FORMATTING_TAGS. The node itself is not tested.
 */
function hasInlineFormatting(node: AstNode): boolean {
  return (node.children ?? []).some((child) => {
    const isFormattingElement =
      child.type === 'element' && INLINE_FORMATTING_TAGS.has(child.name ?? '')
    return isFormattingElement || hasInlineFormatting(child)
  })
}
|
|
377
|
+
|
|
378
|
+
/**
 * Collect all class="..." attributes from an element subtree.
 * Returns them with a structural path (child index path) for cross-instance matching.
 *
 * Only quoted, non-empty `class` attributes are collected, and only when `source`
 * is given: the attribute's offset is found by searching `source` for `class="`
 * between the element start and the next `>`.
 * Path segments have the form `tag:nth`, where `nth` counts same-tag elements
 * among a node's children (elements sitting directly inside a child expression
 * share the counter of that node).
 */
function collectClassAttrs(node: AstNode, path: string = '', source?: string): import('./analyzer-types.js').ClassAttrInfo[] {
  const collected: import('./analyzer-types.js').ClassAttrInfo[] = []

  if (node.type === 'element' && node.attributes && source) {
    const quotedClass = node.attributes.find((attr) => attr.name === 'class' && attr.kind === 'quoted')
    if (quotedClass && quotedClass.value) {
      // The AST position points to the element start, not the attribute,
      // so locate the actual `class="` inside the opening tag.
      const tagStart = nodeOffset(node)
      const tagClose = source.indexOf('>', tagStart)
      const attrStart = tagClose === -1 ? -1 : source.indexOf('class="', tagStart)
      if (attrStart !== -1 && attrStart < tagClose) {
        collected.push({
          path,
          value: quotedClass.value,
          sourceOffset: attrStart,
          // class="value" → 'class="' + value + '"'
          sourceLength: 'class="'.length + quotedClass.value.length + '"'.length,
        })
      }
    }
  }

  if (!node.children) return collected

  // Use tag:nth as path key (more robust than child index for optional elements)
  const seenPerTag: Record<string, number> = {}
  const descendInto = (candidate: AstNode): void => {
    if (candidate.type !== 'element' || !candidate.name) return
    const tag = candidate.name
    const nth = seenPerTag[tag] ?? 0
    seenPerTag[tag] = nth + 1
    const childPath = path ? `${path}/${tag}:${nth}` : `${tag}:${nth}`
    collected.push(...collectClassAttrs(candidate, childPath, source))
  }

  for (const child of node.children) {
    if (child.type === 'element') {
      descendInto(child)
    } else if (child.type === 'expression' && child.children) {
      // Recurse into expression nodes (e.g. {items.map(() => (<li class="...">))})
      for (const exprChild of child.children) descendInto(exprChild)
    }
  }

  return collected
}
|
|
432
|
+
|
|
433
|
+
/** True when `node` itself, or any node below it, is an element named `tagName`. */
function containsElement(node: AstNode, tagName: string): boolean {
  const isMatch = node.type === 'element' && node.name === tagName
  if (isMatch) return true
  return (node.children ?? []).some((child) => containsElement(child, tagName))
}
|
|
442
|
+
|
|
443
|
+
/**
 * Turn an AST subtree back into markup text.
 *
 * - Text nodes yield their value; expression nodes yield `{...}` around the joined
 *   values of their children.
 * - Elements/components yield an opening tag with their attributes (`name={value}`
 *   for expression attributes, bare `name` for empty ones, `name="value"` for every
 *   other kind), the serialized children and — when the node has a name — a closing tag.
 * - Any other node type yields just its serialized children.
 *
 * Attribute values are emitted as-is (no escaping) and every named element gets a
 * closing tag, including ones such as `img` or `br`.
 */
function serializeNode(node: AstNode): string {
  if (node.type === 'text') return node.value ?? ''

  if (node.type === 'expression') {
    const code = (node.children ?? []).map((part) => part.value ?? '').join('')
    return `{${code}}`
  }

  const isTag = node.type === 'element' || node.type === 'component'
  const pieces: string[] = []

  if (isTag) {
    const attrText = (node.attributes ?? []).map((attr) => {
      switch (attr.kind) {
        case 'expression':
          return ` ${attr.name}={${attr.value}}`
        case 'empty':
          return ` ${attr.name}`
        default:
          return ` ${attr.name}="${attr.value}"`
      }
    })
    pieces.push(`<${node.name ?? ''}${attrText.join('')}>`)
  }

  for (const child of node.children ?? []) {
    pieces.push(serializeNode(child))
  }

  if (isTag && node.name) pieces.push(`</${node.name}>`)

  return pieces.join('')
}
|
|
468
|
+
|
|
469
|
+
/**
 * Convert a camelCase identifier into a label: a space is inserted before every
 * ASCII capital, the first character is upper-cased and the result is trimmed
 * (`heroTitle` → `Hero Title`).
 */
function camelToLabel(str: string): string {
  const spaced = str.replace(/[A-Z]/g, (capital) => ` ${capital}`)
  const capitalized = spaced.charAt(0).toUpperCase() + spaced.slice(1)
  return capitalized.trim()
}
|
|
472
|
+
|
|
473
|
+
/**
 * Guess a field type from a field name using substring heuristics.
 * Rules are checked in order (icon → image → color → number → boolean); the first
 * hit wins and `'text'` is the fallback. All checks are case-insensitive except the
 * `isX` / `hasX` prefixes, which require a capital after the prefix.
 */
function inferInnerFieldType(name: string): InferredField['type'] {
  const lowered = name.toLowerCase()

  const substringRules: Array<[RegExp, InferredField['type']]> = [
    [/icon/, 'icon'],
    [/image|img|photo|avatar|logo|thumbnail|src/, 'image'],
    [/color|colour/, 'color'],
    [/count|amount|number|quantity|total|rating|score|percent|order|index|size|width|height/, 'number'],
  ]
  for (const [pattern, fieldType] of substringRules) {
    if (pattern.test(lowered)) return fieldType
  }

  const hasBooleanPrefix = /^is[A-Z]/.test(name) || /^has[A-Z]/.test(name)
  const hasBooleanWord = /enabled|disabled|visible|hidden|active|checked|selected|highlight|accent|featured/.test(lowered)
  return hasBooleanPrefix || hasBooleanWord ? 'boolean' : 'text'
}
|
|
482
|
+
|
|
483
|
+
// ---------------------------------------------------------------------------
|
|
484
|
+
// Inline array extraction (unchanged from v1)
|
|
485
|
+
// ---------------------------------------------------------------------------
|
|
486
|
+
|
|
487
|
+
/**
 * Extract the items of an inline array literal.
 *
 * @param arrayContent - Text between the array's outer `[` and `]`.
 * @param isObjectArray - Whether the content contains object literals.
 * @param isStringArray - Whether the content starts with a quoted string.
 * @returns For string arrays (or anything that is not an object array): every
 *          single- or double-quoted string found, in order. Otherwise: one parsed
 *          object per top-level `{...}`; objects without any parsed key are dropped.
 */
function extractInlineArrayValues(arrayContent: string, isObjectArray: boolean, isStringArray: boolean): unknown[] {
  if (isStringArray || !isObjectArray) {
    const strings: string[] = []
    // Match each quote kind on its own so an apostrophe inside "..." (or a double
    // quote inside '...') is kept as part of the string; empty strings are kept too.
    const quoted = /'([^']*)'|"([^"]*)"/g
    let hit: RegExpExecArray | null
    while ((hit = quoted.exec(arrayContent)) !== null) {
      strings.push(hit[1] ?? hit[2] ?? '')
    }
    return strings
  }

  const objects: Array<Record<string, unknown>> = []
  let i = 0
  while (i < arrayContent.length) {
    if (arrayContent[i] !== '{') {
      i++
      continue
    }
    const end = findMatchingBracket(arrayContent, i, '{', '}')
    const parsed = parseObjectLiteral(arrayContent.slice(i + 1, end))
    if (Object.keys(parsed).length > 0) objects.push(parsed)
    i = end + 1
  }
  return objects
}
|
|
508
|
+
|
|
509
|
+
/**
 * Find the bracket that closes the one at `start`.
 *
 * Brackets inside string literals are ignored: single-quoted, double-quoted and
 * backtick strings are skipped as a whole, and a backslash inside a string escapes
 * the following character (so `'it\'s'` is one string).
 *
 * @param source - Text to scan.
 * @param start - Index of the opening bracket.
 * @param open - Opening bracket character.
 * @param close - Closing bracket character.
 * @returns Index of the matching closing bracket. If the bracket is never closed,
 *          the last index of `source` (or `source.length` when the scan ended
 *          inside an unterminated string).
 */
function findMatchingBracket(source: string, start: number, open: string, close: string): number {
  let depth = 1
  let i = start + 1
  while (i < source.length && depth > 0) {
    const ch = source[i]!
    if (ch === "'" || ch === '"' || ch === '`') {
      // Skip to the closing quote of the same kind, honoring backslash escapes.
      i++
      while (i < source.length && source[i] !== ch) {
        if (source[i] === '\\') i++
        i++
      }
    } else if (ch === open) {
      depth++
    } else if (ch === close) {
      depth--
    }
    i++
  }
  // An escape at the very end of an unterminated string can overshoot; clamp.
  return Math.min(i - 1, source.length)
}
|
|
526
|
+
|
|
527
|
+
/**
 * Parse the inside of a simple object literal (the text between `{` and `}`).
 *
 * Recognized shape: `key: value` pairs with unquoted `\w+` keys. Supported values:
 * quoted strings (escape sequences are not interpreted), arrays (of strings, or of
 * objects via `extractInlineArrayValues`), nested objects, integer/decimal numbers
 * (optionally negative) and `true` / `false`. Keys with any other value (identifiers,
 * calls, `null`, ...) are skipped up to the next comma.
 *
 * @param objStr - Object body without the surrounding braces.
 * @returns The parsed key/value pairs.
 */
function parseObjectLiteral(objStr: string): Record<string, unknown> {
  const obj: Record<string, unknown> = {}
  let i = 0
  while (i < objStr.length) {
    // Skip separators between entries.
    while (i < objStr.length && /[\s,]/.test(objStr[i]!)) i++
    if (i >= objStr.length) break

    const keyMatch = objStr.slice(i).match(/^(\w+)\s*:\s*/)
    if (!keyMatch) { i++; continue }
    const key = keyMatch[1]!
    i += keyMatch[0].length

    const ch = objStr[i]
    const rest = objStr.slice(i)
    let literal: RegExpExecArray | null

    if (ch === "'" || ch === '"') {
      let j = i + 1
      while (j < objStr.length && objStr[j] !== ch) j++
      obj[key] = objStr.slice(i + 1, j)
      i = j + 1
    } else if (ch === '[') {
      const end = findMatchingBracket(objStr, i, '[', ']')
      const innerContent = objStr.slice(i + 1, end)
      if (/\{/.test(innerContent)) {
        obj[key] = extractInlineArrayValues(innerContent, true, false)
      } else {
        // Match each quote kind on its own so `"Don't"` stays one item.
        const items: string[] = []
        const quoted = /'([^']*)'|"([^"]*)"/g
        let hit: RegExpExecArray | null
        while ((hit = quoted.exec(innerContent)) !== null) items.push(hit[1] ?? hit[2] ?? '')
        obj[key] = items
      }
      i = end + 1
    } else if (ch === '{') {
      const end = findMatchingBracket(objStr, i, '{', '}')
      obj[key] = parseObjectLiteral(objStr.slice(i + 1, end))
      i = end + 1
    } else if ((literal = /^-?\d+(?:\.\d+)?/.exec(rest)) !== null) {
      obj[key] = Number(literal[0])
      i += literal[0].length
    } else if ((literal = /^(?:true|false)\b/.exec(rest)) !== null) {
      // \b keeps identifiers such as `trueColor` from being read as a boolean.
      obj[key] = literal[0] === 'true'
      i += literal[0].length
    } else {
      // Unsupported value: skip it.
      while (i < objStr.length && objStr[i] !== ',') i++
    }
  }
  return obj
}
|
|
574
|
+
|
|
575
|
+
// ---------------------------------------------------------------------------
|
|
576
|
+
// Template content extraction — Steps 0-11 unchanged, Step 12 rewritten
|
|
577
|
+
// ---------------------------------------------------------------------------
|
|
578
|
+
|
|
579
|
+
/** Result of `extractTemplateFields`: the fields found in the template plus the detected repeated groups. */
interface ExtractResult {
  repeatedGroups: RepeatedGroup[]
  fields: InferredField[]
}
|
|
583
|
+
|
|
584
|
+
async function extractTemplateFields(template: string, frontmatter: string = ''): Promise<ExtractResult> {
|
|
585
|
+
const fields: Array<InferredField & { _pos: number }> = []
|
|
586
|
+
const usedKeys = new Set<string>()
|
|
587
|
+
const repeatedGroups: RepeatedGroup[] = []
|
|
588
|
+
|
|
589
|
+
let ast: AstNode
|
|
590
|
+
try {
|
|
591
|
+
const wrappedSource = `---\n---\n${template}`
|
|
592
|
+
const result = await parse(wrappedSource)
|
|
593
|
+
ast = result.ast as unknown as AstNode
|
|
594
|
+
convertAstPositions(ast, buildByteToCharMap(wrappedSource))
|
|
595
|
+
} catch (err) {
|
|
596
|
+
console.error('[setzkasten] section-analyzer: parse() failed:', err)
|
|
597
|
+
return { fields, repeatedGroups }
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
function addField(field: InferredField, pos?: number) {
|
|
601
|
+
if (!usedKeys.has(field.key)) {
|
|
602
|
+
usedKeys.add(field.key)
|
|
603
|
+
let finalPos = pos ?? -1
|
|
604
|
+
if (finalPos === -1) {
|
|
605
|
+
if (typeof field.defaultValue === 'string' && field.defaultValue.length >= 3) {
|
|
606
|
+
finalPos = template.indexOf(field.defaultValue)
|
|
607
|
+
} else if (Array.isArray(field.defaultValue) && field.defaultValue.length > 0) {
|
|
608
|
+
const firstItem = typeof field.defaultValue[0] === 'string'
|
|
609
|
+
? field.defaultValue[0]
|
|
610
|
+
: typeof field.defaultValue[0] === 'object' && field.defaultValue[0]
|
|
611
|
+
? Object.values(field.defaultValue[0])[0]
|
|
612
|
+
: null
|
|
613
|
+
if (typeof firstItem === 'string' && firstItem.length >= 2) {
|
|
614
|
+
finalPos = template.indexOf(firstItem)
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
if (finalPos === -1) finalPos = template.indexOf(`?.${field.key}`)
|
|
619
|
+
if (finalPos === -1) {
|
|
620
|
+
const keyRegex = new RegExp(`(?<![\\w-])${field.key}(?![\\w-])`)
|
|
621
|
+
const keyMatch = keyRegex.exec(template)
|
|
622
|
+
if (keyMatch) finalPos = keyMatch.index
|
|
623
|
+
}
|
|
624
|
+
fields.push({ ...field, _pos: finalPos === -1 ? Infinity : finalPos })
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
/**
 * Builds a field key from a base name and a counter.
 * A count of exactly 1 yields the bare base; any other count is appended directly
 * to the base (e.g. `heading` + 2 -> `heading2`).
 */
function numberedKey(base: string, count: number): string {
  if (count === 1) {
    return base
  }
  return base + String(count)
}
|
|
631
|
+
|
|
632
|
+
/**
 * Builds a human-readable label from a base label and a counter.
 * A count of exactly 1 yields the bare base; any other count is appended after a
 * single space (e.g. `Heading` + 2 -> `Heading 2`).
 */
function numberedLabel(base: string, count: number): string {
  if (count === 1) {
    return base
  }
  return base + ' ' + String(count)
}
|
|
635
|
+
|
|
636
|
+
// Collect aria-hidden ranges
|
|
637
|
+
const ariaHiddenRanges: Array<{ start: number; end: number }> = []
|
|
638
|
+
walkAst(ast, (node) => {
|
|
639
|
+
if (node.type !== 'element' && node.type !== 'component') return
|
|
640
|
+
const isHidden = isAriaHidden(node) || getAttr(node, 'role')?.value === 'img'
|
|
641
|
+
if (isHidden) {
|
|
642
|
+
const start = nodeOffset(node)
|
|
643
|
+
const end = node.position?.end?.offset ? node.position.end.offset - WRAPPER_OFFSET : start + 1
|
|
644
|
+
ariaHiddenRanges.push({ start, end })
|
|
645
|
+
}
|
|
646
|
+
})
|
|
647
|
+
|
|
648
|
+
// Collect .map() expression ranges
|
|
649
|
+
const mapExpressionRanges: Array<{ start: number; end: number }> = []
|
|
650
|
+
walkAst(ast, (node) => {
|
|
651
|
+
if (node.type !== 'expression') return
|
|
652
|
+
const exprCode = (node.children ?? []).map((c) => c.value ?? '').join('')
|
|
653
|
+
if (/\.map\s*\(/.test(exprCode)) {
|
|
654
|
+
const start = nodeOffset(node)
|
|
655
|
+
const end = node.position?.end?.offset ? node.position.end.offset - WRAPPER_OFFSET : start + 1
|
|
656
|
+
mapExpressionRanges.push({ start, end })
|
|
657
|
+
}
|
|
658
|
+
})
|
|
659
|
+
|
|
660
|
+
// Collect repeated sibling element groups
|
|
661
|
+
const SEMANTIC_REPEATED_TAGS = new Set(['article', 'aside', 'figure', 'details', 'blockquote'])
|
|
662
|
+
const repeatedElementGroups: Array<{ tag: string; instances: AstNode[] }> = []
|
|
663
|
+
const repeatedElementRanges: Array<{ start: number; end: number }> = []
|
|
664
|
+
|
|
665
|
+
walkAst(ast, (node) => {
|
|
666
|
+
if (node.type !== 'element' && node.type !== 'fragment') return
|
|
667
|
+
if (!node.children || node.children.length < 2) return
|
|
668
|
+
const childrenByTag = new Map<string, AstNode[]>()
|
|
669
|
+
for (const child of node.children) {
|
|
670
|
+
if (child.type !== 'element') continue
|
|
671
|
+
const tag = child.name ?? ''
|
|
672
|
+
if (!tag) continue
|
|
673
|
+
if (!childrenByTag.has(tag)) childrenByTag.set(tag, [])
|
|
674
|
+
childrenByTag.get(tag)!.push(child)
|
|
675
|
+
}
|
|
676
|
+
for (const [tag, siblings] of childrenByTag) {
|
|
677
|
+
if (siblings.length < 2) continue
|
|
678
|
+
if (!SEMANTIC_REPEATED_TAGS.has(tag)) continue
|
|
679
|
+
repeatedElementGroups.push({ tag, instances: siblings })
|
|
680
|
+
for (const inst of siblings) {
|
|
681
|
+
const start = nodeOffset(inst)
|
|
682
|
+
const end = inst.position?.end?.offset ? inst.position.end.offset - WRAPPER_OFFSET : start + 1
|
|
683
|
+
repeatedElementRanges.push({ start, end })
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
})
|
|
687
|
+
|
|
688
|
+
// Table rows: <tbody><tr> rows → repeatedElementGroup
|
|
689
|
+
// Detects only tbody rows (not thead) so column headers are excluded.
|
|
690
|
+
walkAst(ast, (node) => {
|
|
691
|
+
if (node.type !== 'element' || node.name !== 'tbody') return
|
|
692
|
+
const trRows = (node.children ?? []).filter(c => c.type === 'element' && c.name === 'tr')
|
|
693
|
+
if (trRows.length < 2) return
|
|
694
|
+
repeatedElementGroups.push({ tag: 'tr', instances: trRows })
|
|
695
|
+
for (const row of trRows) {
|
|
696
|
+
const start = nodeOffset(row)
|
|
697
|
+
const end = row.position?.end?.offset ? row.position.end.offset - WRAPPER_OFFSET : start + 1
|
|
698
|
+
repeatedElementRanges.push({ start, end })
|
|
699
|
+
}
|
|
700
|
+
})
|
|
701
|
+
|
|
702
|
+
/**
 * Reports whether `offset` falls inside any range stored in `ariaHiddenRanges`.
 * Range starts are inclusive, range ends are exclusive.
 */
function isInAriaHidden(offset: number): boolean {
  for (const range of ariaHiddenRanges) {
    if (offset >= range.start && offset < range.end) return true
  }
  return false
}
|
|
705
|
+
/**
 * Reports whether `offset` falls inside any range stored in `mapExpressionRanges`.
 * Range starts are inclusive, range ends are exclusive.
 */
function isInMapExpression(offset: number): boolean {
  for (const range of mapExpressionRanges) {
    if (offset >= range.start && offset < range.end) return true
  }
  return false
}
|
|
708
|
+
/**
 * Reports whether `offset` falls inside any range stored in `repeatedElementRanges`.
 * Range starts are inclusive, range ends are exclusive.
 */
function isInRepeatedElement(offset: number): boolean {
  for (const range of repeatedElementRanges) {
    if (offset >= range.start && offset < range.end) return true
  }
  return false
}
|
|
711
|
+
|
|
712
|
+
const cmsBoundOffsets = new Set<number>()
|
|
713
|
+
|
|
714
|
+
// ── 0. ALREADY-BOUND CMS FIELDS ────────────────────────────────────────
|
|
715
|
+
walkAst(ast, (node) => {
|
|
716
|
+
if (node.type !== 'expression') return
|
|
717
|
+
const exprCode = (node.children ?? []).map((c) => c.value ?? '').join('')
|
|
718
|
+
const cmsVarMatch = exprCode.match(/^\s*\(?\s*\w+\?\.\s*(\w+)/)
|
|
719
|
+
if (cmsVarMatch) {
|
|
720
|
+
const fieldKey = cmsVarMatch[1]!
|
|
721
|
+
// Array fallback: (skData?.items ?? ['a', 'b']).map(
|
|
722
|
+
const arrayFallbackMatch = exprCode.match(/\?\?\s*\[([^\]]*)\]\s*\)\.map\(/)
|
|
723
|
+
if (arrayFallbackMatch) {
|
|
724
|
+
const items: string[] = []
|
|
725
|
+
const itemRe = /`([^`]*)`|'([^']*)'|"([^"]*)"/g
|
|
726
|
+
let m: RegExpExecArray | null
|
|
727
|
+
while ((m = itemRe.exec(arrayFallbackMatch[1]!)) !== null) {
|
|
728
|
+
items.push(m[1] ?? m[2] ?? m[3] ?? '')
|
|
729
|
+
}
|
|
730
|
+
const hasFormatting = items.some(s => /<[a-z]/.test(s))
|
|
731
|
+
addField({
|
|
732
|
+
key: fieldKey, type: 'array', label: camelToLabel(fieldKey), confidence: 'high',
|
|
733
|
+
defaultValue: items.length > 0 ? items : undefined,
|
|
734
|
+
options: { arrayItem: { type: 'text', ...(hasFormatting ? { formatting: true } : {}) } },
|
|
735
|
+
}, nodeOffset(node))
|
|
736
|
+
} else {
|
|
737
|
+
// String fallback or no fallback
|
|
738
|
+
const strFallback = exprCode.match(/\?\?\s*(?:`([\s\S]*?)`|'([^']*)'|"([^"]*)")/)
|
|
739
|
+
const defaultValue = strFallback ? (strFallback[1] ?? strFallback[2] ?? strFallback[3] ?? '').trim() : undefined
|
|
740
|
+
const hasHtml = defaultValue ? /<[a-z]/.test(defaultValue) : false
|
|
741
|
+
addField({
|
|
742
|
+
key: fieldKey, type: 'text', label: camelToLabel(fieldKey), confidence: 'high',
|
|
743
|
+
...(defaultValue ? { defaultValue } : {}),
|
|
744
|
+
...(hasHtml ? { options: { formatting: true } } : {}),
|
|
745
|
+
}, nodeOffset(node))
|
|
746
|
+
}
|
|
747
|
+
cmsBoundOffsets.add(nodeOffset(node))
|
|
748
|
+
}
|
|
749
|
+
})
|
|
750
|
+
|
|
751
|
+
walkAst(ast, (node) => {
|
|
752
|
+
if (node.type !== 'element' && node.type !== 'component') return
|
|
753
|
+
for (const attr of node.attributes ?? []) {
|
|
754
|
+
if (attr.kind !== 'expression') continue
|
|
755
|
+
const cmsAttrMatch = attr.value.match(/^\s*\w+\?\.\s*(\w+)\s*\?\?\s*(?:`([\s\S]*?)`|'([^']*)'|"([^"]*)")/)
|
|
756
|
+
if (cmsAttrMatch) {
|
|
757
|
+
const fieldKey = cmsAttrMatch[1]!
|
|
758
|
+
const fallback = (cmsAttrMatch[2] ?? cmsAttrMatch[3] ?? cmsAttrMatch[4] ?? '').trim()
|
|
759
|
+
const pos = attr.position?.start?.offset ? attr.position.start.offset - WRAPPER_OFFSET : nodeOffset(node)
|
|
760
|
+
// formatting: true when fallback contains HTML tags,
|
|
761
|
+
// OR when the attribute is set:html without a fallback (content is always HTML then)
|
|
762
|
+
const isSetHtml = attr.name === 'set:html'
|
|
763
|
+
const fallbackHasHtml = fallback ? /<[a-z]/.test(fallback) : false
|
|
764
|
+
const hasHtml = fallbackHasHtml || (isSetHtml && !fallback)
|
|
765
|
+
addField({
|
|
766
|
+
key: fieldKey, type: 'text', label: camelToLabel(fieldKey), confidence: 'high',
|
|
767
|
+
...(fallback ? { defaultValue: fallback } : {}),
|
|
768
|
+
...(hasHtml ? { options: { formatting: true } } : {}),
|
|
769
|
+
}, pos)
|
|
770
|
+
cmsBoundOffsets.add(nodeOffset(node))
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
})
|
|
774
|
+
|
|
775
|
+
/**
 * Decides whether a node is excluded from the content-detection passes.
 * A node is skipped when its offset lies in an aria-hidden range, was recorded in
 * `cmsBoundOffsets`, or lies inside a repeated-element range. Checks run in that order.
 */
function shouldSkipForContent(node: AstNode): boolean {
  const at = nodeOffset(node)
  if (isInAriaHidden(at)) return true
  if (cmsBoundOffsets.has(at)) return true
  return isInRepeatedElement(at)
}
|
|
779
|
+
|
|
780
|
+
/**
 * Reports whether a node's offset lies inside one of the collected `.map()` expression ranges.
 */
function shouldSkipMapChild(node: AstNode): boolean {
  const at = nodeOffset(node)
  return isInMapExpression(at)
}
|
|
783
|
+
|
|
784
|
+
// ── 1. OVERLINE / EYEBROW ──────────────────────────────────────────────
|
|
785
|
+
walkAst(ast, (node) => {
|
|
786
|
+
if (node.type !== 'element') return
|
|
787
|
+
if (node.name !== 'p' && node.name !== 'span') return
|
|
788
|
+
if (shouldSkipForContent(node)) return
|
|
789
|
+
const classVal = getClassValue(node)
|
|
790
|
+
if (!/uppercase|tracking-widest/.test(classVal)) return
|
|
791
|
+
const text = extractTextContent(node, true).trim()
|
|
792
|
+
if (text.length >= 2 && text.length <= 80) {
|
|
793
|
+
addField({ key: 'overline', type: 'text', label: 'Overline', confidence: 'medium', defaultValue: text }, nodeOffset(node))
|
|
794
|
+
}
|
|
795
|
+
})
|
|
796
|
+
|
|
797
|
+
// ── 2. HEADINGS (h1-h6) ───────────────────────────────────────────────
|
|
798
|
+
// Start from the count of already-registered heading* keys (from Section 0)
|
|
799
|
+
// so new headings get non-colliding keys (heading3, heading4, ...).
|
|
800
|
+
let headingCount = Array.from(usedKeys).filter(k => k === 'heading' || /^heading\d+$/.test(k)).length
|
|
801
|
+
walkAst(ast, (node) => {
|
|
802
|
+
if (node.type !== 'element') return
|
|
803
|
+
if (!/^h[1-6]$/.test(node.name ?? '')) return
|
|
804
|
+
if (shouldSkipForContent(node)) return
|
|
805
|
+
const text = extractTextContent(node, true).replace(/\s+/g, ' ').trim()
|
|
806
|
+
if (text.length >= 2) {
|
|
807
|
+
headingCount++
|
|
808
|
+
const headingOpts: Record<string, unknown> = { required: true }
|
|
809
|
+
if (hasInlineFormatting(node)) headingOpts.formatting = true
|
|
810
|
+
addField({
|
|
811
|
+
key: numberedKey('heading', headingCount), type: 'text',
|
|
812
|
+
label: numberedLabel('Heading', headingCount), confidence: 'high',
|
|
813
|
+
defaultValue: text, options: headingOpts,
|
|
814
|
+
}, nodeOffset(node))
|
|
815
|
+
}
|
|
816
|
+
})
|
|
817
|
+
|
|
818
|
+
// ── 3. PARAGRAPHS / DESCRIPTION TEXT ───────────────────────────────────
|
|
819
|
+
// Start from the count of already-registered description* keys (from Section 0)
|
|
820
|
+
// so new description fields get non-colliding keys (description2, description3, ...).
|
|
821
|
+
let descCount = Array.from(usedKeys).filter(k => k === 'description' || /^description\d+$/.test(k)).length
|
|
822
|
+
walkAst(ast, (node) => {
|
|
823
|
+
if (node.type !== 'element') return
|
|
824
|
+
if (node.name !== 'p' && node.name !== 'div') return
|
|
825
|
+
if (shouldSkipForContent(node)) return
|
|
826
|
+
const classVal = getClassValue(node)
|
|
827
|
+
const serialized = serializeNode(node)
|
|
828
|
+
if (/uppercase|tracking-widest/.test(classVal) && serialized.length < 250) return
|
|
829
|
+
if (/text-2xl|text-3xl|text-\[11px\]|text-\[10px\]|text-\[9px\]|text-\[8px\]/.test(classVal)) return
|
|
830
|
+
if (node.name === 'div' && (containsElement(node, 'a') || containsElement(node, 'button'))) {
|
|
831
|
+
// Allow mixed-text divs (callouts with inline links) — skip only pure-element
|
|
832
|
+
// containers like nav/card wrappers that have no direct text nodes.
|
|
833
|
+
const hasMixedText = (node.children ?? []).some(
|
|
834
|
+
c => c.type === 'text' && (c.value ?? '').trim().length > 0,
|
|
835
|
+
)
|
|
836
|
+
if (!hasMixedText) return
|
|
837
|
+
}
|
|
838
|
+
if (node.name === 'div' && ['h1','h2','h3','h4','h5','h6'].some(h => containsElement(node, h))) return
|
|
839
|
+
if (node.name === 'div') {
|
|
840
|
+
const contentChildren = (node.children ?? []).filter(c => c.type !== 'text' || (c.value ?? '').trim().length > 0)
|
|
841
|
+
const hasOnlyElementChildren = contentChildren.length > 0 && contentChildren.every(c => c.type === 'element' || c.type === 'component')
|
|
842
|
+
if (hasOnlyElementChildren) return
|
|
843
|
+
}
|
|
844
|
+
const text = extractTextContent(node, true).replace(/\s+/g, ' ').trim()
|
|
845
|
+
if (text.length < 15) return
|
|
846
|
+
descCount++
|
|
847
|
+
const descOpts: Record<string, unknown> = { multiline: true }
|
|
848
|
+
if (hasInlineFormatting(node)) descOpts.formatting = true
|
|
849
|
+
addField({
|
|
850
|
+
key: numberedKey('description', descCount), type: 'text',
|
|
851
|
+
label: numberedLabel('Beschreibung', descCount), confidence: 'medium',
|
|
852
|
+
defaultValue: text, options: descOpts,
|
|
853
|
+
}, nodeOffset(node))
|
|
854
|
+
})
|
|
855
|
+
|
|
856
|
+
// ── 4. RICH TEXT (set:html) ────────────────────────────────────────────
|
|
857
|
+
let richCount = 0
|
|
858
|
+
walkAst(ast, (node) => {
|
|
859
|
+
if (node.type !== 'element' && node.type !== 'component') return
|
|
860
|
+
const setHtmlAttr = node.attributes?.find((a) => a.name === 'set:html')
|
|
861
|
+
if (!setHtmlAttr || setHtmlAttr.kind !== 'expression') return
|
|
862
|
+
const expr = setHtmlAttr.value.trim()
|
|
863
|
+
if (/^\w+\?\.\w+/.test(expr)) return
|
|
864
|
+
if (expr.startsWith("'") || expr.startsWith('"') || expr.includes('??')) {
|
|
865
|
+
richCount++
|
|
866
|
+
const strMatch = expr.match(/['"]([^'"]+)['"]/)
|
|
867
|
+
const fallbackMatch = expr.match(/\?\?\s*['"]([^'"]+)['"]/)
|
|
868
|
+
const value = fallbackMatch?.[1] ?? strMatch?.[1] ?? ''
|
|
869
|
+
addField({
|
|
870
|
+
key: numberedKey('richText', richCount), type: 'text',
|
|
871
|
+
label: numberedLabel('Rich Text', richCount), confidence: 'high',
|
|
872
|
+
defaultValue: value, options: { multiline: true, formatting: true },
|
|
873
|
+
}, nodeOffset(node))
|
|
874
|
+
}
|
|
875
|
+
})
|
|
876
|
+
|
|
877
|
+
// ── 5. BUTTONS & CTA TEXT ──────────────────────────────────────────────
|
|
878
|
+
let ctaCount = 0
|
|
879
|
+
walkAst(ast, (node) => {
|
|
880
|
+
if (node.type !== 'element') return
|
|
881
|
+
if (node.name !== 'a' && node.name !== 'button') return
|
|
882
|
+
if (shouldSkipForContent(node)) return
|
|
883
|
+
const classVal = getClassValue(node)
|
|
884
|
+
if (!/rounded|px-|py-|font-semibold|bg-/.test(classVal)) return
|
|
885
|
+
const text = extractTextContent(node, true).replace(/\s+/g, ' ').trim()
|
|
886
|
+
if (text.length >= 2 && text.length <= 60) {
|
|
887
|
+
ctaCount++
|
|
888
|
+
addField({
|
|
889
|
+
key: numberedKey('ctaText', ctaCount), type: 'text',
|
|
890
|
+
label: numberedLabel('Button Text', ctaCount), confidence: 'medium',
|
|
891
|
+
defaultValue: text,
|
|
892
|
+
}, nodeOffset(node))
|
|
893
|
+
}
|
|
894
|
+
})
|
|
895
|
+
|
|
896
|
+
// ── 6. CTA LINKS (href values) ─────────────────────────────────────────
|
|
897
|
+
let linkCount = 0
|
|
898
|
+
walkAst(ast, (node) => {
|
|
899
|
+
if (node.type !== 'element' || node.name !== 'a') return
|
|
900
|
+
if (shouldSkipForContent(node)) return
|
|
901
|
+
const classVal = getClassValue(node)
|
|
902
|
+
if (!/rounded|px-|py-|font-semibold|bg-/.test(classVal)) return
|
|
903
|
+
const hrefAttr = getAttr(node, 'href')
|
|
904
|
+
if (!hrefAttr) return
|
|
905
|
+
let href: string | undefined
|
|
906
|
+
if (hrefAttr.kind === 'quoted') {
|
|
907
|
+
href = hrefAttr.value
|
|
908
|
+
} else if (hrefAttr.kind === 'expression') {
|
|
909
|
+
if (/^\s*\w+\?\.\w+/.test(hrefAttr.value)) return
|
|
910
|
+
const fallback = hrefAttr.value.match(/\?\?\s*['"]([^'"]+)['"]/)
|
|
911
|
+
if (fallback) href = fallback[1]
|
|
912
|
+
}
|
|
913
|
+
if (!href) return
|
|
914
|
+
if (href.startsWith('#') || href.startsWith('javascript:')) return
|
|
915
|
+
linkCount++
|
|
916
|
+
addField({
|
|
917
|
+
key: numberedKey('ctaLink', linkCount), type: 'text',
|
|
918
|
+
label: numberedLabel('Button Link', linkCount), confidence: 'medium',
|
|
919
|
+
defaultValue: href,
|
|
920
|
+
}, nodeOffset(node))
|
|
921
|
+
})
|
|
922
|
+
|
|
923
|
+
// ── 7. IMAGES ──────────────────────────────────────────────────────────
|
|
924
|
+
let imgCount = 0
|
|
925
|
+
let altCount = 0
|
|
926
|
+
walkAst(ast, (node) => {
|
|
927
|
+
if (node.type !== 'element' && node.type !== 'component') return
|
|
928
|
+
const tagName = node.name ?? ''
|
|
929
|
+
if (tagName !== 'img' && tagName !== 'Image' && tagName !== 'picture') return
|
|
930
|
+
if (shouldSkipForContent(node)) return
|
|
931
|
+
const srcAttr = getAttr(node, 'src')
|
|
932
|
+
if (srcAttr) {
|
|
933
|
+
let src: string | undefined
|
|
934
|
+
if (srcAttr.kind === 'quoted') src = srcAttr.value
|
|
935
|
+
else if (srcAttr.kind === 'expression') {
|
|
936
|
+
const strMatch = srcAttr.value.match(/['"]([^'"]+)['"]/)
|
|
937
|
+
if (strMatch) src = strMatch[1]
|
|
938
|
+
}
|
|
939
|
+
if (src) {
|
|
940
|
+
imgCount++
|
|
941
|
+
addField({
|
|
942
|
+
key: numberedKey('image', imgCount), type: 'image',
|
|
943
|
+
label: numberedLabel('Bild', imgCount), confidence: 'high',
|
|
944
|
+
defaultValue: { path: src.trim(), alt: '' },
|
|
945
|
+
}, nodeOffset(node))
|
|
946
|
+
}
|
|
947
|
+
}
|
|
948
|
+
if (tagName === 'img' || tagName === 'Image') {
|
|
949
|
+
const altAttr = getAttr(node, 'alt')
|
|
950
|
+
if (altAttr && altAttr.kind === 'quoted' && altAttr.value.trim().length >= 2) {
|
|
951
|
+
altCount++
|
|
952
|
+
addField({
|
|
953
|
+
key: numberedKey('imageAlt', altCount), type: 'text',
|
|
954
|
+
label: numberedLabel('Bild Alt-Text', altCount), confidence: 'medium',
|
|
955
|
+
defaultValue: altAttr.value.trim(),
|
|
956
|
+
}, nodeOffset(node))
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
})
|
|
960
|
+
|
|
961
|
+
// ── 8. ICONS ───────────────────────────────────────────────────────────
|
|
962
|
+
let iconCount = 0
|
|
963
|
+
walkAst(ast, (node, parentNode) => {
|
|
964
|
+
if (node.type !== 'element' || node.name !== 'svg') return
|
|
965
|
+
if (shouldSkipForContent(node) || shouldSkipMapChild(node)) return
|
|
966
|
+
const svgSource = serializeNode(node)
|
|
967
|
+
if (svgSource.length < 100) return
|
|
968
|
+
if (parentNode && (parentNode.name === 'a' || parentNode.name === 'button')) return
|
|
969
|
+
iconCount++
|
|
970
|
+
addField({
|
|
971
|
+
key: numberedKey('icon', iconCount), type: 'icon',
|
|
972
|
+
label: numberedLabel('Icon', iconCount), confidence: 'low',
|
|
973
|
+
}, nodeOffset(node))
|
|
974
|
+
})
|
|
975
|
+
let foundIconProp = false
|
|
976
|
+
walkAst(ast, (node) => {
|
|
977
|
+
if (foundIconProp) return
|
|
978
|
+
if (node.type !== 'component') return
|
|
979
|
+
if (shouldSkipMapChild(node)) return
|
|
980
|
+
const iconAttr = getAttr(node, 'icon')
|
|
981
|
+
if (iconAttr) {
|
|
982
|
+
addField({ key: 'icon', type: 'icon', label: 'Icon', confidence: 'high' }, nodeOffset(node))
|
|
983
|
+
foundIconProp = true
|
|
984
|
+
}
|
|
985
|
+
})
|
|
986
|
+
|
|
987
|
+
// ── 9. INLINE ARRAYS: {[...].map()} ───────────────────────────────────
|
|
988
|
+
let arrayCount = 0
|
|
989
|
+
walkAst(ast, (node) => {
|
|
990
|
+
if (node.type !== 'expression') return
|
|
991
|
+
const exprCode = (node.children ?? []).map((c) => c.value ?? '').join('')
|
|
992
|
+
const inlineArrayMatch = exprCode.match(/^\s*\[([\s\S]*?)\]\s*\.map\s*\(\s*\(?\s*(\{[^}]*\}|\w+)/)
|
|
993
|
+
if (!inlineArrayMatch) return
|
|
994
|
+
arrayCount++
|
|
995
|
+
const arrayContent = inlineArrayMatch[1]!
|
|
996
|
+
const callbackParam = inlineArrayMatch[2]!
|
|
997
|
+
let objectKeys: string[] = []
|
|
998
|
+
if (callbackParam.startsWith('{')) {
|
|
999
|
+
objectKeys = callbackParam.replace(/[{}]/g, '').split(',')
|
|
1000
|
+
.map((p: string) => p.trim().split(':')[0]!.trim())
|
|
1001
|
+
.filter((p: string) => p && !p.startsWith('...'))
|
|
1002
|
+
} else {
|
|
1003
|
+
const firstObjMatch = arrayContent.match(/\{\s*([\s\S]*?)\}/)
|
|
1004
|
+
if (firstObjMatch) {
|
|
1005
|
+
const keyRegex = /(\w+)\s*:/g
|
|
1006
|
+
let km: RegExpExecArray | null
|
|
1007
|
+
while ((km = keyRegex.exec(firstObjMatch[1]!)) !== null) objectKeys.push(km[1]!)
|
|
1008
|
+
}
|
|
1009
|
+
if (objectKeys.length === 0) {
|
|
1010
|
+
const accessRegex = new RegExp(`${callbackParam}\\.(\\w+)`, 'g')
|
|
1011
|
+
let am: RegExpExecArray | null
|
|
1012
|
+
const accessedProps = new Set<string>()
|
|
1013
|
+
while ((am = accessRegex.exec(exprCode)) !== null) accessedProps.add(am[1]!)
|
|
1014
|
+
objectKeys = [...accessedProps]
|
|
1015
|
+
}
|
|
1016
|
+
}
|
|
1017
|
+
const isObjectArray = objectKeys.length > 0
|
|
1018
|
+
const isStringArray = /^\s*'[^']*'\s*,/.test(arrayContent) || /^\s*"[^"]*"\s*,/.test(arrayContent)
|
|
1019
|
+
const arrayDefaultValue = extractInlineArrayValues(arrayContent, isObjectArray, isStringArray)
|
|
1020
|
+
if (isObjectArray && !isStringArray) {
|
|
1021
|
+
const innerFields: InferredField[] = objectKeys.map((prop: string) => ({
|
|
1022
|
+
key: prop, type: inferInnerFieldType(prop), label: camelToLabel(prop), confidence: 'medium' as const,
|
|
1023
|
+
}))
|
|
1024
|
+
addField({
|
|
1025
|
+
key: numberedKey('items', arrayCount), type: 'array',
|
|
1026
|
+
label: numberedLabel('Liste', arrayCount), confidence: 'high',
|
|
1027
|
+
defaultValue: arrayDefaultValue,
|
|
1028
|
+
options: { arrayItem: { type: 'object', fields: innerFields } },
|
|
1029
|
+
}, nodeOffset(node))
|
|
1030
|
+
} else {
|
|
1031
|
+
addField({
|
|
1032
|
+
key: numberedKey('items', arrayCount), type: 'array',
|
|
1033
|
+
label: numberedLabel('Liste', arrayCount), confidence: 'high',
|
|
1034
|
+
defaultValue: arrayDefaultValue,
|
|
1035
|
+
options: { arrayItem: { type: 'text' } },
|
|
1036
|
+
}, nodeOffset(node))
|
|
1037
|
+
}
|
|
1038
|
+
})
|
|
1039
|
+
|
|
1040
|
+
// ── 9b. STATIC LISTS (<ul>/<ol> with no .map()) → ARRAY ─────────────────
|
|
1041
|
+
// Only processes simple flat lists (no nested <ul>/<ol> inside <li> items).
|
|
1042
|
+
// Complex nested structures (like phase lists with sub-lists) are handled
|
|
1043
|
+
// by section 12 (repeated groups) via walkContentNodes.
|
|
1044
|
+
let staticListCount = 0
|
|
1045
|
+
// Turns each flat, static <ul>/<ol> into an array field of text items.
// Lists that are skipped: nodes excluded by shouldSkipForContent / shouldSkipMapChild,
// lists containing a `.map(` expression, and lists whose <li> has a direct <ul>/<ol> child.
walkAst(ast, (node) => {
  if (node.type !== 'element') return
  if (node.name !== 'ul' && node.name !== 'ol') return
  if (shouldSkipForContent(node)) return
  if (shouldSkipMapChild(node)) return

  // Skip if any .map() expression exists inside (already dynamic)
  let hasMap = false
  walkAst(node, (n) => {
    if (hasMap) return
    if (n.type === 'expression') {
      const code = (n.children ?? []).map(c => c.value ?? '').join('')
      if (/\.map\s*\(/.test(code)) hasMap = true
    }
  })
  if (hasMap) return

  // Skip complex lists: <li> items that themselves contain nested <ul>/<ol>
  let hasNestedList = false
  for (const li of node.children ?? []) {
    if (li.type !== 'element' || li.name !== 'li') continue
    for (const child of li.children ?? []) {
      if (child.type === 'element' && (child.name === 'ul' || child.name === 'ol')) {
        hasNestedList = true
        break
      }
    }
    if (hasNestedList) break
  }
  if (hasNestedList) return

  // Collect <li> items — detect formatting elements (<strong>, <em>, etc.)
  // If present, store innerHTML of the content span so the MiniRTE can handle it.
  const FORMATTING_TAGS = new Set(['strong', 'em', 'b', 'i', 'code', 'a', 's', 'u'])
  let listHasFormatting = false
  const listItems: string[] = []
  for (const li of node.children ?? []) {
    if (li.type !== 'element' || li.name !== 'li') continue

    // Check for formatting elements inside this <li>
    let liHasFormatting = false
    walkAst(li, (n) => {
      if (liHasFormatting) return
      if (n.type === 'element' && FORMATTING_TAGS.has(n.name ?? '')) liHasFormatting = true
    })

    if (liHasFormatting) {
      listHasFormatting = true
      // Extract innerHTML of the content span via <li> source + regex.
      // Child span positions from @astrojs/compiler point after the opening tag,
      // so we slice the full <li> source and use regex to find the content span.
      let contentHTML: string | null = null
      const liSrcStart = li.position?.start?.offset
      const liSrcEnd = li.position?.end?.offset
      if (liSrcStart != null && liSrcEnd != null) {
        // AST offsets refer to the wrapped source (`---\n---\n` + template). Shift them
        // back by WRAPPER_OFFSET before slicing `template`, as the other range
        // computations in this function do; clamp at 0 so a negative index never
        // slices from the end of the string.
        const liSrc = template.slice(
          Math.max(0, liSrcStart - WRAPPER_OFFSET),
          Math.max(0, liSrcEnd - WRAPPER_OFFSET),
        )
        const spanRegex = /<span[^>]*>([\s\S]*?)<\/span>/g
        let spanMatch: RegExpExecArray | null
        while ((spanMatch = spanRegex.exec(liSrc)) !== null) {
          const inner = spanMatch[1]!
          if (!inner.trim()) continue // empty bullet-dot span — skip
          contentHTML = inner.replace(/\s+/g, ' ').trim()
          break
        }
      }
      // Fall back to plain text when no non-empty <span> was found in the <li> source.
      listItems.push(contentHTML ?? extractTextContent(li, true).replace(/\s+/g, ' ').trim())
    } else {
      const t = extractTextContent(li, true).replace(/\s+/g, ' ').trim()
      if (t.length >= 1) listItems.push(t)
    }
  }
  if (listItems.length < 1) return

  // Keys continue the numbering of the inline-array fields (`items`, `items2`, ...).
  staticListCount++
  addField({
    key: numberedKey('items', arrayCount + staticListCount), type: 'array',
    label: numberedLabel('Liste', arrayCount + staticListCount), confidence: 'high',
    defaultValue: listItems,
    options: { arrayItem: { type: 'text', ...(listHasFormatting ? { formatting: true } : {}) } },
  }, nodeOffset(node))
})
|
|
1120
|
+
|
|
1121
|
+
// ── 10. COMPONENT PROPS (content-bearing) ─────────────────────────────
|
|
1122
|
+
const componentCounts = new Map<string, number>()
|
|
1123
|
+
walkAst(ast, (node) => {
|
|
1124
|
+
if (node.type !== 'component' || shouldSkipMapChild(node)) return
|
|
1125
|
+
componentCounts.set(node.name ?? '', (componentCounts.get(node.name ?? '') ?? 0) + 1)
|
|
1126
|
+
})
|
|
1127
|
+
|
|
1128
|
+
walkAst(ast, (node) => {
|
|
1129
|
+
if (node.type !== 'component') return
|
|
1130
|
+
if (shouldSkipMapChild(node)) return
|
|
1131
|
+
if ((componentCounts.get(node.name ?? '') ?? 0) >= 2) return
|
|
1132
|
+
for (const attr of node.attributes ?? []) {
|
|
1133
|
+
if (attr.kind === 'quoted') {
|
|
1134
|
+
const propName = attr.name
|
|
1135
|
+
const propValue = attr.value
|
|
1136
|
+
if (/^(class|className|id|style|type|role|width|height|viewBox|fill|stroke|xmlns|d|cx|cy|r|rx|ry|x|y|x1|y1|x2|y2)$/.test(propName)) continue
|
|
1137
|
+
if (/^(lang|language|filename|file|format|variant|size|loading|decoding|transition|client:.*)$/.test(propName)) continue
|
|
1138
|
+
if (/^(aria-|data-)/.test(propName)) continue
|
|
1139
|
+
if (propValue.length < 2) continue
|
|
1140
|
+
if (propValue === 'true' || propValue === 'false' || /^\d+$/.test(propValue)) continue
|
|
1141
|
+
if (propValue.includes('/') && !propValue.includes(' ')) continue
|
|
1142
|
+
addField({
|
|
1143
|
+
key: propName,
|
|
1144
|
+
type: propName === 'icon' ? 'icon' : propName === 'src' ? 'image' : 'text',
|
|
1145
|
+
label: camelToLabel(propName), confidence: 'medium', defaultValue: propValue,
|
|
1146
|
+
}, nodeOffset(node))
|
|
1147
|
+
} else if (attr.kind === 'expression') {
|
|
1148
|
+
const propName = attr.name
|
|
1149
|
+
if (/^(class|className|id|style|type|role|lang|language|filename|file|format|variant|size|loading|decoding)$/.test(propName)) continue
|
|
1150
|
+
if (/^\s*\w+\s*$/.test(attr.value)) continue
|
|
1151
|
+
const fallbackMatch = attr.value.match(/\?\?\s*['"]([^'"]+)['"]/)
|
|
1152
|
+
if (fallbackMatch) {
|
|
1153
|
+
addField({
|
|
1154
|
+
key: propName,
|
|
1155
|
+
type: propName === 'icon' ? 'icon' : propName === 'src' ? 'image' : 'text',
|
|
1156
|
+
label: camelToLabel(propName), confidence: 'medium', defaultValue: fallbackMatch[1]!,
|
|
1157
|
+
}, nodeOffset(node))
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
}
|
|
1161
|
+
})
|
|
1162
|
+
|
|
1163
|
+
// ── 11. REPEATED COMPONENTS → ARRAY ───────────────────────────────────
|
|
1164
|
+
const componentInstances = new Map<string, AstNode[]>()
|
|
1165
|
+
walkAst(ast, (node) => {
|
|
1166
|
+
if (node.type !== 'component') return
|
|
1167
|
+
if (shouldSkipMapChild(node)) return
|
|
1168
|
+
const name = node.name ?? ''
|
|
1169
|
+
if (!name) return
|
|
1170
|
+
if (!componentInstances.has(name)) componentInstances.set(name, [])
|
|
1171
|
+
componentInstances.get(name)!.push(node)
|
|
1172
|
+
})
|
|
1173
|
+
|
|
1174
|
+
for (const [compName, instances] of componentInstances) {
|
|
1175
|
+
if (instances.length < 2) continue
|
|
1176
|
+
const allProps = new Set<string>()
|
|
1177
|
+
const instanceValues: Array<Record<string, unknown>> = []
|
|
1178
|
+
for (const inst of instances) {
|
|
1179
|
+
const item: Record<string, unknown> = {}
|
|
1180
|
+
for (const attr of inst.attributes ?? []) {
|
|
1181
|
+
const name = attr.name
|
|
1182
|
+
if (/^(class|className|id|style|type|role)$/.test(name)) continue
|
|
1183
|
+
if (/^(aria-|data-)/.test(name)) continue
|
|
1184
|
+
if (attr.kind === 'quoted') {
|
|
1185
|
+
allProps.add(name); item[name] = attr.value
|
|
1186
|
+
} else if (attr.kind === 'expression') {
|
|
1187
|
+
allProps.add(name)
|
|
1188
|
+
const expr = attr.value.trim()
|
|
1189
|
+
if (expr === 'true') item[name] = true
|
|
1190
|
+
else if (expr === 'false') item[name] = false
|
|
1191
|
+
else if (/^\w+$/.test(expr) && frontmatter) {
|
|
1192
|
+
const extractedValue = extractFrontmatterValue(frontmatter, expr)
|
|
1193
|
+
if (extractedValue !== undefined) item[name] = extractedValue
|
|
1194
|
+
}
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
if (Object.keys(item).length > 0) instanceValues.push(item)
|
|
1198
|
+
}
|
|
1199
|
+
if (allProps.size > 0) {
|
|
1200
|
+
const cardKey = compName.replace(/([A-Z])/g, (_m: string, c: string, i: number) => i === 0 ? c.toLowerCase() : '_' + c.toLowerCase()).replace(/_/g, '') + 's'
|
|
1201
|
+
if (!usedKeys.has(cardKey)) {
|
|
1202
|
+
const innerFields: InferredField[] = [...allProps].map((p) => {
|
|
1203
|
+
const isArray = instanceValues.some(iv => Array.isArray(iv[p]))
|
|
1204
|
+
if (isArray) {
|
|
1205
|
+
return { key: p, type: 'array' as const, label: camelToLabel(p), confidence: 'medium' as const, options: { arrayItem: { type: 'text' as const } } }
|
|
1206
|
+
}
|
|
1207
|
+
return { key: p, type: inferInnerFieldType(p), label: camelToLabel(p), confidence: 'medium' as const }
|
|
1208
|
+
})
|
|
1209
|
+
addField({
|
|
1210
|
+
key: cardKey, type: 'array', label: `${compName} Liste`, confidence: 'medium',
|
|
1211
|
+
defaultValue: instanceValues.length > 0 ? instanceValues : undefined,
|
|
1212
|
+
options: { arrayItem: { type: 'object', fields: innerFields } },
|
|
1213
|
+
}, nodeOffset(instances[0]!))
|
|
1214
|
+
}
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
|
|
1218
|
+
// ── 12. REPEATED HTML ELEMENTS → ARRAY (v2: position-based) ───────────
|
|
1219
|
+
// Phase 1: Pure structural detection — no CSS heuristics.
|
|
1220
|
+
// Produces RepeatedGroup objects with position info for the patcher.
|
|
1221
|
+
for (const group of repeatedElementGroups) {
|
|
1222
|
+
const instanceCount = group.instances.length
|
|
1223
|
+
|
|
1224
|
+
// Build instance bounds
|
|
1225
|
+
const instanceBounds: RepeatedGroupInstance[] = group.instances.map(inst => ({
|
|
1226
|
+
start: nodeOffset(inst),
|
|
1227
|
+
end: nodeEnd(inst),
|
|
1228
|
+
}))
|
|
1229
|
+
|
|
1230
|
+
// Phase 1a: Extract structural fingerprint from each instance
|
|
1231
|
+
const instanceFingerprints: Array<ContentItem[]> = []
|
|
1232
|
+
|
|
1233
|
+
for (const inst of group.instances) {
|
|
1234
|
+
const items: ContentItem[] = []
|
|
1235
|
+
walkContentNodes(inst, 0, items, frontmatter)
|
|
1236
|
+
instanceFingerprints.push(items)
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
if (instanceFingerprints.length === 0) continue
|
|
1240
|
+
|
|
1241
|
+
// Phase 1b: Use MAJORITY structure as canonical (most common tag sequence)
|
|
1242
|
+
// This avoids using an instance with optional elements as the template.
|
|
1243
|
+
const signatures = instanceFingerprints.map(fp => fp.map(i => i.tag).join(','))
|
|
1244
|
+
const sigCounts = new Map<string, number>()
|
|
1245
|
+
for (const sig of signatures) sigCounts.set(sig, (sigCounts.get(sig) ?? 0) + 1)
|
|
1246
|
+
const mostCommonSig = [...sigCounts.entries()].sort((a, b) => b[1] - a[1])[0]![0]
|
|
1247
|
+
const canonicalIdx = signatures.indexOf(mostCommonSig)
|
|
1248
|
+
const canonical = instanceFingerprints[canonicalIdx]!
|
|
1249
|
+
|
|
1250
|
+
// Track which items in each instance have been consumed (prevents double-matching)
|
|
1251
|
+
const consumedPerInstance: Array<Set<number>> = instanceFingerprints.map(() => new Set())
|
|
1252
|
+
|
|
1253
|
+
// For each canonical item, check if it exists in all other instances
|
|
1254
|
+
const innerFields: InnerFieldInfo[] = []
|
|
1255
|
+
const typeCounts: Record<string, number> = {}
|
|
1256
|
+
|
|
1257
|
+
for (let ci = 0; ci < canonical.length; ci++) {
|
|
1258
|
+
const cItem = canonical[ci]!
|
|
1259
|
+
const tag = cItem.tag
|
|
1260
|
+
const isHeading = /^h[1-6]$/.test(tag)
|
|
1261
|
+
const isLink = tag === 'a'
|
|
1262
|
+
const isArray = tag === '__array__'
|
|
1263
|
+
|
|
1264
|
+
// Determine field type and key
|
|
1265
|
+
let fieldType: 'text' | 'array' | 'link'
|
|
1266
|
+
let keyBase: string
|
|
1267
|
+
|
|
1268
|
+
if (isHeading) {
|
|
1269
|
+
fieldType = 'text'
|
|
1270
|
+
keyBase = 'heading'
|
|
1271
|
+
} else if (isLink) {
|
|
1272
|
+
fieldType = 'link'
|
|
1273
|
+
keyBase = 'link'
|
|
1274
|
+
} else if (isArray) {
|
|
1275
|
+
fieldType = 'array'
|
|
1276
|
+
keyBase = 'list'
|
|
1277
|
+
} else {
|
|
1278
|
+
fieldType = 'text'
|
|
1279
|
+
keyBase = 'text'
|
|
1280
|
+
}
|
|
1281
|
+
|
|
1282
|
+
typeCounts[keyBase] = (typeCounts[keyBase] ?? 0) + 1
|
|
1283
|
+
const key = typeCounts[keyBase] === 1 ? keyBase : `${keyBase}${typeCounts[keyBase]}`
|
|
1284
|
+
|
|
1285
|
+
// Find matching item in each instance (track consumed items per instance)
|
|
1286
|
+
const positions: Array<FieldPosition | null> = []
|
|
1287
|
+
const defaultValues: unknown[] = []
|
|
1288
|
+
let presentCount = 0
|
|
1289
|
+
|
|
1290
|
+
for (let ii = 0; ii < instanceCount; ii++) {
|
|
1291
|
+
const fp = instanceFingerprints[ii]!
|
|
1292
|
+
const consumed = consumedPerInstance[ii]!
|
|
1293
|
+
const match = findMatchingItem(fp, cItem, ci, canonical, consumed)
|
|
1294
|
+
if (match) {
|
|
1295
|
+
presentCount++
|
|
1296
|
+
positions.push({
|
|
1297
|
+
offset: nodeOffset(match.node),
|
|
1298
|
+
length: nodeEnd(match.node) - nodeOffset(match.node),
|
|
1299
|
+
source: match.exprSource,
|
|
1300
|
+
})
|
|
1301
|
+
if (isArray && match.exprSource) {
|
|
1302
|
+
defaultValues.push(extractFrontmatterValue(frontmatter, match.exprSource) ?? [])
|
|
1303
|
+
} else if (isLink) {
|
|
1304
|
+
// For link fields, use the href value (text is extracted separately as linkText)
|
|
1305
|
+
defaultValues.push(match.hrefValue || null)
|
|
1306
|
+
} else {
|
|
1307
|
+
defaultValues.push(match.text || match.hrefValue || null)
|
|
1308
|
+
}
|
|
1309
|
+
} else {
|
|
1310
|
+
positions.push(null)
|
|
1311
|
+
defaultValues.push(null)
|
|
1312
|
+
}
|
|
1313
|
+
}
|
|
1314
|
+
|
|
1315
|
+
innerFields.push({
|
|
1316
|
+
key,
|
|
1317
|
+
type: fieldType,
|
|
1318
|
+
tag,
|
|
1319
|
+
required: presentCount === instanceCount,
|
|
1320
|
+
positions,
|
|
1321
|
+
defaultValues,
|
|
1322
|
+
})
|
|
1323
|
+
|
|
1324
|
+
// For links, also extract link text as a separate field
|
|
1325
|
+
if (isLink) {
|
|
1326
|
+
typeCounts['linkText'] = (typeCounts['linkText'] ?? 0) + 1
|
|
1327
|
+
const ltKey = typeCounts['linkText'] === 1 ? 'linkText' : `linkText${typeCounts['linkText']}`
|
|
1328
|
+
const ltPositions: Array<FieldPosition | null> = []
|
|
1329
|
+
const ltDefaults: unknown[] = []
|
|
1330
|
+
|
|
1331
|
+
for (let ii = 0; ii < instanceCount; ii++) {
|
|
1332
|
+
// Reuse the same match (link node) — text is a sub-field, not a separate consumed item
|
|
1333
|
+
const fp = instanceFingerprints[ii]!
|
|
1334
|
+
const consumed = consumedPerInstance[ii]!
|
|
1335
|
+
// Don't consume again — find same link node via tag+position
|
|
1336
|
+
const match = findMatchingItemPeek(fp, cItem, ci, canonical, consumed)
|
|
1337
|
+
if (match) {
|
|
1338
|
+
ltPositions.push({
|
|
1339
|
+
offset: nodeOffset(match.node),
|
|
1340
|
+
length: nodeEnd(match.node) - nodeOffset(match.node),
|
|
1341
|
+
})
|
|
1342
|
+
ltDefaults.push(match.text || null)
|
|
1343
|
+
} else {
|
|
1344
|
+
ltPositions.push(null)
|
|
1345
|
+
ltDefaults.push(null)
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
|
|
1349
|
+
innerFields.push({
|
|
1350
|
+
key: ltKey,
|
|
1351
|
+
type: 'text',
|
|
1352
|
+
tag: 'a',
|
|
1353
|
+
required: presentCount === instanceCount,
|
|
1354
|
+
positions: ltPositions,
|
|
1355
|
+
defaultValues: ltDefaults,
|
|
1356
|
+
})
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
// Phase 1c: Detect optional elements from longer instances
|
|
1361
|
+
// Items in non-canonical instances that weren't consumed are optional fields.
|
|
1362
|
+
for (let ii = 0; ii < instanceCount; ii++) {
|
|
1363
|
+
if (ii === canonicalIdx) continue
|
|
1364
|
+
const fp = instanceFingerprints[ii]!
|
|
1365
|
+
const consumed = consumedPerInstance[ii]!
|
|
1366
|
+
for (let fi = 0; fi < fp.length; fi++) {
|
|
1367
|
+
if (consumed.has(fi)) continue
|
|
1368
|
+
const item = fp[fi]!
|
|
1369
|
+
// This is an optional element only present in this instance
|
|
1370
|
+
const tag = item.tag
|
|
1371
|
+
const isArray = tag === '__array__'
|
|
1372
|
+
const isLink = tag === 'a'
|
|
1373
|
+
let fieldType: 'text' | 'array' | 'link' = isArray ? 'array' : isLink ? 'link' : 'text'
|
|
1374
|
+
let keyBase = /^h[1-6]$/.test(tag) ? 'heading' : isLink ? 'link' : isArray ? 'list' : 'text'
|
|
1375
|
+
typeCounts[keyBase] = (typeCounts[keyBase] ?? 0) + 1
|
|
1376
|
+
const key = typeCounts[keyBase] === 1 ? keyBase : `${keyBase}${typeCounts[keyBase]}`
|
|
1377
|
+
|
|
1378
|
+
const positions: Array<FieldPosition | null> = new Array(instanceCount).fill(null)
|
|
1379
|
+
const defaultValues: unknown[] = new Array(instanceCount).fill(null)
|
|
1380
|
+
positions[ii] = {
|
|
1381
|
+
offset: nodeOffset(item.node),
|
|
1382
|
+
length: nodeEnd(item.node) - nodeOffset(item.node),
|
|
1383
|
+
source: item.exprSource,
|
|
1384
|
+
}
|
|
1385
|
+
if (isArray && item.exprSource) {
|
|
1386
|
+
defaultValues[ii] = extractFrontmatterValue(frontmatter, item.exprSource) ?? []
|
|
1387
|
+
} else {
|
|
1388
|
+
defaultValues[ii] = item.text || item.hrefValue || null
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
innerFields.push({ key, type: fieldType, tag, required: false, positions, defaultValues })
|
|
1392
|
+
|
|
1393
|
+
if (isLink) {
|
|
1394
|
+
typeCounts['linkText'] = (typeCounts['linkText'] ?? 0) + 1
|
|
1395
|
+
const ltKey = typeCounts['linkText'] === 1 ? 'linkText' : `linkText${typeCounts['linkText']}`
|
|
1396
|
+
const ltPositions: Array<FieldPosition | null> = new Array(instanceCount).fill(null)
|
|
1397
|
+
const ltDefaults: unknown[] = new Array(instanceCount).fill(null)
|
|
1398
|
+
ltPositions[ii] = positions[ii] ?? null
|
|
1399
|
+
ltDefaults[ii] = item.text || null
|
|
1400
|
+
innerFields.push({ key: ltKey, type: 'text', tag: 'a', required: false, positions: ltPositions, defaultValues: ltDefaults })
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1405
|
+
if (innerFields.length === 0) continue
|
|
1406
|
+
|
|
1407
|
+
arrayCount++
|
|
1408
|
+
let fieldKey = numberedKey('items', arrayCount)
|
|
1409
|
+
while (usedKeys.has(fieldKey)) {
|
|
1410
|
+
arrayCount++
|
|
1411
|
+
fieldKey = numberedKey('items', arrayCount)
|
|
1412
|
+
}
|
|
1413
|
+
|
|
1414
|
+
// Build defaultValue array for backwards compatibility
|
|
1415
|
+
const defaultValue: Array<Record<string, unknown>> = []
|
|
1416
|
+
for (let ii = 0; ii < instanceCount; ii++) {
|
|
1417
|
+
const item: Record<string, unknown> = {}
|
|
1418
|
+
for (const f of innerFields) {
|
|
1419
|
+
if (f.defaultValues[ii] != null) {
|
|
1420
|
+
item[f.key] = f.defaultValues[ii]
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
defaultValue.push(item)
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
// Build inner field definitions for the field schema
|
|
1427
|
+
const innerFieldDefs: InferredField[] = innerFields.map(f => {
|
|
1428
|
+
if (f.type === 'array') {
|
|
1429
|
+
return {
|
|
1430
|
+
key: f.key,
|
|
1431
|
+
type: 'array' as const,
|
|
1432
|
+
label: f.label ?? camelToLabel(f.key),
|
|
1433
|
+
confidence: 'medium' as const,
|
|
1434
|
+
options: { arrayItem: { type: 'text' as const } },
|
|
1435
|
+
}
|
|
1436
|
+
}
|
|
1437
|
+
return {
|
|
1438
|
+
key: f.key,
|
|
1439
|
+
type: f.type === 'link' ? 'text' as const : 'text' as const,
|
|
1440
|
+
label: f.label ?? camelToLabel(f.key),
|
|
1441
|
+
confidence: 'medium' as const,
|
|
1442
|
+
}
|
|
1443
|
+
})
|
|
1444
|
+
|
|
1445
|
+
// Add as top-level field
|
|
1446
|
+
const groupLabel = group.tag === 'tr'
|
|
1447
|
+
? 'Tabellen-Zeilen'
|
|
1448
|
+
: `${group.tag.charAt(0).toUpperCase() + group.tag.slice(1)} Liste`
|
|
1449
|
+
addField({
|
|
1450
|
+
key: fieldKey,
|
|
1451
|
+
type: 'array',
|
|
1452
|
+
label: groupLabel,
|
|
1453
|
+
confidence: 'high',
|
|
1454
|
+
defaultValue,
|
|
1455
|
+
options: {
|
|
1456
|
+
arrayItem: { type: 'object', fields: innerFieldDefs },
|
|
1457
|
+
_repeatedTag: group.tag,
|
|
1458
|
+
_instanceCount: instanceCount,
|
|
1459
|
+
} as any,
|
|
1460
|
+
}, instanceBounds[0]!.start)
|
|
1461
|
+
|
|
1462
|
+
// Collect class attributes per instance (AST-based, for dynamic class detection in patcher)
|
|
1463
|
+
// Positions are relative to `template` here; posAdjust is applied after extractTemplateFields returns
|
|
1464
|
+
const classAttrs = group.instances.map(inst => collectClassAttrs(inst, '', template))
|
|
1465
|
+
|
|
1466
|
+
// Store RepeatedGroup for the patcher
|
|
1467
|
+
repeatedGroups.push({
|
|
1468
|
+
tag: group.tag,
|
|
1469
|
+
fieldKey,
|
|
1470
|
+
instances: instanceBounds,
|
|
1471
|
+
templateIndex: 0,
|
|
1472
|
+
fields: innerFields,
|
|
1473
|
+
classAttrs,
|
|
1474
|
+
})
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1477
|
+
fields.sort((a, b) => a._pos - b._pos)
|
|
1478
|
+
return { fields: fields.map(({ _pos, ...field }) => field), repeatedGroups }
|
|
1479
|
+
}
|
|
1480
|
+
|
|
1481
|
+
// ---------------------------------------------------------------------------
|
|
1482
|
+
// Phase 1: Structural content node walker (no CSS heuristics)
|
|
1483
|
+
// ---------------------------------------------------------------------------
|
|
1484
|
+
|
|
1485
|
+
/**
 * One content-bearing node collected by `walkContentNodes`.
 * The ordered list of items for an instance acts as its structural fingerprint.
 */
interface ContentItem {
  /**
   * Tag of the collected node: a heading (`h1`–`h6`), `p`, `span`, `a`, `td`,
   * or the synthetic `__array__` marker used for `<ul>` / `<ol>` lists.
   */
  tag: string
  /** Number of container elements descended into between the walk root and this node. */
  depth: number
  /** AST node the item was collected from. */
  node: AstNode
  /**
   * Extracted text of the node. Static lists store the newline-joined text of
   * their `<li>` children; lists rendered through `.map()` store an empty string.
   */
  text: string
  /** For lists rendered through `.map()`: the identifier the `.map(` call is made on. */
  exprSource?: string
  /**
   * For links: the quoted `href` value, or the first quoted string found inside
   * an expression `href`.
   */
  hrefValue?: string
}
|
|
1493
|
+
|
|
1494
|
+
/**
 * Collect the content-bearing descendants of `root` into `items`, in the order
 * they are encountered. Classification is purely by tag name — CSS classes are
 * never inspected.
 *
 * - Elements for which `isAriaHidden` returns true are skipped together with
 *   their subtree.
 * - `h1`–`h6`, `p`, `span` and `td` are collected when their whitespace-collapsed
 *   text is non-empty; they are never descended into.
 * - `a` is collected when it has text or an `href` value could be determined.
 * - `ul` / `ol` produce at most one `__array__` item (see inline comments).
 * - Every other element is treated as a container and walked with `depth + 1`.
 *
 * @param root        Node whose children are inspected.
 * @param depth       Depth recorded on items found directly under `root`.
 * @param items       Output list; matches are appended in place.
 * @param frontmatter Only forwarded to recursive calls; not read here.
 */
function walkContentNodes(
  root: AstNode,
  depth: number,
  items: ContentItem[],
  frontmatter: string,
): void {
  // Text of a node with every whitespace run collapsed to a single space.
  const collapsedText = (node: AstNode): string =>
    extractTextContent(node, true).replace(/\s+/g, ' ').trim()

  for (const child of root.children ?? []) {
    // Only elements are ever collected or descended into.
    if (child.type !== 'element') continue
    if (isAriaHidden(child)) continue

    const name = child.name ?? ''

    // Plain text leaves: headings, paragraphs, spans and table cells.
    if (/^h[1-6]$/.test(name) || name === 'p' || name === 'span' || name === 'td') {
      const text = collapsedText(child)
      if (text.length > 0) {
        items.push({ tag: name, depth, node: child, text })
      }
      continue // leaves are not walked further
    }

    // Link: keep both the visible text and, when available, the href.
    if (name === 'a') {
      const text = collapsedText(child)
      const href = getAttr(child, 'href')
      let hrefValue: string | undefined
      if (href?.kind === 'quoted') {
        hrefValue = href.value
      } else if (href?.kind === 'expression') {
        // Use the first quoted string literal inside the expression, if any.
        const literal = href.value.match(/['"]([^'"]+)['"]/)
        if (literal) hrefValue = literal[1]
      }
      if (text.length > 0 || hrefValue) {
        items.push({ tag: 'a', depth, node: child, text, hrefValue })
      }
      continue
    }

    // List: either driven by a `.map()` expression or made of static <li> items.
    if (name === 'ul' || name === 'ol') {
      // Remember the identifier of the first `<identifier>.map(` found in an
      // expression node reported by walkAst (presumably a walk of the list's
      // subtree — confirm against walkAst).
      let mapSource: string | undefined
      walkAst(child, (expr) => {
        if (mapSource) return
        if (expr.type !== 'expression') return
        const code = (expr.children ?? []).map(c => c.value ?? '').join('')
        const mapMatch = code.match(/(\w+)\.map\s*\(/)
        if (mapMatch) mapSource = mapMatch[1]
      })

      if (mapSource) {
        items.push({ tag: '__array__', depth, node: child, text: '', exprSource: mapSource })
        continue
      }

      // Static list: join the trimmed text of each direct <li> child.
      const listTexts: string[] = []
      for (const li of child.children ?? []) {
        if (li.type !== 'element' || li.name !== 'li') continue
        const t = extractTextContent(li, true).trim()
        if (t.length > 0) listTexts.push(t)
      }
      if (listTexts.length > 0) {
        items.push({ tag: '__array__', depth, node: child, text: listTexts.join('\n') })
      }
      continue
    }

    // Anything else (div, section, …) is a container: look inside it.
    walkContentNodes(child, depth + 1, items, frontmatter)
  }
}
|
|
1594
|
+
|
|
1595
|
+
/**
 * Find the item in `targetFp` that corresponds to `canonicalItem` and mark it
 * as consumed so later lookups cannot return it again.
 *
 * Selection is delegated to `findMatchingItemCore`; this wrapper only records
 * the chosen index in `consumed`.
 *
 * @param targetFp       Fingerprint (item list) of the instance being searched.
 * @param canonicalItem  Item from the canonical fingerprint to look for.
 * @param canonicalIndex Index of `canonicalItem` within `allCanonical`.
 * @param allCanonical   The full canonical fingerprint.
 * @param consumed       Indices of `targetFp` already matched; updated in place on a hit.
 * @returns The matched item, or `null` when no match was found.
 */
function findMatchingItem(
  targetFp: ContentItem[],
  canonicalItem: ContentItem,
  canonicalIndex: number,
  allCanonical: ContentItem[],
  consumed: Set<number>,
): ContentItem | null {
  const hit = findMatchingItemCore(targetFp, canonicalItem, canonicalIndex, allCanonical, consumed)
  if (!hit) return null

  consumed.add(hit.idx)
  return hit.item ?? null
}
|
|
1613
|
+
|
|
1614
|
+
/**
 * Find the item in `targetFp` that corresponds to `canonicalItem` WITHOUT
 * consuming it (used for sub-fields such as link text).
 *
 * Every item with the same tag is a candidate, including ones already listed
 * in `consumed`. Among candidates, the one whose relative position in
 * `targetFp` is closest to the canonical item's relative position in
 * `allCanonical` wins; on a tie the earliest candidate is returned.
 *
 * NOTE(review): because consumed items are not excluded here, with several
 * same-tag items the result can differ from what `findMatchingItem` selected
 * for the same canonical item — confirm callers tolerate this.
 *
 * @param targetFp       Fingerprint (item list) of the instance being searched.
 * @param canonicalItem  Item from the canonical fingerprint to look for.
 * @param canonicalIndex Index of `canonicalItem` within `allCanonical`.
 * @param allCanonical   The full canonical fingerprint.
 * @param consumed       Accepted for signature parity; not read.
 * @returns The best same-tag item, or `null` when `targetFp` has none.
 */
function findMatchingItemPeek(
  targetFp: ContentItem[],
  canonicalItem: ContentItem,
  canonicalIndex: number,
  allCanonical: ContentItem[],
  consumed: Set<number>,
): ContentItem | null {
  const wantedTag = canonicalItem.tag
  // Relative positions are normalised to [0, 1]; the max() guards one-item lists.
  const wantedPos = canonicalIndex / Math.max(allCanonical.length - 1, 1)
  const span = Math.max(targetFp.length - 1, 1)

  let best: ContentItem | null = null
  let bestDistance = Infinity
  for (const [idx, item] of targetFp.entries()) {
    if (item.tag !== wantedTag) continue
    const distance = Math.abs(idx / span - wantedPos)
    // Strict `<` keeps the earliest candidate when distances are equal.
    if (best === null || distance < bestDistance) {
      best = item
      bestDistance = distance
    }
  }
  return best
}
|
|
1640
|
+
|
|
1641
|
+
/**
 * Shared lookup behind `findMatchingItem`: pick the not-yet-consumed item in
 * `targetFp` that best corresponds to `canonicalItem`.
 *
 * Candidates are items with the same tag whose index is not in `consumed`.
 * Among them, the one whose relative position in `targetFp` is closest to the
 * canonical item's relative position in `allCanonical` wins; on a tie the
 * earliest candidate is returned. `consumed` is only read, never modified.
 *
 * @param targetFp       Fingerprint (item list) of the instance being searched.
 * @param canonicalItem  Item from the canonical fingerprint to look for.
 * @param canonicalIndex Index of `canonicalItem` within `allCanonical`.
 * @param allCanonical   The full canonical fingerprint.
 * @param consumed       Indices of `targetFp` that must not be matched again.
 * @returns The chosen item together with its index in `targetFp`, or `null`.
 */
function findMatchingItemCore(
  targetFp: ContentItem[],
  canonicalItem: ContentItem,
  canonicalIndex: number,
  allCanonical: ContentItem[],
  consumed: Set<number>,
): { item: ContentItem; idx: number } | null {
  const wantedTag = canonicalItem.tag
  // Relative positions are normalised to [0, 1]; the max() guards one-item lists.
  const wantedPos = canonicalIndex / Math.max(allCanonical.length - 1, 1)
  const span = Math.max(targetFp.length - 1, 1)

  let best: { item: ContentItem; idx: number } | null = null
  let bestDistance = Infinity
  for (const [idx, item] of targetFp.entries()) {
    if (item.tag !== wantedTag || consumed.has(idx)) continue
    const distance = Math.abs(idx / span - wantedPos)
    // Strict `<` keeps the earliest candidate when distances are equal.
    if (best === null || distance < bestDistance) {
      best = { item, idx }
      bestDistance = distance
    }
  }
  return best
}
|