@nuasite/cms-marker 0.0.71 → 0.0.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/types/build-processor.d.ts.map +1 -1
  2. package/dist/types/dev-middleware.d.ts.map +1 -1
  3. package/dist/types/source-finder/ast-extractors.d.ts +35 -0
  4. package/dist/types/source-finder/ast-extractors.d.ts.map +1 -0
  5. package/dist/types/source-finder/ast-parser.d.ts +16 -0
  6. package/dist/types/source-finder/ast-parser.d.ts.map +1 -0
  7. package/dist/types/source-finder/cache.d.ts +18 -0
  8. package/dist/types/source-finder/cache.d.ts.map +1 -0
  9. package/dist/types/source-finder/collection-finder.d.ts +24 -0
  10. package/dist/types/source-finder/collection-finder.d.ts.map +1 -0
  11. package/dist/types/source-finder/cross-file-tracker.d.ts +29 -0
  12. package/dist/types/source-finder/cross-file-tracker.d.ts.map +1 -0
  13. package/dist/types/source-finder/element-finder.d.ts +42 -0
  14. package/dist/types/source-finder/element-finder.d.ts.map +1 -0
  15. package/dist/types/source-finder/image-finder.d.ts +16 -0
  16. package/dist/types/source-finder/image-finder.d.ts.map +1 -0
  17. package/dist/types/source-finder/index.d.ts +8 -0
  18. package/dist/types/source-finder/index.d.ts.map +1 -0
  19. package/dist/types/source-finder/search-index.d.ts +27 -0
  20. package/dist/types/source-finder/search-index.d.ts.map +1 -0
  21. package/dist/types/source-finder/snippet-utils.d.ts +49 -0
  22. package/dist/types/source-finder/snippet-utils.d.ts.map +1 -0
  23. package/dist/types/source-finder/source-lookup.d.ts +16 -0
  24. package/dist/types/source-finder/source-lookup.d.ts.map +1 -0
  25. package/dist/types/source-finder/types.d.ts +163 -0
  26. package/dist/types/source-finder/types.d.ts.map +1 -0
  27. package/dist/types/source-finder/variable-extraction.d.ts +37 -0
  28. package/dist/types/source-finder/variable-extraction.d.ts.map +1 -0
  29. package/dist/types/tsconfig.tsbuildinfo +1 -1
  30. package/package.json +1 -1
  31. package/src/build-processor.ts +33 -1
  32. package/src/dev-middleware.ts +33 -1
  33. package/src/source-finder/ast-extractors.ts +175 -0
  34. package/src/source-finder/ast-parser.ts +127 -0
  35. package/src/source-finder/cache.ts +75 -0
  36. package/src/source-finder/collection-finder.ts +321 -0
  37. package/src/source-finder/cross-file-tracker.ts +337 -0
  38. package/src/source-finder/element-finder.ts +383 -0
  39. package/src/source-finder/image-finder.ts +189 -0
  40. package/src/source-finder/index.ts +26 -0
  41. package/src/source-finder/search-index.ts +418 -0
  42. package/src/source-finder/snippet-utils.ts +268 -0
  43. package/src/source-finder/source-lookup.ts +197 -0
  44. package/src/source-finder/types.ts +206 -0
  45. package/src/source-finder/variable-extraction.ts +355 -0
  46. package/dist/types/source-finder.d.ts +0 -117
  47. package/dist/types/source-finder.d.ts.map +0 -1
  48. package/src/source-finder.ts +0 -1784
@@ -1,1784 +0,0 @@
1
- import { parse as parseAstro } from '@astrojs/compiler'
2
- import type { ComponentNode, ElementNode, Node as AstroNode, TextNode } from '@astrojs/compiler/types'
3
- import { parse as parseBabel } from '@babel/parser'
4
- import fs from 'node:fs/promises'
5
- import path from 'node:path'
6
- import { getProjectRoot } from './config'
7
- import { getErrorCollector } from './error-collector'
8
- import type { ManifestEntry } from './types'
9
- import { generateSourceHash } from './utils'
10
-
11
- // ============================================================================
12
- // File Parsing Cache - Avoid re-parsing the same files
13
- // ============================================================================
14
-
15
/**
 * Everything kept about one source file after it has been read and parsed,
 * so later lookups can reuse it instead of reading and parsing again.
 */
interface CachedParsedFile {
	/** Raw file text as read from disk */
	content: string
	/** `content` split on `\n` (index 0 holds line 1) */
	lines: Array<string>
	/** Template AST; an empty root placeholder for non-`.astro` files */
	ast: AstroNode
	/** Frontmatter source text, or null when the file has none */
	frontmatterContent: string | null
	/** Start line reported for the frontmatter node (0 when there is none) */
	frontmatterStartLine: number
	/** String-valued variables extracted from the frontmatter */
	variableDefinitions: Array<VariableDefinition>
}
23
-
24
/** Parsed source files keyed by file path - emptied by clearSourceFinderCache() */
const parsedFileCache: Map<string, CachedParsedFile> = new Map()

/** Recursive directory listings keyed by directory path - emptied by clearSourceFinderCache() */
const directoryCache: Map<string, Array<string>> = new Map()

/** Markdown file text and its lines - emptied by clearSourceFinderCache() */
const markdownFileCache: Map<string, { content: string; lines: Array<string> }> = new Map()
32
-
33
/** One searchable piece of text recorded in the pre-built text index */
interface SearchIndexEntry {
	/** File the entry was found in, relative to the project root */
	file: string
	/** 1-based line reported for the entry */
	line: number
	/** Source excerpt stored alongside the entry */
	snippet: string
	/** How the text reaches the page */
	type: 'static' | 'variable' | 'prop' | 'computed'
	/** Variable or prop name, when the entry is not plain static text */
	variableName?: string
	/** Line of the variable definition, for `variable` entries */
	definitionLine?: number
	/** Text after normalizeText(); this is what lookups compare against */
	normalizedText: string
	/** Lower-cased tag name of the element the text belongs to */
	tag: string
}
44
-
45
/** One `src` attribute recorded in the pre-built image index */
interface ImageIndexEntry {
	/** File the image was found in, relative to the project root */
	file: string
	/** 1-based line of the `src` attribute */
	line: number
	/** Source excerpt stored for the image */
	snippet: string
	/** The `src` value exactly as written in the source */
	src: string
}
51
-
52
/** Text entries collected by initializeSearchIndex(); reset by clearSourceFinderCache() */
let textSearchIndex: Array<SearchIndexEntry> = []
/** Image entries collected by initializeSearchIndex(); reset by clearSourceFinderCache() */
let imageSearchIndex: Array<ImageIndexEntry> = []
/** True once the indexes above have been populated for the current build */
let searchIndexInitialized = false
56
-
57
/**
 * Reset all module state: the three file/directory caches and both search
 * indexes. Intended to be called at the start of each build.
 */
export function clearSourceFinderCache(): void {
	for (const cache of [parsedFileCache, directoryCache, markdownFileCache]) {
		cache.clear()
	}

	searchIndexInitialized = false
	textSearchIndex = []
	imageSearchIndex = []
}
68
-
69
/**
 * Build the text and image search indexes by scanning every source file under
 * `src/components`, `src/pages` and `src/layouts` once. Does nothing when the
 * indexes are already initialized.
 *
 * Files are parsed in parallel but indexed sequentially in a stable order
 * (directory priority first, then sorted path). Lookups return the first
 * matching entry, so a stable order keeps results reproducible between builds
 * when the same text appears in more than one file.
 *
 * Missing directories and files that fail to parse are skipped silently.
 */
export async function initializeSearchIndex(): Promise<void> {
	if (searchIndexInitialized) return

	const projectRoot = getProjectRoot()
	const srcDir = path.join(projectRoot, 'src')
	const searchDirs = [
		path.join(srcDir, 'components'),
		path.join(srcDir, 'pages'),
		path.join(srcDir, 'layouts'),
	]

	// Collect all source files first
	const allFiles: string[] = []
	for (const dir of searchDirs) {
		try {
			const files = await collectAstroFiles(dir)
			// Sort a copy: the returned array may be the cached listing itself
			allFiles.push(...[...files].sort())
		} catch {
			// Directory doesn't exist
		}
	}

	// Parse in parallel; Promise.all keeps results aligned with allFiles
	const parsedFiles = await Promise.all(allFiles.map(async (filePath) => {
		try {
			return await getCachedParsedFile(filePath)
		} catch {
			// Skip files that fail to parse
			return null
		}
	}))

	// Index sequentially so entry order does not depend on parse timing
	allFiles.forEach((filePath, position) => {
		const cached = parsedFiles[position]
		if (!cached) return

		const relFile = path.relative(projectRoot, filePath)
		try {
			// Index all text content from this file
			indexFileContent(cached, relFile)

			// Index all images from this file
			indexFileImages(cached, relFile)
		} catch {
			// Skip files that fail while indexing
		}
	})

	searchIndexInitialized = true
}
114
-
115
/**
 * Recursively collect the component source files (`.astro`, `.tsx`, `.jsx`)
 * below a directory.
 *
 * The listing follows the order returned by `readdir`, with each
 * subdirectory's files placed where that subdirectory appears. Sub-listings
 * are awaited in parallel but merged by position, so the result does not
 * depend on which subdirectory finishes first.
 *
 * @param dir - Directory to scan
 * @returns Full paths of matching files; an empty array when `dir` cannot be
 * read. The result is stored in `directoryCache` and returned as-is on later
 * calls, so callers should not mutate it.
 */
async function collectAstroFiles(dir: string): Promise<string[]> {
	const cached = directoryCache.get(dir)
	if (cached) return cached

	const sourceExtensions = ['.astro', '.tsx', '.jsx']
	let results: string[] = []

	try {
		const entries = await fs.readdir(dir, { withFileTypes: true })

		const perEntry = await Promise.all(entries.map(async (entry): Promise<string[]> => {
			const fullPath = path.join(dir, entry.name)
			if (entry.isDirectory()) {
				return collectAstroFiles(fullPath)
			}
			if (entry.isFile() && sourceExtensions.some((ext) => entry.name.endsWith(ext))) {
				return [fullPath]
			}
			return []
		}))

		results = perEntry.flat()
	} catch {
		// Directory doesn't exist (or is unreadable): treat as empty
	}

	directoryCache.set(dir, results)
	return results
}
143
-
144
/**
 * Return the parsed form of a source file, reading and parsing it on first use.
 *
 * `.astro` files get a template AST plus frontmatter variable definitions.
 * Any other file is cached with its text and lines only, paired with an empty
 * root placeholder AST.
 *
 * @param filePath - Path of the file to load
 * @returns The cached entry, or null when reading or parsing throws
 */
async function getCachedParsedFile(filePath: string): Promise<CachedParsedFile | null> {
	const hit = parsedFileCache.get(filePath)
	if (hit) return hit

	try {
		const content = await fs.readFile(filePath, 'utf-8')
		const lines = content.split('\n')
		let parsed: CachedParsedFile

		if (filePath.endsWith('.astro')) {
			const { ast, frontmatterContent, frontmatterStartLine } = await parseAstroFile(content)

			// Variable definitions exist only when frontmatter is present and parses
			const frontmatterAst = frontmatterContent ? parseFrontmatter(frontmatterContent, filePath) : null
			const variableDefinitions: VariableDefinition[] = frontmatterAst
				? extractVariableDefinitions(frontmatterAst, frontmatterStartLine)
				: []

			parsed = { content, lines, ast, frontmatterContent, frontmatterStartLine, variableDefinitions }
		} else {
			// tsx/jsx: no AST parse, only content/lines for regex search
			parsed = {
				content,
				lines,
				ast: { type: 'root', children: [] } as unknown as AstroNode,
				frontmatterContent: null,
				frontmatterStartLine: 0,
				variableDefinitions: [],
			}
		}

		parsedFileCache.set(filePath, parsed)
		return parsed
	} catch {
		return null
	}
}
195
-
196
/**
 * Add every searchable piece of text in a parsed file to `textSearchIndex`.
 *
 * For each element or component in the template AST:
 * - when its normalized text has at least two characters, one `variable`
 *   entry is added per frontmatter definition whose name is referenced by an
 *   expression inside the node, followed by one `static` entry for the node;
 * - for components, one `prop` entry is added per quoted attribute whose
 *   normalized value has at least two characters.
 *
 * @param cached - Parsed file (AST, lines and frontmatter variable definitions)
 * @param relFile - File path stored on every entry
 */
function indexFileContent(cached: CachedParsedFile, relFile: string): void {
	// Walk AST and collect all text elements
	function visit(node: AstroNode) {
		if (node.type === 'element' || node.type === 'component') {
			const elemNode = node as ElementNode | ComponentNode
			const tag = elemNode.name.toLowerCase()
			const normalizedText = normalizeText(getTextContent(elemNode))
			const line = elemNode.position?.start.line ?? 0

			if (normalizedText && normalizedText.length >= 2) {
				// Variable references: the entry points at the definition line,
				// so no tag snippet is needed for these
				for (const varName of hasExpressionChild(elemNode).varNames) {
					for (const def of cached.variableDefinitions) {
						if (def.name !== varName) continue

						textSearchIndex.push({
							file: relFile,
							line: def.line,
							snippet: cached.lines[def.line - 1] || '',
							type: 'variable',
							variableName: def.parentName ? `${def.parentName}.${def.name}` : def.name,
							definitionLine: def.line,
							normalizedText: normalizeText(def.value),
							tag,
						})
					}
				}

				// Index static text content
				const completeSnippet = extractCompleteTagSnippet(cached.lines, line - 1, tag)
				const snippet = extractInnerHtmlFromSnippet(completeSnippet, tag) ?? completeSnippet

				textSearchIndex.push({
					file: relFile,
					line,
					snippet,
					type: 'static',
					normalizedText,
					tag,
				})
			}

			// Also index component props
			if (node.type === 'component') {
				for (const attr of elemNode.attributes) {
					if (attr.type !== 'attribute' || attr.kind !== 'quoted' || !attr.value) continue

					const normalizedValue = normalizeText(attr.value)
					if (!normalizedValue || normalizedValue.length < 2) continue

					// Fall back to the component's line when the attribute has no position
					const attrLine = attr.position?.start.line ?? line
					textSearchIndex.push({
						file: relFile,
						line: attrLine,
						snippet: cached.lines[attrLine - 1] || '',
						type: 'prop',
						variableName: attr.name,
						normalizedText: normalizedValue,
						tag,
					})
				}
			}
		}

		if ('children' in node && Array.isArray(node.children)) {
			for (const child of node.children) {
				visit(child)
			}
		}
	}

	visit(cached.ast)
}
279
-
280
/**
 * Add every image `src` found in a parsed file to `imageSearchIndex`.
 *
 * `.astro` files are scanned through the template AST (`<img>` elements with
 * a non-empty `src` attribute). Other files are scanned line by line for
 * `src="..."` and `src='...'`.
 *
 * @param cached - Parsed file (AST and lines)
 * @param relFile - File path stored on every entry; its extension selects the scan mode
 */
function indexFileImages(cached: CachedParsedFile, relFile: string): void {
	if (!relFile.endsWith('.astro')) {
		// tsx/jsx: regex scan, double-quoted matches before single-quoted on each line
		const srcPatterns = [/src="([^"]+)"/g, /src='([^']+)'/g]

		cached.lines.forEach((text, lineIndex) => {
			if (!text) return

			for (const pattern of srcPatterns) {
				for (const found of text.matchAll(pattern)) {
					imageSearchIndex.push({
						file: relFile,
						line: lineIndex + 1,
						snippet: extractImageSnippet(cached.lines, lineIndex),
						src: found[1]!,
					})
				}
			}
		})
		return
	}

	// Astro: walk the AST in document order
	const walk = (node: AstroNode): void => {
		if (node.type === 'element' && (node as ElementNode).name.toLowerCase() === 'img') {
			const img = node as ElementNode
			for (const attr of img.attributes) {
				if (attr.type !== 'attribute' || attr.name !== 'src' || !attr.value) continue

				const srcLine = attr.position?.start.line ?? img.position?.start.line ?? 0
				imageSearchIndex.push({
					file: relFile,
					line: srcLine,
					snippet: extractImageSnippet(cached.lines, srcLine - 1),
					src: attr.value,
				})
			}
		}

		if ('children' in node && Array.isArray(node.children)) {
			node.children.forEach((child) => walk(child))
		}
	}

	walk(cached.ast)
}
335
-
336
/**
 * Look text up in the pre-built text index.
 *
 * Three passes, first hit wins:
 * 1. same tag and identical normalized text;
 * 2. for searches longer than 10 characters: same tag and an entry that
 *    contains the first 30 characters of the search;
 * 3. identical normalized text on any tag.
 *
 * @param textContent - Rendered text to locate
 * @param tag - Tag name of the rendered element (case-insensitive)
 * @returns Location of the first matching entry, or undefined
 */
function findInTextIndex(textContent: string, tag: string): SourceLocation | undefined {
	const wanted = normalizeText(textContent)
	const wantedTag = tag.toLowerCase()

	const toLocation = (entry: SearchIndexEntry): SourceLocation => ({
		file: entry.file,
		line: entry.line,
		snippet: entry.snippet,
		type: entry.type,
		variableName: entry.variableName,
		definitionLine: entry.definitionLine,
	})

	// Pass 1: exact text on the same tag
	const exactSameTag = textSearchIndex.find((entry) => entry.tag === wantedTag && entry.normalizedText === wanted)
	if (exactSameTag) return toLocation(exactSameTag)

	// Pass 2: prefix containment on the same tag, longer text only
	if (wanted.length > 10) {
		const preview = wanted.slice(0, 30)
		const partial = textSearchIndex.find((entry) => entry.tag === wantedTag && entry.normalizedText.includes(preview))
		if (partial) return toLocation(partial)
	}

	// Pass 3: exact text on any tag
	const exactAnyTag = textSearchIndex.find((entry) => entry.normalizedText === wanted)
	return exactAnyTag ? toLocation(exactAnyTag) : undefined
}
390
-
391
/**
 * Look an image up in the pre-built image index by exact `src` value.
 *
 * @param imageSrc - `src` value to locate
 * @returns Location of the first entry with that `src` (typed `static`), or undefined
 */
function findInImageIndex(imageSrc: string): SourceLocation | undefined {
	const hit = imageSearchIndex.find((entry) => entry.src === imageSrc)
	if (!hit) return undefined

	return {
		file: hit.file,
		line: hit.line,
		snippet: hit.snippet,
		type: 'static',
	}
}
407
-
408
/**
 * Concatenate the values of all text nodes at or below `node`, in document
 * order. Nodes that are neither text nor containers contribute nothing.
 */
function getTextContent(node: AstroNode): string {
	if (node.type === 'text') {
		return (node as TextNode).value
	}
	if (!('children' in node) || !Array.isArray(node.children)) {
		return ''
	}

	const pieces: string[] = []
	for (const child of node.children) {
		pieces.push(getTextContent(child))
	}
	return pieces.join('')
}
418
-
419
/**
 * Report whether `node` is, or contains, a template expression that is a bare
 * identifier (`{foo}`) or a single property access (`{foo.bar}`), and collect
 * the referenced names. For `foo.bar` the collected name is `bar`.
 *
 * An expression node always reports `found: true`, even when its text is not
 * in one of those two forms. Any other node reports `found` only when at
 * least one name was collected from its descendants.
 */
function hasExpressionChild(node: AstroNode): { found: boolean; varNames: string[] } {
	if (node.type === 'expression') {
		const simpleRef = getTextContent(node).match(/^\s*(\w+)(?:\.(\w+))?\s*$/)
		return { found: true, varNames: simpleRef ? [simpleRef[2] ?? simpleRef[1]!] : [] }
	}

	const collected: string[] = []
	if ('children' in node && Array.isArray(node.children)) {
		for (const child of node.children) {
			const nested = hasExpressionChild(child)
			if (!nested.found) continue
			collected.push(...nested.varNames)
		}
	}
	return { found: collected.length > 0, varNames: collected }
}
440
-
441
/** Where a piece of rendered content was found in the project sources */
export interface SourceLocation {
	/** File containing the content */
	file: string
	/** Line of the content within `file` */
	line: number
	/** Source excerpt for the match, when one was captured */
	snippet?: string
	/** How the content reaches the page */
	type?: 'static' | 'variable' | 'prop' | 'computed' | 'collection'
	/** Variable or prop name for non-static matches */
	variableName?: string
	/** Line where the referenced variable is defined */
	definitionLine?: number
	/** Collection name for collection entries */
	collectionName?: string
	/** Entry slug for collection entries */
	collectionSlug?: string
}
453
-
454
/** A variable reference together with the line of its definition */
export interface VariableReference {
	/** Variable name */
	name: string
	/** Pattern associated with the reference */
	pattern: string
	/** Line where the variable is defined */
	definitionLine: number
}
459
-
460
/** Identifies one entry of a content collection */
export interface CollectionInfo {
	/** Collection name */
	name: string
	/** Entry slug within the collection */
	slug: string
	/** File backing the entry */
	file: string
}
465
-
466
/** Parsed contents of one markdown collection entry */
export interface MarkdownContent {
	/** Frontmatter fields by key, each with its value and line number */
	frontmatter: Record<string, { value: string; line: number }>
	/** Full markdown body */
	body: string
	/** Line number at which the body starts */
	bodyStartLine: number
	/** File path relative to cwd */
	file: string
	/** Name of the collection the entry belongs to */
	collectionName: string
	/** Slug of the entry within its collection */
	collectionSlug: string
}
480
-
481
- // ============================================================================
482
- // AST Parsing Utilities
483
- // ============================================================================
484
-
485
/** Result of parsing one Astro file */
interface ParsedAstroFile {
	/** Root of the template AST */
	ast: AstroNode
	/** Frontmatter source text, or null when the file has no frontmatter node */
	frontmatterContent: string | null
	/** Start line of the frontmatter node (0 when there is none) */
	frontmatterStartLine: number
}
490
-
491
/**
 * Parse Astro source and return the template AST together with the text and
 * start line of its first frontmatter node.
 *
 * @param content - Full text of the `.astro` file
 * @returns The AST; frontmatter text is null and its line 0 when no
 * frontmatter node exists, and the line falls back to 1 when the node carries
 * no position
 */
async function parseAstroFile(content: string): Promise<ParsedAstroFile> {
	const { ast } = await parseAstro(content, { position: true })

	const parsed: ParsedAstroFile = {
		ast,
		frontmatterContent: null,
		frontmatterStartLine: 0,
	}

	// Only the first frontmatter node among the root's children is used
	for (const child of ast.children) {
		if (child.type !== 'frontmatter') continue
		parsed.frontmatterContent = child.value
		parsed.frontmatterStartLine = child.position?.start.line ?? 1
		break
	}

	return parsed
}
515
-
516
/** Loose shape of a Babel AST node: only `type` is known, the rest is read dynamically */
interface BabelNode {
	type: string
	[key: string]: unknown
}

/** Loose shape of the Babel parse result: a `File` whose program holds the top-level statements */
interface BabelFile {
	type: 'File'
	program: BabelNode & { body: Array<BabelNode> }
}
527
-
528
/**
 * Parse frontmatter source as a TypeScript module with Babel, in
 * error-recovery mode.
 *
 * @param content - Frontmatter source text
 * @param filePath - Optional file path; when given, a thrown parse error is
 * recorded as a warning on the error collector
 * @returns The parsed file, or null when Babel throws
 */
function parseFrontmatter(content: string, filePath?: string): BabelFile | null {
	let parsed: BabelFile | null = null

	try {
		parsed = parseBabel(content, {
			sourceType: 'module',
			plugins: ['typescript'],
			errorRecovery: true,
		}) as unknown as BabelFile
	} catch (error) {
		// Without a file path there is nothing useful to attach the warning to
		if (filePath) {
			getErrorCollector().addWarning(
				`Frontmatter parse: ${filePath}`,
				error instanceof Error ? error.message : String(error),
			)
		}
	}

	return parsed
}
551
-
552
/** A string-valued variable, or object property, found in frontmatter */
interface VariableDefinition {
	/** Variable name, or the property key for object properties */
	name: string
	/** The string value */
	value: string
	/** File line of the definition */
	line: number
	/** For object properties, the name of the variable holding the object */
	parentName?: string
}
559
-
560
/**
 * Collect string-valued variable definitions from a frontmatter Babel AST.
 *
 * Every `VariableDeclaration` anywhere in the tree is inspected. For each
 * declarator with an identifier name and an initializer:
 * - a string literal, or a template literal without interpolation, yields one
 *   definition;
 * - an object literal yields one definition per identifier-keyed property
 *   with such a string value, with `parentName` set to the variable name.
 *
 * Lines are converted from Babel's numbering to file lines:
 * `file_line = (babel_line - 1) + frontmatterStartLine`. A node without
 * location data is treated as Babel line 1.
 *
 * @param ast - Parsed frontmatter
 * @param frontmatterStartLine - File line corresponding to Babel line 1
 * @returns Definitions in traversal order
 */
function extractVariableDefinitions(ast: BabelFile, frontmatterStartLine: number): VariableDefinition[] {
	type Located = { start: { line: number } } | undefined

	const found: VariableDefinition[] = []

	const toFileLine = (loc: Located): number => ((loc?.start.line ?? 1) - 1) + frontmatterStartLine

	// String carried by a literal node, or null when the node is not a plain string
	const literalText = (node: BabelNode): string | null => {
		switch (node.type) {
			case 'StringLiteral':
				return node.value as string
			case 'TemplateLiteral': {
				const quasis = node.quasis as Array<{ value: { cooked: string | null } }> | undefined
				const expressions = node.expressions as unknown[] | undefined
				// Only templates with no interpolation count as plain strings
				const isPlain = quasis?.length === 1 && expressions?.length === 0
				return isPlain ? (quasis?.[0]?.value.cooked ?? null) : null
			}
			default:
				return null
		}
	}

	const collectFromDeclarator = (decl: BabelNode): void => {
		const id = decl.id as BabelNode | undefined
		const init = decl.init as BabelNode | undefined
		if (id?.type !== 'Identifier' || !init) return

		const varName = id.name as string
		const line = toFileLine(decl.loc as Located)

		// Simple string value
		const direct = literalText(init)
		if (direct !== null) {
			found.push({ name: varName, value: direct, line })
		}

		// Object expression - one definition per string property
		if (init.type !== 'ObjectExpression') return
		for (const prop of (init.properties as BabelNode[] | undefined) ?? []) {
			const key = prop.key as BabelNode | undefined
			const value = prop.value as BabelNode | undefined
			if (prop.type !== 'ObjectProperty' || key?.type !== 'Identifier' || !value) continue

			const text = literalText(value)
			if (text === null) continue

			found.push({
				name: key.name as string,
				value: text,
				line: toFileLine(prop.loc as Located),
				parentName: varName,
			})
		}
	}

	const walk = (node: BabelNode): void => {
		if (node.type === 'VariableDeclaration') {
			for (const decl of (node.declarations as BabelNode[] | undefined) ?? []) {
				collectFromDeclarator(decl)
			}
		}

		// Descend into anything that looks like an AST node (has a `type` key)
		for (const child of Object.values(node)) {
			if (!child || typeof child !== 'object') continue

			if (Array.isArray(child)) {
				for (const item of child) {
					if (item && typeof item === 'object' && 'type' in item) {
						walk(item as BabelNode)
					}
				}
			} else if ('type' in child) {
				walk(child as BabelNode)
			}
		}
	}

	walk(ast.program)
	return found
}
652
-
653
/** Best match for a piece of text within one template */
interface TemplateMatch {
	/** Template line of the match */
	line: number
	/** How the matched text is produced */
	type: 'static' | 'variable' | 'computed'
	/** Name of the referenced variable, for variable matches */
	variableName?: string
	/** For variables, the definition line in frontmatter */
	definitionLine?: number
}
660
-
661
/**
 * Walk the Astro AST and find the best element or component with the given
 * tag whose content matches `searchText`.
 *
 * Candidates are scored and the highest score wins (the first one found on a tie):
 * - 100: the element references a frontmatter variable whose normalized value
 *   equals the search text. Descendants of that element are not examined.
 * - 80: search text of at most 10 characters is contained in the element text.
 * - 50-90: for longer search text, the element text contains its first 30
 *   characters. The score grows with how much of the search length the
 *   element text could cover.
 * - 40: for search text over 20 characters, the element text contains its
 *   first three words.
 * Static candidates are only considered when the element's normalized text
 * has at least two characters and the search text is non-empty.
 *
 * @param ast - Root of the template AST
 * @param tag - Tag name to match (case-insensitive)
 * @param searchText - Rendered text to locate
 * @param variableDefinitions - Frontmatter string definitions of the same file
 * @returns The best match, or null when nothing matched
 */
function findElementWithText(
	ast: AstroNode,
	tag: string,
	searchText: string,
	variableDefinitions: VariableDefinition[],
): TemplateMatch | null {
	const normalizedSearch = normalizeText(searchText)
	const tagLower = tag.toLowerCase()
	let bestMatch: TemplateMatch | null = null
	let bestScore = 0

	/** Line of the first descendant text node whose normalized value contains `needle` */
	function findTextLine(node: AstroNode, needle: string): number | null {
		if (node.type === 'text') {
			const textNode = node as TextNode
			if (normalizeText(textNode.value).includes(needle)) {
				return textNode.position?.start.line ?? null
			}
		}
		if ('children' in node && Array.isArray(node.children)) {
			for (const child of node.children) {
				const line = findTextLine(child, needle)
				if (line !== null) return line
			}
		}
		return null
	}

	/** Record a static-text candidate when it beats the current best score */
	function offerStatic(score: number, elemNode: AstroNode, needle: string, fallbackLine: number): void {
		if (score <= bestScore) return
		bestScore = score
		bestMatch = {
			// Prefer the line of the text node itself over the element's opening line
			line: findTextLine(elemNode, needle) ?? fallbackLine,
			type: 'static',
		}
	}

	function visit(node: AstroNode): void {
		// Check if this is an element or component matching our tag
		if ((node.type === 'element' || node.type === 'component') && node.name.toLowerCase() === tagLower) {
			const elemNode = node as ElementNode | ComponentNode
			const normalizedContent = normalizeText(getTextContent(elemNode))
			const line = elemNode.position?.start.line ?? 0

			// Variable reference: highest priority
			for (const varName of hasExpressionChild(elemNode).varNames) {
				for (const def of variableDefinitions) {
					if (def.name !== varName) continue
					if (normalizeText(def.value) !== normalizedSearch) continue

					if (bestScore < 100) {
						bestScore = 100
						bestMatch = {
							line,
							type: 'variable',
							variableName: def.parentName ? `${def.parentName}.${def.name}` : def.name,
							definitionLine: def.line,
						}
					}
					// A variable match settles this element; its children are not visited
					return
				}
			}

			// Static content: only when the element has meaningful text of its own
			if (normalizedContent && normalizedContent.length >= 2 && normalizedSearch.length > 0) {
				if (normalizedSearch.length <= 10) {
					// Short search text must be contained in full
					if (normalizedContent.includes(normalizedSearch)) {
						offerStatic(80, elemNode, normalizedSearch, line)
					}
				} else {
					// Longer search text: match on its leading 30 characters
					const textPreview = normalizedSearch.slice(0, 30)
					if (normalizedContent.includes(textPreview)) {
						const matchLength = Math.min(normalizedSearch.length, normalizedContent.length)
						offerStatic(50 + (matchLength / normalizedSearch.length) * 40, elemNode, textPreview, line)
					} else if (normalizedSearch.length > 20) {
						// Last resort for long text: its first three words
						const firstWords = normalizedSearch.split(' ').slice(0, 3).join(' ')
						if (firstWords && normalizedContent.includes(firstWords)) {
							offerStatic(40, elemNode, firstWords, line)
						}
					}
				}
			}
		}

		// Recursively visit children
		if ('children' in node && Array.isArray(node.children)) {
			for (const child of node.children) {
				visit(child)
			}
		}
	}

	visit(ast)
	return bestMatch
}
819
-
820
/** A component attribute whose value matched the searched text */
interface ComponentPropMatch {
	/** Line of the attribute, or of the component when the attribute has no position */
	line: number
	/** Attribute name */
	propName: string
	/** Attribute value as written in the source */
	propValue: string
}
825
-
826
/**
 * Search the Astro AST, in document order, for the first component attribute
 * of kind `quoted` whose normalized value equals the normalized search text.
 *
 * @param ast - Root of the template AST
 * @param searchText - Text to locate among component props
 * @returns The first matching prop, or null when none matches
 */
function findComponentProp(
	ast: AstroNode,
	searchText: string,
): ComponentPropMatch | null {
	const wanted = normalizeText(searchText)

	const matchOwnProps = (comp: ComponentNode): ComponentPropMatch | null => {
		for (const attr of comp.attributes) {
			if (attr.type !== 'attribute' || attr.kind !== 'quoted') continue
			if (normalizeText(attr.value) !== wanted) continue

			return {
				line: attr.position?.start.line ?? comp.position?.start.line ?? 0,
				propName: attr.name,
				propValue: attr.value,
			}
		}
		return null
	}

	const search = (node: AstroNode): ComponentPropMatch | null => {
		// A component's own props are checked before its children
		if (node.type === 'component') {
			const own = matchOwnProps(node as ComponentNode)
			if (own) return own
		}

		if ('children' in node && Array.isArray(node.children)) {
			for (const child of node.children) {
				const nested = search(child)
				if (nested) return nested
			}
		}

		return null
	}

	return search(ast)
}
866
-
867
/** An `<img>` element whose `src` matched the searched value */
interface ImageMatch {
	/** Line of the `src` attribute, or of the element when the attribute has no position */
	line: number
	/** The matched `src` value */
	src: string
	/** Source excerpt for the image */
	snippet: string
}
872
-
873
/**
 * Search the Astro AST, in document order, for the first `<img>` element
 * whose `src` attribute equals `imageSrc` exactly.
 *
 * @param ast - Root of the template AST
 * @param imageSrc - `src` value to locate
 * @param lines - Lines of the source file, used to build the snippet
 * @returns The first matching image, or null when none matches
 */
function findImageElement(
	ast: AstroNode,
	imageSrc: string,
	lines: string[],
): ImageMatch | null {
	const search = (node: AstroNode): ImageMatch | null => {
		if (node.type === 'element' && (node as ElementNode).name.toLowerCase() === 'img') {
			const img = node as ElementNode
			const srcAttr = img.attributes.find((attr) =>
				attr.type === 'attribute' && attr.name === 'src' && attr.value === imageSrc
			)
			if (srcAttr) {
				const srcLine = srcAttr.position?.start.line ?? img.position?.start.line ?? 0
				return {
					line: srcLine,
					src: imageSrc,
					snippet: extractImageSnippet(lines, srcLine - 1),
				}
			}
		}

		// Not this node: keep looking among its children
		if ('children' in node && Array.isArray(node.children)) {
			for (const child of node.children) {
				const nested = search(child)
				if (nested) return nested
			}
		}

		return null
	}

	return search(ast)
}
912
-
913
/**
 * Resolve the source file and line for a piece of rendered text.
 *
 * When the search index has been initialized the lookup is delegated to it.
 * Otherwise the `components`, `pages` and `layouts` directories under `src`
 * are scanned: first for the text itself, then for a component prop carrying it.
 *
 * @param textContent - Text to locate
 * @param tag - Tag name of the element that renders the text
 * @returns The source location, or undefined when nothing matched
 */
export async function findSourceLocation(
	textContent: string,
	tag: string,
): Promise<SourceLocation | undefined> {
	if (searchIndexInitialized) {
		// Fast path: pre-built index
		return findInTextIndex(textContent, tag)
	}

	// Slow path: walk the source tree
	const srcDir = path.join(getProjectRoot(), 'src')

	try {
		const searchDirs = ['components', 'pages', 'layouts'].map(name => path.join(srcDir, name))

		// Pass 1 looks for the text directly; pass 2 looks for it as a prop value
		// handed to a component. Every directory is tried in pass 1 before pass 2 starts.
		const passes: Array<(dir: string) => Promise<SourceLocation | undefined>> = [
			dir => searchDirectory(dir, textContent, tag),
			dir => searchForPropInParents(dir, textContent),
		]

		for (const runPass of passes) {
			for (const dir of searchDirs) {
				try {
					const found = await runPass(dir)
					if (found) {
						return found
					}
				} catch {
					// Directory doesn't exist, continue
				}
			}
		}
	} catch {
		// Search failed
	}

	return undefined
}
964
-
965
/**
 * Resolve the source file and line for an image, identified by its `src` value.
 *
 * When the search index has been initialized the lookup is delegated to it.
 * Otherwise `pages`, `components` and `layouts` under `src` are scanned in that order.
 *
 * @param imageSrc - The `src` attribute value to locate
 * @returns The source location, or undefined when nothing matched
 */
export async function findImageSourceLocation(
	imageSrc: string,
): Promise<SourceLocation | undefined> {
	if (searchIndexInitialized) {
		// Fast path: pre-built index
		return findInImageIndex(imageSrc)
	}

	// Slow path: walk the source tree
	const srcDir = path.join(getProjectRoot(), 'src')

	try {
		const searchDirs = ['pages', 'components', 'layouts'].map(name => path.join(srcDir, name))

		for (const dir of searchDirs) {
			try {
				const found = await searchDirectoryForImage(dir, imageSrc)
				if (found) {
					return found
				}
			} catch {
				// Directory doesn't exist, continue
			}
		}
	} catch {
		// Search failed
	}

	return undefined
}
1003
-
1004
/**
 * Walk `dir` recursively and return the first location of an image whose
 * `src` matches. Only `.astro`, `.tsx` and `.jsx` files are inspected.
 * Errors while reading a directory are swallowed and reported as "not found".
 */
async function searchDirectoryForImage(
	dir: string,
	imageSrc: string,
): Promise<SourceLocation | undefined> {
	const searchableExtensions = ['.astro', '.tsx', '.jsx']

	try {
		const entries = await fs.readdir(dir, { withFileTypes: true })

		for (const entry of entries) {
			const fullPath = path.join(dir, entry.name)
			let found: SourceLocation | undefined

			if (entry.isDirectory()) {
				found = await searchDirectoryForImage(fullPath, imageSrc)
			} else if (entry.isFile() && searchableExtensions.some(ext => entry.name.endsWith(ext))) {
				found = await searchFileForImage(fullPath, imageSrc)
			}

			if (found) return found
		}
	} catch {
		// Error reading directory
	}

	return undefined
}
1031
-
1032
/**
 * Look for an image with the given `src` inside one file, using the parsed-file cache.
 *
 * `.astro` files are checked through their AST first. If that yields nothing
 * (or the file is TSX/JSX), the raw lines are scanned for a quoted `src="..."`
 * or `src='...'` occurrence.
 */
async function searchFileForImage(
	filePath: string,
	imageSrc: string,
): Promise<SourceLocation | undefined> {
	try {
		const parsed = await getCachedParsedFile(filePath)
		if (!parsed) return undefined

		const { lines, ast } = parsed

		// AST lookup for Astro files
		if (filePath.endsWith('.astro')) {
			const astHit = findImageElement(ast, imageSrc, lines)
			if (astHit) {
				return {
					file: path.relative(getProjectRoot(), filePath),
					line: astHit.line,
					snippet: astHit.snippet,
					type: 'static',
				}
			}
		}

		// Plain text fallback: first line containing either quoted form
		const needles = [`src="${imageSrc}"`, `src='${imageSrc}'`]
		const lineIndex = lines.findIndex(text => Boolean(text) && needles.some(needle => text.includes(needle)))

		if (lineIndex !== -1) {
			// Found the image, extract the full <img> tag as snippet
			const snippet = extractImageSnippet(lines, lineIndex)
			return {
				file: path.relative(getProjectRoot(), filePath),
				line: lineIndex + 1,
				snippet,
				type: 'static',
			}
		}
	} catch {
		// Error reading file
	}

	return undefined
}
1091
-
1092
/**
 * Collect the source lines of an `<img>` tag starting at `startLine` (0-indexed).
 *
 * At most 10 lines are scanned and empty lines are skipped. Scanning stops at the
 * first line that contains `/>`, or that contains both `<img` and `>`. When no such
 * line is found and more than one line was collected, only the first line is returned.
 */
function extractImageSnippet(lines: string[], startLine: number): string {
	const scanEnd = Math.min(startLine + 10, lines.length)
	const collected: string[] = []
	let tagClosed = false

	let cursor = startLine
	while (cursor < scanEnd && !tagClosed) {
		const text = lines[cursor]
		cursor += 1
		if (!text) continue

		collected.push(text)
		// img tags can be self-closing /> or just >
		tagClosed = text.includes('/>') || (text.includes('<img') && text.includes('>'))
	}

	if (tagClosed || collected.length <= 1) {
		return collected.join('\n')
	}

	return collected[0]!
}
1119
-
1120
/**
 * Walk `dir` recursively and return the first location where an `.astro`
 * file renders `textContent` inside a `tag` element.
 * Errors while reading a directory are swallowed and reported as "not found".
 */
async function searchDirectory(
	dir: string,
	textContent: string,
	tag: string,
): Promise<SourceLocation | undefined> {
	try {
		const entries = await fs.readdir(dir, { withFileTypes: true })

		for (const entry of entries) {
			const fullPath = path.join(dir, entry.name)
			let found: SourceLocation | undefined

			if (entry.isDirectory()) {
				found = await searchDirectory(fullPath, textContent, tag)
			} else if (entry.isFile() && entry.name.endsWith('.astro')) {
				found = await searchAstroFile(fullPath, textContent, tag)
			}

			if (found) return found
		}
	} catch {
		// Error reading directory
	}

	return undefined
}
1148
-
1149
/**
 * Look for `textContent` inside a `tag` element of one Astro file, using the
 * parsed-file cache and the template AST.
 *
 * For a variable match with a known definition line, the reported line is the
 * definition; otherwise it is the line of the match itself.
 */
async function searchAstroFile(
	filePath: string,
	textContent: string,
	tag: string,
): Promise<SourceLocation | undefined> {
	try {
		const parsed = await getCachedParsedFile(filePath)
		if (!parsed) return undefined

		const { lines, ast, variableDefinitions } = parsed

		const match = findElementWithText(ast, tag, textContent, variableDefinitions)
		if (!match) return undefined

		// Line a user would edit: the definition for variables, the usage otherwise
		const editableLine = match.type === 'variable' && match.definitionLine
			? match.definitionLine
			: match.line

		let snippet: string
		if (match.type === 'static') {
			// Static content: keep only the innerHTML, falling back to the whole tag
			const completeSnippet = extractCompleteTagSnippet(lines, editableLine - 1, tag)
			snippet = extractInnerHtmlFromSnippet(completeSnippet, tag) ?? completeSnippet
		} else {
			// Variables/props: the raw line, indentation included
			snippet = lines[editableLine - 1] || ''
		}

		return {
			file: path.relative(getProjectRoot(), filePath),
			line: editableLine,
			snippet,
			type: match.type,
			variableName: match.variableName,
			definitionLine: match.type === 'variable' ? match.definitionLine : undefined,
		}
	} catch {
		// Error reading/parsing file
	}

	return undefined
}
1200
-
1201
/**
 * Walk `dir` recursively looking for an `.astro` file that passes `textContent`
 * to a component as a prop, using the parsed-file cache and the AST.
 *
 * Errors from reading `dir` itself propagate to the caller; errors while
 * handling an individual file are swallowed and the walk continues.
 */
async function searchForPropInParents(dir: string, textContent: string): Promise<SourceLocation | undefined> {
	// Gathers up to 10 lines of the component usage starting at `start` (0-indexed).
	const collectComponentSnippet = (lines: string[], start: number): string => {
		const collected: string[] = []
		const scanEnd = Math.min(start + 10, lines.length)

		for (let i = start; i < scanEnd; i++) {
			const text = lines[i]
			if (!text) continue
			collected.push(text)

			// A self-closing marker always ends the snippet
			if (text.includes('/>')) break

			if (text.includes('>')) {
				// Any later line containing '>' ends the snippet. The first line only
				// ends it when it does not itself open a component (`<` + capital letter).
				const opensComponent = /<[A-Z]/.test(text)
				if (i > start || !opensComponent) break
			}
		}

		return collected.join('\n')
	}

	const entries = await fs.readdir(dir, { withFileTypes: true })

	for (const entry of entries) {
		const fullPath = path.join(dir, entry.name)

		if (entry.isDirectory()) {
			const nested = await searchForPropInParents(fullPath, textContent)
			if (nested) return nested
			continue
		}

		if (!entry.isFile() || !entry.name.endsWith('.astro')) continue

		try {
			const parsed = await getCachedParsedFile(fullPath)
			if (!parsed) continue

			const { lines, ast } = parsed

			const propMatch = findComponentProp(ast, textContent)
			if (!propMatch) continue

			return {
				file: path.relative(getProjectRoot(), fullPath),
				line: propMatch.line,
				snippet: collectComponentSnippet(lines, propMatch.line - 1),
				type: 'prop',
				variableName: propMatch.propName,
			}
		} catch {
			// Error parsing file, continue
		}
	}

	return undefined
}
1267
-
1268
/**
 * Extract complete tag snippet including content and indentation.
 * Exported for use in html-processor to populate sourceSnippet.
 *
 * When startLine points to a line inside the element (e.g., the text content line),
 * this function searches backwards (up to 20 lines) to find the opening tag first.
 * From there at most 30 lines are scanned; empty lines are skipped.
 *
 * @param lines - Source file split into lines
 * @param startLine - 0-indexed line at or inside the element
 * @param tag - Tag name to extract (matched case-insensitively)
 * @returns The lines from the opening tag through its closing tag joined with `\n`;
 *          only the first collected line when no closing was found
 */
export function extractCompleteTagSnippet(lines: string[], startLine: number, tag: string): string {
	// Escape the tag so characters with regex meaning cannot alter the patterns
	const safeTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

	// Opening tag: followed by whitespace, '>', or end of line (multi-line tag)
	const openTagSource = `<${safeTag}(?:[\\s>]|$)`
	// Non-global copy for yes/no checks, so no lastIndex state is carried between calls
	const opensTag = new RegExp(openTagSource, 'i')
	const openTagCounter = new RegExp(openTagSource, 'gi')
	// Self-closing tag: the tag name must be followed by whitespace or '/>' directly,
	// otherwise `<b` would also match `<br/>` and `<p` would match `<path .../>`
	const selfClosingCounter = new RegExp(`<${safeTag}(?:\\s[^>]*)?/>`, 'gi')
	const closeTagCounter = new RegExp(`</${safeTag}>`, 'gi')

	const countMatches = (text: string, pattern: RegExp): number => (text.match(pattern) ?? []).length

	// Locate the line holding the opening tag
	let actualStartLine = startLine
	if (!opensTag.test(lines[startLine] || '')) {
		for (let i = startLine - 1; i >= Math.max(0, startLine - 20); i--) {
			const line = lines[i]
			if (!line) continue

			if (opensTag.test(line)) {
				actualStartLine = i
				break
			}
		}
	}

	const snippetLines: string[] = []
	let depth = 0
	let foundClosing = false

	for (let i = actualStartLine; i < Math.min(actualStartLine + 30, lines.length); i++) {
		const line = lines[i]
		if (!line) continue

		snippetLines.push(line)

		const openTags = countMatches(line, openTagCounter)
		const selfClosing = countMatches(line, selfClosingCounter)
		const closeTags = countMatches(line, closeTagCounter)

		// A self-closing tag is also counted as an opening tag, so it nets to zero
		depth += openTags - selfClosing - closeTags

		// If we found a self-closing tag or closed all tags, we're done
		if (selfClosing > 0 || (depth <= 0 && (closeTags > 0 || openTags > 0))) {
			foundClosing = true
			break
		}
	}

	// If we didn't find closing tag, just return the first line
	if (!foundClosing && snippetLines.length > 1) {
		return snippetLines[0]!
	}

	return snippetLines.join('\n')
}
1333
-
1334
/**
 * Extract innerHTML from a complete tag snippet.
 * Given `<p class="foo">content here</p>`, returns `content here`.
 *
 * The closing tag is paired with the first opening tag by counting nested
 * elements of the same name, so `<div><div>a</div>b</div>` yields
 * `<div>a</div>b`. If the snippet is unbalanced, the first closing tag after
 * the opening tag is used instead.
 *
 * @param snippet - The complete tag snippet from source
 * @param tag - The tag name (e.g., 'p', 'h1'), matched case-insensitively
 * @returns The innerHTML portion, or undefined if it can't be extracted or is empty
 */
export function extractInnerHtmlFromSnippet(snippet: string, tag: string): string | undefined {
	// Escape the tag so characters with regex meaning cannot alter the patterns
	const safeTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

	// Opening tag with optional attributes
	const openMatch = new RegExp(`<${safeTag}(?:\\s[^>]*)?>`, 'i').exec(snippet)
	if (!openMatch) return undefined

	const contentStart = openMatch.index + openMatch[0].length

	// Any opening, self-closing or closing tag of this exact name.
	// Requiring whitespace, '/' or '>' after the name keeps `<b` from matching `<br>`.
	const tagToken = new RegExp(`<(/?)${safeTag}(?:\\s[^>]*|/)?>`, 'gi')
	tagToken.lastIndex = contentStart

	let depth = 1
	let firstCloseStart = -1

	for (let token = tagToken.exec(snippet); token !== null; token = tagToken.exec(snippet)) {
		if (token[1] === '/') {
			if (firstCloseStart === -1) firstCloseStart = token.index
			depth -= 1
			if (depth === 0) {
				// Empty content is reported as undefined
				return token.index > contentStart ? snippet.substring(contentStart, token.index) : undefined
			}
		} else if (!token[0].endsWith('/>')) {
			// Nested opening tag; self-closing ones do not change the depth
			depth += 1
		}
	}

	// Unbalanced snippet (e.g. truncated): fall back to the first closing tag
	return firstCloseStart > contentStart ? snippet.substring(contentStart, firstCloseStart) : undefined
}
1364
-
1365
/**
 * Load a source file and return the innerHTML of the `tag` element found at
 * (or around) the given line.
 *
 * @param sourceFile - Absolute path, or a path relative to the project root
 * @param sourceLine - 1-indexed line number
 * @param tag - The tag name
 * @returns The innerHTML from source, or undefined if the file can't be read
 *          or the content can't be extracted
 */
export async function extractSourceInnerHtml(
	sourceFile: string,
	sourceLine: number,
	tag: string,
): Promise<string | undefined> {
	try {
		let filePath = sourceFile
		if (!path.isAbsolute(sourceFile)) {
			filePath = path.join(getProjectRoot(), sourceFile)
		}

		const fileText = await fs.readFile(filePath, 'utf-8')
		const lines = fileText.split('\n')

		// Whole element first, then just what sits between its tags
		const completeSnippet = extractCompleteTagSnippet(lines, sourceLine - 1, tag)
		return extractInnerHtmlFromSnippet(completeSnippet, tag)
	} catch {
		return undefined
	}
}
1395
-
1396
/**
 * Canonicalize text for comparison: trims, unescapes backslash-escaped quotes,
 * decodes a handful of HTML entities, collapses whitespace runs to a single
 * space and lowercases the result.
 */
function normalizeText(text: string): string {
	// Applied strictly in this order; `&amp;` is decoded after the other entities
	const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
		[/\\'/g, "'"], // Escaped single quotes
		[/\\"/g, '"'], // Escaped double quotes
		[/&#39;/g, "'"], // HTML entity for apostrophe
		[/&quot;/g, '"'], // HTML entity for quote
		[/&apos;/g, "'"], // HTML entity for apostrophe (alternative)
		[/&amp;/g, '&'], // HTML entity for ampersand
		[/\s+/g, ' '], // Normalize whitespace
	]

	let normalized = text.trim()
	for (const [pattern, replacement] of substitutions) {
		normalized = normalized.replace(pattern, replacement)
	}

	return normalized.toLowerCase()
}
1411
-
1412
/**
 * Find markdown collection file for a given page path.
 *
 * The first path segment is treated as the collection name and the remaining
 * segments as the slug, e.g. `/services/3d-tisk` -> collection `services`,
 * slug `3d-tisk`.
 *
 * @param pagePath - The URL path of the page (e.g., '/services/3d-tisk')
 * @param contentDir - The content directory (default: 'src/content')
 * @returns Collection info if found, undefined otherwise
 */
export async function findCollectionSource(
	pagePath: string,
	contentDir: string = 'src/content',
): Promise<CollectionInfo | undefined> {
	// Remove leading/trailing slashes
	const pathParts = pagePath.replace(/^\/+|\/+$/g, '').split('/')

	if (pathParts.length < 2) {
		// Need at least collection/slug
		return undefined
	}

	// The path comes from a URL: never let `..` segments resolve outside the content directory
	if (pathParts.includes('..')) {
		return undefined
	}

	const [collectionName, ...slugParts] = pathParts
	const slug = slugParts.join('/')

	if (!collectionName || !slug) {
		return undefined
	}

	const collectionPath = path.join(getProjectRoot(), contentDir, collectionName)

	try {
		// A single stat covers "content dir exists", "collection exists" and "is a directory"
		const stat = await fs.stat(collectionPath)
		if (!stat.isDirectory()) {
			return undefined
		}
	} catch {
		return undefined
	}

	// Look for markdown files matching the slug
	const mdFile = await findMarkdownFile(collectionPath, slug)
	if (!mdFile) {
		return undefined
	}

	return {
		name: collectionName,
		slug,
		file: path.relative(getProjectRoot(), mdFile),
	}
}
1474
-
1475
/**
 * Find a markdown file in a collection directory by slug.
 *
 * Candidates are probed in this order and the first existing one wins:
 * `<slug>.md`, `<slug>.mdx`, `<slug>/index.md`, `<slug>/index.mdx`.
 * Slugs containing slashes need no special handling because `path.join`
 * already resolves them into nested directories.
 *
 * @param collectionPath - Absolute path of the collection directory
 * @param slug - Entry slug, possibly containing `/`
 * @returns Path of the first existing candidate, or undefined
 */
async function findMarkdownFile(collectionPath: string, slug: string): Promise<string | undefined> {
	const candidates = [
		path.join(collectionPath, `${slug}.md`),
		path.join(collectionPath, `${slug}.mdx`),
		path.join(collectionPath, slug, 'index.md'),
		path.join(collectionPath, slug, 'index.mdx'),
	]

	for (const candidate of candidates) {
		try {
			await fs.access(candidate)
			return candidate
		} catch {
			// File doesn't exist, continue
		}
	}

	return undefined
}
1530
-
1531
/**
 * Return the content and line array of a markdown file, reading it from disk
 * only when it is not already in `markdownFileCache`.
 *
 * @returns The cached or freshly read entry, or null when reading fails
 */
async function getCachedMarkdownFile(filePath: string): Promise<{ content: string; lines: string[] } | null> {
	const hit = markdownFileCache.get(filePath)
	if (hit) {
		return hit
	}

	try {
		const content = await fs.readFile(filePath, 'utf-8')
		const entry = { content, lines: content.split('\n') }
		markdownFileCache.set(filePath, entry)
		return entry
	} catch {
		return null
	}
}
1548
-
1549
/**
 * Find text content in a markdown file and return source location.
 * Only matches frontmatter fields, not body content (body is handled separately as a whole).
 *
 * The frontmatter block is delimited by the first two lines consisting of `---`.
 * Only simple single-line `key: value` entries strictly between those two
 * delimiters are considered; surrounding quotes on the value are stripped
 * before comparison.
 *
 * @param textContent - The text content to search for
 * @param collectionInfo - Collection information (name, slug, file path)
 * @returns Source location if found in frontmatter
 */
export async function findMarkdownSourceLocation(
	textContent: string,
	collectionInfo: CollectionInfo,
): Promise<SourceLocation | undefined> {
	try {
		const filePath = path.join(getProjectRoot(), collectionInfo.file)
		const cached = await getCachedMarkdownFile(filePath)
		if (!cached) return undefined

		const { lines } = cached
		const normalizedSearch = normalizeText(textContent)

		// Locate the opening and closing frontmatter delimiters
		let frontmatterStart = -1
		let frontmatterEnd = -1

		for (let i = 0; i < lines.length; i++) {
			if (lines[i]?.trim() !== '---') continue

			if (frontmatterStart === -1) {
				frontmatterStart = i
			} else {
				frontmatterEnd = i
				break
			}
		}

		// Search in frontmatter only (for title, subtitle, etc.)
		if (frontmatterEnd > 0) {
			// Start right after the opening delimiter so lines preceding the
			// frontmatter block are never scanned
			for (let i = frontmatterStart + 1; i < frontmatterEnd; i++) {
				const line = lines[i]
				if (!line) continue

				// Extract value from YAML key: value
				const match = line.match(/^\s*(\w+):\s*(.+)$/)
				if (!match) continue

				const key = match[1]
				let value = match[2]?.trim() || ''

				// Handle quoted strings
				if (
					(value.startsWith('"') && value.endsWith('"'))
					|| (value.startsWith("'") && value.endsWith("'"))
				) {
					value = value.slice(1, -1)
				}

				if (normalizeText(value) === normalizedSearch) {
					return {
						file: collectionInfo.file,
						line: i + 1,
						snippet: line,
						type: 'collection',
						variableName: key,
						collectionName: collectionInfo.name,
						collectionSlug: collectionInfo.slug,
					}
				}
			}
		}

		// Body content is not searched line-by-line anymore
		// Use parseMarkdownContent to get the full body as one entry
	} catch {
		// Error reading file
	}

	return undefined
}
1627
-
1628
/**
 * Read a collection's markdown file (through the markdown file cache) and split
 * it into frontmatter fields and the body text.
 *
 * Only simple single-line `key: value` frontmatter entries are extracted, each
 * recorded with its 1-indexed line number. The body is everything after the
 * closing `---` delimiter, trimmed.
 *
 * @param collectionInfo - Collection information (name, slug, file path)
 * @returns Parsed markdown content with frontmatter and body, or undefined on failure
 */
export async function parseMarkdownContent(
	collectionInfo: CollectionInfo,
): Promise<MarkdownContent | undefined> {
	// Drops one pair of matching surrounding quotes, if present
	const unquote = (raw: string): string => {
		const doubleQuoted = raw.startsWith('"') && raw.endsWith('"')
		const singleQuoted = raw.startsWith("'") && raw.endsWith("'")
		return doubleQuoted || singleQuoted ? raw.slice(1, -1) : raw
	}

	try {
		const filePath = path.join(getProjectRoot(), collectionInfo.file)
		const cached = await getCachedMarkdownFile(filePath)
		if (!cached) return undefined

		const { lines } = cached

		// First two `---` lines delimit the frontmatter
		const isDelimiter = (text: string): boolean => text.trim() === '---'
		const frontmatterStart = lines.findIndex(isDelimiter)
		const frontmatterEnd = frontmatterStart === -1
			? -1
			: lines.findIndex((text, index) => index > frontmatterStart && isDelimiter(text))

		const frontmatter: Record<string, { value: string; line: number }> = {}

		if (frontmatterEnd > 0) {
			for (let index = frontmatterStart + 1; index < frontmatterEnd; index++) {
				const text = lines[index]
				if (!text) continue

				// Simple single-line `key: value` entries only
				const parts = text.match(/^\s*(\w+):\s*(.+)$/)
				if (!parts) continue

				const key = parts[1]
				const value = unquote(parts[2]?.trim() || '')

				if (key && value) {
					frontmatter[key] = { value, line: index + 1 }
				}
			}
		}

		// Body is whatever follows the closing delimiter (or the whole file)
		const bodyStartLine = frontmatterEnd > 0 ? frontmatterEnd + 1 : 0
		const body = lines.slice(bodyStartLine).join('\n').trim()

		return {
			frontmatter,
			body,
			bodyStartLine: bodyStartLine + 1, // 1-indexed
			file: collectionInfo.file,
			collectionName: collectionInfo.name,
			collectionSlug: collectionInfo.slug,
		}
	} catch {
		// Error reading file
	}

	return undefined
}
1708
-
1709
/**
 * Remove common inline and line-leading markdown markup so that the remaining
 * text can be compared against rendered content.
 */
function stripMarkdownSyntax(text: string): string {
	// Applied strictly in this order
	const rules: ReadonlyArray<readonly [RegExp, string]> = [
		[/^#+\s+/, ''], // Headers
		[/\*\*([^*]+)\*\*/g, '$1'], // Bold
		[/\*([^*]+)\*/g, '$1'], // Italic
		[/__([^_]+)__/g, '$1'], // Bold (underscore)
		[/_([^_]+)_/g, '$1'], // Italic (underscore)
		[/`([^`]+)`/g, '$1'], // Inline code
		[/\[([^\]]+)\]\([^)]+\)/g, '$1'], // Links
		[/^\s*[-*+]\s+/, ''], // List items
		[/^\s*\d+\.\s+/, ''], // Numbered lists
	]

	let stripped = text
	for (const [pattern, replacement] of rules) {
		stripped = stripped.replace(pattern, replacement)
	}

	return stripped.trim()
}
1725
-
1726
/**
 * Fill in `sourceSnippet` / `sourceHash` on manifest entries by reading the
 * referenced source files. All entries are processed concurrently.
 *
 * - Image entries (with `imageMetadata.src`) are re-located by their `src`, and
 *   their path, line, snippet and hash are replaced when a location is found.
 * - Other entries are left untouched when they already carry a snippet or lack
 *   a source path, line or tag; otherwise the innerHTML at the recorded
 *   location is extracted.
 *
 * @param entries - Manifest entries to enhance
 * @returns A new record with the enhanced entries, keyed like the input
 */
export async function enhanceManifestWithSourceSnippets(
	entries: Record<string, ManifestEntry>,
): Promise<Record<string, ManifestEntry>> {
	const enhanceEntry = async (entry: ManifestEntry): Promise<ManifestEntry> => {
		// Images: find the line carrying the src attribute
		if (entry.sourceType === 'image' && entry.imageMetadata?.src) {
			const imageLocation = await findImageSourceLocation(entry.imageMetadata.src)
			if (!imageLocation) return entry

			const sourceHash = generateSourceHash(imageLocation.snippet || entry.imageMetadata.src)
			return {
				...entry,
				sourcePath: imageLocation.file,
				sourceLine: imageLocation.line,
				sourceSnippet: imageLocation.snippet,
				sourceHash,
			}
		}

		// Nothing to do when a snippet exists already or the location is incomplete
		const hasLocation = entry.sourcePath && entry.sourceLine && entry.tag
		if (entry.sourceSnippet || !hasLocation) return entry

		const sourceSnippet = await extractSourceInnerHtml(
			entry.sourcePath,
			entry.sourceLine,
			entry.tag,
		)
		if (!sourceSnippet) return entry

		// Hash of the source snippet, used for conflict detection
		return { ...entry, sourceSnippet, sourceHash: generateSourceHash(sourceSnippet) }
	}

	const pairs = await Promise.all(
		Object.entries(entries).map(async ([id, entry]) => [id, await enhanceEntry(entry)] as const),
	)

	const enhanced: Record<string, ManifestEntry> = {}
	for (const [id, entry] of pairs) {
		enhanced[id] = entry
	}

	return enhanced
}