@nuasite/cms-marker 0.0.79 → 0.0.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -14,7 +14,7 @@
14
14
  "directory": "packages/cms-marker"
15
15
  },
16
16
  "license": "Apache-2.0",
17
- "version": "0.0.79",
17
+ "version": "0.0.80",
18
18
  "module": "src/index.ts",
19
19
  "types": "src/index.ts",
20
20
  "type": "module",
@@ -1,5 +1,8 @@
1
+ import fs from 'node:fs/promises'
1
2
  import { type HTMLElement as ParsedHTMLElement, parse } from 'node-html-parser'
2
- import { findSeoSource } from './source-finder/seo-finder'
3
+ import path from 'node:path'
4
+ import { getProjectRoot } from './config'
5
+ import { findSourceLocation } from './source-finder/source-lookup'
3
6
  import type { CanonicalUrl, JsonLdEntry, OpenGraphData, PageSeoData, RobotsDirective, SeoKeywords, SeoMetaTag, SeoTitle, TwitterCardData } from './types'
4
7
 
5
8
  /** Type for parsed HTML element nodes from node-html-parser */
@@ -99,11 +102,18 @@ async function extractTitle(
99
102
  const content = titleElement.textContent?.trim() || ''
100
103
  if (!content) return undefined
101
104
 
102
- // Try to find source location in actual source files
103
- const sourceLocation = await findSeoSource('title', { content })
105
+ // Use the same source finding logic as regular text entries
106
+ // This tracks through props, variables, and imports
107
+ const sourceLocation = await findSourceLocation(content, 'title')
104
108
 
105
109
  // Fall back to rendered HTML location if source not found
106
- const sourceInfo = sourceLocation || findElementSourceLocation(titleElement, html, sourcePath)
110
+ const sourceInfo = sourceLocation
111
+ ? {
112
+ sourcePath: sourceLocation.file,
113
+ sourceLine: sourceLocation.line,
114
+ sourceSnippet: sourceLocation.snippet || '',
115
+ }
116
+ : findElementSourceLocation(titleElement, html, sourcePath)
107
117
 
108
118
  let cmsId: string | undefined
109
119
  if (markTitle && getNextId) {
@@ -140,8 +150,12 @@ async function extractMetaTags(
140
150
  // Skip meta tags without content or without name/property
141
151
  if (!content || (!name && !property)) continue
142
152
 
143
- // Try to find source location in actual source files
144
- const sourceLocation = await findSeoSource('meta', { name: name || undefined, property: property || undefined, content })
153
+ // Build a regex source that identifies the enclosing tag, e.g. <meta ... name="description"> or <meta ... property="og:title">
154
+ const identifier = name || property
155
+ const tagPattern = identifier ? `<meta[^>]*(?:name|property)\\s*=\\s*["']${identifier}["']` : '<meta'
156
+
157
+ // Search for the content attribute value in source files
158
+ const sourceLocation = await findAttributeValueSource('content', content, tagPattern)
145
159
 
146
160
  // Fall back to rendered HTML location if source not found
147
161
  const sourceInfo = sourceLocation || findElementSourceLocation(meta, html, sourcePath)
@@ -267,8 +281,8 @@ async function extractCanonical(
267
281
  const href = canonical.getAttribute('href')
268
282
  if (!href) return undefined
269
283
 
270
- // Try to find source location in actual source files
271
- const sourceLocation = await findSeoSource('canonical', { href })
284
+ // Search for the href attribute value in source files
285
+ const sourceLocation = await findAttributeValueSource('href', href, '<link[^>]*rel\\s*=\\s*["\'"]canonical["\'"]')
272
286
 
273
287
  // Fall back to rendered HTML location if source not found
274
288
  const sourceInfo = sourceLocation || findElementSourceLocation(canonical, html, sourcePath)
@@ -301,8 +315,8 @@ async function extractJsonLd(
301
315
  const data = JSON.parse(content)
302
316
  const type = data['@type'] || 'Unknown'
303
317
 
304
- // Try to find source location in actual source files
305
- const sourceLocation = await findSeoSource('jsonld', { jsonLdType: type })
318
+ // Search for JSON-LD script with this @type in source files
319
+ const sourceLocation = await findJsonLdSource(type)
306
320
 
307
321
  // Fall back to rendered HTML location if source not found
308
322
  const sourceInfo = sourceLocation || findElementSourceLocation(script, html, sourcePath)
@@ -320,6 +334,110 @@ async function extractJsonLd(
320
334
  return entries
321
335
  }
322
336
 
337
/**
 * Locate the source file that declares a JSON-LD script of the given `@type`.
 *
 * The `pages`, `layouts` and `components` folders below `<project root>/src`
 * are searched in that order; the first hit reported by `searchDirForJsonLd` wins.
 *
 * @param jsonLdType - The `@type` value to look for
 * @returns File path, 1-based line and snippet of the first match, or `undefined` when nothing matches
 */
async function findJsonLdSource(
	jsonLdType: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	const sourceRoot = path.join(getProjectRoot(), 'src')

	for (const folder of ['pages', 'layouts', 'components']) {
		try {
			const match = await searchDirForJsonLd(path.join(sourceRoot, folder), jsonLdType)
			if (match) return match
		} catch {
			// Directory doesn't exist
		}
	}

	return undefined
}
361
+
362
/**
 * Walk a directory tree depth-first looking for a JSON-LD script of the given `@type`.
 *
 * Sub-directories are descended into as they are encountered; only `.astro` and
 * `.html` files are inspected. Any error raised while reading the directory is
 * swallowed and treated as "no match".
 *
 * @param dir - Directory to search
 * @param jsonLdType - The `@type` value to look for
 * @returns The first match found, or `undefined`
 */
async function searchDirForJsonLd(
	dir: string,
	jsonLdType: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	const templateSuffixes = ['.astro', '.html']

	try {
		for (const entry of await fs.readdir(dir, { withFileTypes: true })) {
			const entryPath = path.join(dir, entry.name)

			const hit = entry.isDirectory()
				? await searchDirForJsonLd(entryPath, jsonLdType)
				: entry.isFile() && templateSuffixes.some(suffix => entry.name.endsWith(suffix))
				? await searchFileForJsonLd(entryPath, jsonLdType)
				: undefined

			if (hit) return hit
		}
	} catch {
		// Error reading directory
	}

	return undefined
}
389
+
390
/**
 * Search a single file for a JSON-LD script that declares a specific `@type`.
 *
 * Every line containing `application/ld+json` starts a candidate script. From that
 * line, at most 30 lines are collected (stopping early at `</script>`), and the
 * candidate is accepted when one of those lines contains both a quoted `@type` key
 * and the requested type as a complete quoted token.
 *
 * Requiring the quotes around the type prevents a search for `Article` from being
 * satisfied by `"@type": "NewsArticle"`. The key may be written with double or single
 * quotes, so object literals such as `{ '@type': 'Article' }` are recognised too.
 *
 * This is a line-based heuristic, not a JSON parser: key and value must be on the same line.
 *
 * @param filePath - File to read
 * @param jsonLdType - The `@type` value to look for
 * @returns Path relative to the project root, 1-based line of the script opening and the
 *          collected snippet; `undefined` when there is no match or the file cannot be read
 */
async function searchFileForJsonLd(
	filePath: string,
	jsonLdType: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	// Plain substring checks are used on purpose so the type never needs regex escaping
	const typeKeys = ['"@type"', "'@type'"]
	const quotedTypes = ['"', "'", '`'].map(quote => `${quote}${jsonLdType}${quote}`)
	const declaresType = (text: string): boolean =>
		typeKeys.some(key => text.includes(key)) && quotedTypes.some(token => text.includes(token))

	try {
		const content = await fs.readFile(filePath, 'utf-8')
		const lines = content.split('\n')

		for (let i = 0; i < lines.length; i++) {
			// Look for JSON-LD script opening
			if (!(lines[i] ?? '').includes('application/ld+json')) continue

			const snippetLines: string[] = []
			let foundType = false

			// Bounded look-ahead keeps the snippet small even if the closing tag is missing
			const scanEnd = Math.min(i + 30, lines.length)
			for (let j = i; j < scanEnd; j++) {
				const snippetLine = lines[j] ?? ''
				snippetLines.push(snippetLine)

				if (declaresType(snippetLine)) {
					foundType = true
				}

				// The closing tag ends this candidate script
				if (snippetLine.includes('</script>')) {
					break
				}
			}

			if (foundType) {
				return {
					sourcePath: path.relative(getProjectRoot(), filePath),
					sourceLine: i + 1,
					sourceSnippet: snippetLines.join('\n'),
				}
			}
		}
	} catch {
		// Best-effort lookup: an unreadable file simply yields no match
	}

	return undefined
}
440
+
323
441
  /**
324
442
  * Find the source location (line number and snippet) for an element in the rendered HTML.
325
443
  * This is a fallback when the actual source file location cannot be found.
@@ -351,3 +469,145 @@ function findElementSourceLocation(
351
469
  sourceSnippet,
352
470
  }
353
471
  }
472
+
473
/**
 * Find where an attribute with a given value is written in the project sources.
 *
 * The `pages`, `layouts` and `components` folders below `<project root>/src`
 * are searched in that order; the first hit reported by
 * `searchDirForAttributeValue` is returned.
 *
 * @param attrName - Attribute name to look for (e.g. `content`, `href`)
 * @param value - Attribute value to look for
 * @param tagPattern - Optional regular-expression source describing the enclosing tag
 * @returns File path, 1-based line and snippet of the first match, or `undefined` when nothing matches
 */
async function findAttributeValueSource(
	attrName: string,
	value: string,
	tagPattern?: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	const sourceRoot = path.join(getProjectRoot(), 'src')

	for (const folder of ['pages', 'layouts', 'components']) {
		try {
			const match = await searchDirForAttributeValue(path.join(sourceRoot, folder), attrName, value, tagPattern)
			if (match) return match
		} catch {
			// Directory doesn't exist
		}
	}

	return undefined
}
500
+
501
/**
 * Walk a directory tree depth-first looking for an attribute with a given value.
 *
 * Sub-directories are descended into as they are encountered; only `.astro` and
 * `.html` files are inspected. Any error raised while reading the directory is
 * swallowed and treated as "no match".
 *
 * @param dir - Directory to search
 * @param attrName - Attribute name to look for
 * @param value - Attribute value to look for
 * @param tagPattern - Optional regular-expression source describing the enclosing tag
 * @returns The first match found, or `undefined`
 */
async function searchDirForAttributeValue(
	dir: string,
	attrName: string,
	value: string,
	tagPattern?: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	const templateSuffixes = ['.astro', '.html']

	try {
		for (const entry of await fs.readdir(dir, { withFileTypes: true })) {
			const entryPath = path.join(dir, entry.name)

			const hit = entry.isDirectory()
				? await searchDirForAttributeValue(entryPath, attrName, value, tagPattern)
				: entry.isFile() && templateSuffixes.some(suffix => entry.name.endsWith(suffix))
				? await searchFileForAttributeValue(entryPath, attrName, value, tagPattern)
				: undefined

			if (hit) return hit
		}
	} catch {
		// Error reading directory
	}

	return undefined
}
530
+
531
/**
 * Search one file for a quoted attribute assignment (`attrName="value"` or `attrName='value'`).
 *
 * The value is matched literally but case-insensitively. When `tagPattern` is given,
 * the matching line, or failing that the matching line joined with up to three
 * preceding lines, must also match that pattern; otherwise the line is skipped.
 *
 * @param filePath - File to read
 * @param attrName - Attribute name, inserted into the regular expression as-is
 * @param value - Attribute value; regex metacharacters in it are escaped
 * @param tagPattern - Optional regular-expression source describing the enclosing tag
 * @returns Path relative to the project root, 1-based line of the attribute and the element
 *          snippet; `undefined` when there is no match or any error occurs
 */
async function searchFileForAttributeValue(
	filePath: string,
	attrName: string,
	value: string,
	tagPattern?: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	try {
		const lines = (await fs.readFile(filePath, 'utf-8')).split('\n')

		// The value must be matched verbatim, so neutralise regex metacharacters in it
		const literalValue = value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
		const attributeRegex = new RegExp(`${attrName}\\s*=\\s*["']${literalValue}["']`, 'i')
		const contextRegex = tagPattern ? new RegExp(tagPattern, 'i') : null

		for (const [index, current] of lines.entries()) {
			if (!attributeRegex.test(current)) continue

			if (contextRegex !== null && !contextRegex.test(current)) {
				// The tag may have opened a few lines earlier, so retry on a joined window
				const window = lines.slice(Math.max(0, index - 3), index + 1).join(' ')
				if (!contextRegex.test(window)) continue
			}

			const elementSnippet = extractElementSnippetFromLines(lines, index, tagPattern)

			return {
				sourcePath: path.relative(getProjectRoot(), filePath),
				sourceLine: index + 1,
				sourceSnippet: elementSnippet,
			}
		}
	} catch {
		// Error reading file
	}

	return undefined
}
583
+
584
/**
 * Report whether a `<meta` or `<link` tag that opens on this line also closes on it.
 * A `>` inside a quoted string or a `{...}` expression does not count as the tag end.
 */
function voidTagClosesOnLine(line: string): boolean {
	const openerPositions = ['<meta', '<link'].map(opener => line.indexOf(opener)).filter(pos => pos !== -1)
	if (openerPositions.length === 0) return false

	let activeQuote = ''
	let braceDepth = 0
	// Both openers are five characters long, so scanning starts right after the tag name
	for (let pos = Math.min(...openerPositions) + 5; pos < line.length; pos++) {
		const ch = line[pos]
		if (activeQuote) {
			if (ch === activeQuote) activeQuote = ''
			continue
		}
		if (ch === '"' || ch === "'" || ch === '`') {
			activeQuote = ch
		} else if (ch === '{') {
			braceDepth++
		} else if (ch === '}') {
			braceDepth = Math.max(0, braceDepth - 1)
		} else if (ch === '>' && braceDepth === 0) {
			return true
		}
	}

	return false
}

/**
 * Extract a (possibly multi-line) element snippet around a given line.
 *
 * The start is moved back by up to five lines to the nearest line containing
 * `<meta`, `<link`, `<title` or `<script`. From there at most ten lines are collected
 * (empty lines are skipped) until a line that
 * - contains `/>` or `</`, or
 * - contains `>` but no `<` (the tail of a multi-line tag), or
 * - holds a `<meta`/`<link` tag that closes on that same line.
 *
 * The last rule stops at HTML-style void tags such as `<meta name="x" content="y">`,
 * which would otherwise pull unrelated following lines into the snippet.
 *
 * @param lines - The file content split into lines
 * @param startLine - 0-based index of the line the match was found on
 * @param tagPattern - Accepted for call-site compatibility; not used by the extraction
 * @returns The collected lines joined with `\n` (empty string when nothing was collected)
 */
function extractElementSnippetFromLines(lines: string[], startLine: number, tagPattern?: string): string {
	const tagOpeners = ['<meta', '<link', '<title', '<script']
	const snippetLines: string[] = []

	// Look backwards to find the tag opening if we're on an attribute line
	let actualStart = startLine
	for (let i = startLine; i >= Math.max(0, startLine - 5); i--) {
		const line = lines[i] ?? ''
		if (tagOpeners.some(opener => line.includes(opener))) {
			actualStart = i
			break
		}
	}

	// Collect lines until the element is closed
	for (let i = actualStart; i < Math.min(actualStart + 10, lines.length); i++) {
		const line = lines[i]
		if (!line) continue
		snippetLines.push(line)

		const closesElement = line.includes('/>') || line.includes('</') || (line.includes('>') && !line.includes('<'))
		if (closesElement || voidTagClosesOnLine(line)) {
			break
		}
	}

	return snippetLines.join('\n')
}
@@ -24,7 +24,3 @@ export { findCollectionSource, findMarkdownSourceLocation, parseMarkdownContent
24
24
 
25
25
  // Snippet utilities (used by html-processor)
26
26
  export { enhanceManifestWithSourceSnippets, extractCompleteTagSnippet, extractInnerHtmlFromSnippet, extractSourceSnippet } from './snippet-utils'
27
-
28
- // SEO source finding
29
- export { findSeoSource } from './seo-finder'
30
- export type { SeoElementIdentifier, SeoSourceLocation } from './seo-finder'
@@ -1,32 +0,0 @@
1
- /**
2
- * SEO element identifier for source finding
3
- */
4
- export interface SeoElementIdentifier {
5
- /** Meta tag name attribute */
6
- name?: string;
7
- /** Meta tag property attribute (for OG/Twitter) */
8
- property?: string;
9
- /** Content value to match */
10
- content?: string;
11
- /** Canonical URL href */
12
- href?: string;
13
- /** JSON-LD @type value */
14
- jsonLdType?: string;
15
- }
16
- /**
17
- * Result of SEO source finding
18
- */
19
- export interface SeoSourceLocation {
20
- /** Path to source file relative to project root */
21
- sourcePath: string;
22
- /** Line number in source file (1-indexed) */
23
- sourceLine: number;
24
- /** Exact source code snippet */
25
- sourceSnippet: string;
26
- }
27
- /**
28
- * Find the source location for an SEO element.
29
- * Searches Astro/HTML files in src/pages and src/layouts for matching SEO elements.
30
- */
31
- export declare function findSeoSource(type: 'title' | 'meta' | 'canonical' | 'jsonld', identifier: SeoElementIdentifier): Promise<SeoSourceLocation | undefined>;
32
- //# sourceMappingURL=seo-finder.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"seo-finder.d.ts","sourceRoot":"","sources":["../../../src/source-finder/seo-finder.ts"],"names":[],"mappings":"AAKA;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6BAA6B;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,yBAAyB;IACzB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,0BAA0B;IAC1B,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,mDAAmD;IACnD,UAAU,EAAE,MAAM,CAAA;IAClB,6CAA6C;IAC7C,UAAU,EAAE,MAAM,CAAA;IAClB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAA;CACrB;AAED;;;GAGG;AACH,wBAAsB,aAAa,CAClC,IAAI,EAAE,OAAO,GAAG,MAAM,GAAG,WAAW,GAAG,QAAQ,EAC/C,UAAU,EAAE,oBAAoB,GAC9B,OAAO,CAAC,iBAAiB,GAAG,SAAS,CAAC,CAkBxC"}