@nuasite/cms-marker 0.0.79 → 0.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/seo-processor.d.ts.map +1 -1
- package/dist/types/source-finder/index.d.ts +0 -2
- package/dist/types/source-finder/index.d.ts.map +1 -1
- package/dist/types/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
- package/src/seo-processor.ts +270 -10
- package/src/source-finder/index.ts +0 -4
- package/dist/types/source-finder/seo-finder.d.ts +0 -32
- package/dist/types/source-finder/seo-finder.d.ts.map +0 -1
- package/src/source-finder/seo-finder.ts +0 -336
package/package.json
CHANGED
package/src/seo-processor.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
|
+
import fs from 'node:fs/promises'
|
|
1
2
|
import { type HTMLElement as ParsedHTMLElement, parse } from 'node-html-parser'
|
|
2
|
-
import
|
|
3
|
+
import path from 'node:path'
|
|
4
|
+
import { getProjectRoot } from './config'
|
|
5
|
+
import { findSourceLocation } from './source-finder/source-lookup'
|
|
3
6
|
import type { CanonicalUrl, JsonLdEntry, OpenGraphData, PageSeoData, RobotsDirective, SeoKeywords, SeoMetaTag, SeoTitle, TwitterCardData } from './types'
|
|
4
7
|
|
|
5
8
|
/** Type for parsed HTML element nodes from node-html-parser */
|
|
@@ -99,11 +102,18 @@ async function extractTitle(
|
|
|
99
102
|
const content = titleElement.textContent?.trim() || ''
|
|
100
103
|
if (!content) return undefined
|
|
101
104
|
|
|
102
|
-
//
|
|
103
|
-
|
|
105
|
+
// Use the same source finding logic as regular text entries
|
|
106
|
+
// This tracks through props, variables, and imports
|
|
107
|
+
const sourceLocation = await findSourceLocation(content, 'title')
|
|
104
108
|
|
|
105
109
|
// Fall back to rendered HTML location if source not found
|
|
106
|
-
const sourceInfo = sourceLocation
|
|
110
|
+
const sourceInfo = sourceLocation
|
|
111
|
+
? {
|
|
112
|
+
sourcePath: sourceLocation.file,
|
|
113
|
+
sourceLine: sourceLocation.line,
|
|
114
|
+
sourceSnippet: sourceLocation.snippet || '',
|
|
115
|
+
}
|
|
116
|
+
: findElementSourceLocation(titleElement, html, sourcePath)
|
|
107
117
|
|
|
108
118
|
let cmsId: string | undefined
|
|
109
119
|
if (markTitle && getNextId) {
|
|
@@ -140,8 +150,12 @@ async function extractMetaTags(
|
|
|
140
150
|
// Skip meta tags without content or without name/property
|
|
141
151
|
if (!content || (!name && !property)) continue
|
|
142
152
|
|
|
143
|
-
//
|
|
144
|
-
const
|
|
153
|
+
// Build a tag pattern for context matching (e.g., "meta.*name="description"")
|
|
154
|
+
const identifier = name || property
|
|
155
|
+
const tagPattern = identifier ? `<meta[^>]*(?:name|property)\\s*=\\s*["']${identifier}["']` : '<meta'
|
|
156
|
+
|
|
157
|
+
// Search for the content attribute value in source files
|
|
158
|
+
const sourceLocation = await findAttributeValueSource('content', content, tagPattern)
|
|
145
159
|
|
|
146
160
|
// Fall back to rendered HTML location if source not found
|
|
147
161
|
const sourceInfo = sourceLocation || findElementSourceLocation(meta, html, sourcePath)
|
|
@@ -267,8 +281,8 @@ async function extractCanonical(
|
|
|
267
281
|
const href = canonical.getAttribute('href')
|
|
268
282
|
if (!href) return undefined
|
|
269
283
|
|
|
270
|
-
//
|
|
271
|
-
const sourceLocation = await
|
|
284
|
+
// Search for the href attribute value in source files
|
|
285
|
+
const sourceLocation = await findAttributeValueSource('href', href, '<link[^>]*rel\\s*=\\s*["\'"]canonical["\'"]')
|
|
272
286
|
|
|
273
287
|
// Fall back to rendered HTML location if source not found
|
|
274
288
|
const sourceInfo = sourceLocation || findElementSourceLocation(canonical, html, sourcePath)
|
|
@@ -301,8 +315,8 @@ async function extractJsonLd(
|
|
|
301
315
|
const data = JSON.parse(content)
|
|
302
316
|
const type = data['@type'] || 'Unknown'
|
|
303
317
|
|
|
304
|
-
//
|
|
305
|
-
const sourceLocation = await
|
|
318
|
+
// Search for JSON-LD script with this @type in source files
|
|
319
|
+
const sourceLocation = await findJsonLdSource(type)
|
|
306
320
|
|
|
307
321
|
// Fall back to rendered HTML location if source not found
|
|
308
322
|
const sourceInfo = sourceLocation || findElementSourceLocation(script, html, sourcePath)
|
|
@@ -320,6 +334,110 @@ async function extractJsonLd(
|
|
|
320
334
|
return entries
|
|
321
335
|
}
|
|
322
336
|
|
|
337
|
+
/**
 * Locate the source file that declares a JSON-LD script with the given `@type`.
 *
 * The `pages`, `layouts` and `components` folders under `<project root>/src`
 * are searched in that order; the first hit reported by `searchDirForJsonLd` wins.
 *
 * @param jsonLdType - The `@type` value to look for
 * @returns The first matching location, or `undefined` when no folder yields a match
 */
async function findJsonLdSource(
	jsonLdType: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	const sourceRoot = path.join(getProjectRoot(), 'src')

	for (const folder of ['pages', 'layouts', 'components']) {
		try {
			const hit = await searchDirForJsonLd(path.join(sourceRoot, folder), jsonLdType)
			if (hit) return hit
		} catch {
			// Directory doesn't exist
		}
	}

	return undefined
}
|
|
361
|
+
|
|
362
|
+
/**
 * Walk `dir` depth-first looking for a JSON-LD script of the given `@type`.
 *
 * Sub-directories are descended into as they are encountered; only `.astro`
 * and `.html` files are handed to `searchFileForJsonLd`. Any error raised
 * while reading the directory is swallowed and reported as "no match".
 *
 * @param dir - Directory to search
 * @param jsonLdType - The `@type` value to look for
 * @returns The first matching location, or `undefined`
 */
async function searchDirForJsonLd(
	dir: string,
	jsonLdType: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	try {
		for (const child of await fs.readdir(dir, { withFileTypes: true })) {
			const childPath = path.join(dir, child.name)

			if (child.isDirectory()) {
				const nested = await searchDirForJsonLd(childPath, jsonLdType)
				if (nested) return nested
				continue
			}

			const isSourceFile = child.isFile() && ['.astro', '.html'].some(ext => child.name.endsWith(ext))
			if (!isSourceFile) continue

			const hit = await searchFileForJsonLd(childPath, jsonLdType)
			if (hit) return hit
		}
	} catch {
		// Error reading directory
	}

	return undefined
}
|
|
389
|
+
|
|
390
|
+
/**
 * Search a single file for a JSON-LD script that declares a specific `@type`.
 *
 * Every line containing `application/ld+json` is treated as the start of a script.
 * From there, up to 30 lines (or until `</script>`) are collected as the snippet
 * and inspected for the requested type.
 *
 * A line counts as declaring the type only when it contains a quoted `@type` key
 * (single or double quotes) AND the type as a complete quoted token. This keeps
 * `Article` from matching `"NewsArticle"`.
 *
 * @param filePath - Absolute path of the file to read
 * @param jsonLdType - The `@type` value to look for
 * @returns Project-relative path, 1-indexed line of the script opening and the
 *   collected snippet, or `undefined` when nothing matches or the file cannot be read
 */
async function searchFileForJsonLd(
	filePath: string,
	jsonLdType: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	// Upper bound on how many lines of one script are inspected and captured
	const MAX_SCRIPT_LINES = 30

	try {
		const content = await fs.readFile(filePath, 'utf-8')
		const lines = content.split('\n')

		// The type is interpolated into a regex, so neutralise its metacharacters
		const escapedType = jsonLdType.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
		const typeKeyPattern = /["']@type["']/
		// The type must appear as a whole quoted token, not as part of a longer name
		const typeValuePattern = new RegExp('["\'`]' + escapedType + '["\'`]')

		for (let i = 0; i < lines.length; i++) {
			// Look for JSON-LD script opening
			if (!(lines[i] ?? '').includes('application/ld+json')) continue

			const snippetLines: string[] = []
			let foundType = false
			const lastLine = Math.min(i + MAX_SCRIPT_LINES, lines.length)

			for (let j = i; j < lastLine; j++) {
				const snippetLine = lines[j] ?? ''
				snippetLines.push(snippetLine)

				if (typeKeyPattern.test(snippetLine) && typeValuePattern.test(snippetLine)) {
					foundType = true
				}

				// Stop at the end of this script so a later script's @type is not attributed to it
				if (snippetLine.includes('</script>')) break
			}

			if (foundType) {
				return {
					sourcePath: path.relative(getProjectRoot(), filePath),
					sourceLine: i + 1,
					sourceSnippet: snippetLines.join('\n'),
				}
			}
		}
	} catch {
		// Best-effort lookup: an unreadable file simply yields no match
	}

	return undefined
}
|
|
440
|
+
|
|
323
441
|
/**
|
|
324
442
|
* Find the source location (line number and snippet) for an element in the rendered HTML.
|
|
325
443
|
* This is a fallback when the actual source file location cannot be found.
|
|
@@ -351,3 +469,145 @@ function findElementSourceLocation(
|
|
|
351
469
|
sourceSnippet,
|
|
352
470
|
}
|
|
353
471
|
}
|
|
472
|
+
|
|
473
|
+
/**
 * Search the project's source files for an attribute carrying the given value.
 *
 * The `pages`, `layouts` and `components` folders under `<project root>/src`
 * are searched in that order via `searchDirForAttributeValue`; the first hit wins.
 *
 * Note: the file-level search in this module matches statically written values
 * only (`content="text"`); dynamic expressions such as `content={variable}`
 * are not resolved here.
 *
 * @param attrName - Attribute name to look for (e.g. `content`, `href`)
 * @param value - Attribute value as it appears in the rendered HTML
 * @param tagPattern - Optional regex source describing the tag the attribute must belong to
 * @returns The first matching location, or `undefined` when no folder yields a match
 */
async function findAttributeValueSource(
	attrName: string,
	value: string,
	tagPattern?: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	const sourceRoot = path.join(getProjectRoot(), 'src')

	for (const folder of ['pages', 'layouts', 'components']) {
		try {
			const hit = await searchDirForAttributeValue(path.join(sourceRoot, folder), attrName, value, tagPattern)
			if (hit) return hit
		} catch {
			// Directory doesn't exist
		}
	}

	return undefined
}
|
|
500
|
+
|
|
501
|
+
/**
 * Walk `dir` depth-first looking for an attribute with the given value.
 *
 * Sub-directories are descended into as they are encountered; only `.astro`
 * and `.html` files are handed to `searchFileForAttributeValue`. Any error
 * raised while reading the directory is swallowed and reported as "no match".
 *
 * @param dir - Directory to search
 * @param attrName - Attribute name to look for
 * @param value - Attribute value to look for
 * @param tagPattern - Optional regex source describing the expected tag
 * @returns The first matching location, or `undefined`
 */
async function searchDirForAttributeValue(
	dir: string,
	attrName: string,
	value: string,
	tagPattern?: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	try {
		for (const child of await fs.readdir(dir, { withFileTypes: true })) {
			const childPath = path.join(dir, child.name)

			if (child.isDirectory()) {
				const nested = await searchDirForAttributeValue(childPath, attrName, value, tagPattern)
				if (nested) return nested
				continue
			}

			const isSourceFile = child.isFile() && ['.astro', '.html'].some(ext => child.name.endsWith(ext))
			if (!isSourceFile) continue

			const hit = await searchFileForAttributeValue(childPath, attrName, value, tagPattern)
			if (hit) return hit
		}
	} catch {
		// Error reading directory
	}

	return undefined
}
|
|
530
|
+
|
|
531
|
+
/**
 * Search a single file for a statically written attribute value
 * (`attrName="value"` or `attrName='value'`, matched case-insensitively).
 *
 * When `tagPattern` is given, a candidate is accepted only if the text of the tag
 * that encloses it matches the pattern. That text runs from the nearest preceding
 * `<` to the next `>`, and may span several lines. As a result, context attributes
 * are found whether they come before or after the matched attribute, and a matching
 * tag on a neighbouring line does not validate an unrelated one.
 *
 * @param filePath - Absolute path of the file to read
 * @param attrName - Attribute name to look for; matched as a whole name, so
 *   `content` does not match `data-content`
 * @param value - Attribute value to look for (regex metacharacters are escaped)
 * @param tagPattern - Optional regex source describing the expected tag
 * @returns Project-relative path, 1-indexed line of the match and the element
 *   snippet, or `undefined` when nothing matches, the file cannot be read, or
 *   `tagPattern` is not a valid regular expression
 */
async function searchFileForAttributeValue(
	filePath: string,
	attrName: string,
	value: string,
	tagPattern?: string,
): Promise<{ sourcePath: string; sourceLine: number; sourceSnippet: string } | undefined> {
	try {
		const content = await fs.readFile(filePath, 'utf-8')
		const lines = content.split('\n')

		const escapeForRegex = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

		// The lookbehind rejects longer attribute names ending in attrName
		// (data-content, xlink:href) and property accesses (foo.href)
		const staticPattern = new RegExp(
			`(?<![\\w:.-])${escapeForRegex(attrName)}\\s*=\\s*["']${escapeForRegex(value)}["']`,
			'gi',
		)

		// Pattern to match the tag context if provided
		const tagRegex = tagPattern ? new RegExp(tagPattern, 'i') : null

		for (let i = 0; i < lines.length; i++) {
			const line = lines[i] ?? ''
			const candidates = [...line.matchAll(staticPattern)]
			if (candidates.length === 0) continue

			if (tagRegex) {
				// A line may hold several tags; accept it if any candidate sits in the expected one
				let inExpectedTag = false
				for (const candidate of candidates) {
					const start = candidate.index ?? 0
					const tagText = collectEnclosingTagText(lines, i, start, start + candidate[0].length)
					if (tagRegex.test(tagText)) {
						inExpectedTag = true
						break
					}
				}
				if (!inExpectedTag) continue
			}

			return {
				sourcePath: path.relative(getProjectRoot(), filePath),
				sourceLine: i + 1,
				sourceSnippet: extractElementSnippetFromLines(lines, i, tagPattern),
			}
		}
	} catch {
		// Best-effort lookup: unreadable file or invalid tagPattern yields no match
	}

	return undefined
}

/**
 * Return the text of the tag enclosing the span [matchStart, matchEnd) on line `lineIndex`.
 * The text starts at the nearest preceding `<` (at most 5 lines back) and ends at the
 * next `>` (at most 10 lines ahead); lines are joined with a single space.
 */
function collectEnclosingTagText(lines: string[], lineIndex: number, matchStart: number, matchEnd: number): string {
	const MAX_LOOKBACK_LINES = 5
	const MAX_LOOKAHEAD_LINES = 10
	const current = lines[lineIndex] ?? ''

	// Walk backwards to the tag opening
	let head = current.slice(0, matchStart)
	const before: string[] = []
	const openOnLine = head.lastIndexOf('<')
	if (openOnLine >= 0) {
		head = head.slice(openOnLine)
	} else {
		for (let i = lineIndex - 1; i >= Math.max(0, lineIndex - MAX_LOOKBACK_LINES); i--) {
			const previous = lines[i] ?? ''
			const openAt = previous.lastIndexOf('<')
			if (openAt >= 0) {
				before.unshift(previous.slice(openAt))
				break
			}
			before.unshift(previous)
		}
	}

	// Walk forwards to the tag end
	let tail = current.slice(matchEnd)
	const after: string[] = []
	const closeOnLine = tail.indexOf('>')
	if (closeOnLine >= 0) {
		tail = tail.slice(0, closeOnLine + 1)
	} else {
		const lastLine = Math.min(lines.length, lineIndex + 1 + MAX_LOOKAHEAD_LINES)
		for (let i = lineIndex + 1; i < lastLine; i++) {
			const next = lines[i] ?? ''
			const closeAt = next.indexOf('>')
			if (closeAt >= 0) {
				after.push(next.slice(0, closeAt + 1))
				break
			}
			after.push(next)
		}
	}

	return [...before, head + current.slice(matchStart, matchEnd) + tail, ...after].join(' ')
}
|
|
583
|
+
|
|
584
|
+
/**
 * Extract the (possibly multi-line) source snippet of the element around `startLine`.
 *
 * The start of the snippet is found by looking back up to 5 lines from `startLine`
 * for a line containing `<meta`, `<link`, `<title` or `<script`; if none is found
 * the snippet starts at `startLine` itself. At most 10 lines are then scanned;
 * empty lines are skipped, and collection stops after the first line that:
 *   - contains `/>` or `</`, or
 *   - contains `>` but no `<` (the tail of a multi-line opening tag), or
 *   - completes a `<meta>`/`<link>` opening tag. These are void elements, so
 *     `<meta ... >` without `/>` is already the whole element. A `>` inside quotes
 *     or `{...}` expressions is ignored, and this rule never fires before
 *     `startLine` itself has been collected.
 *
 * @param lines - The file content split into lines
 * @param startLine - 0-based index of the line on which the match was found
 * @param tagPattern - Accepted for interface compatibility; not used by this function
 * @returns The collected lines joined with `\n`
 */
function extractElementSnippetFromLines(lines: string[], startLine: number, tagPattern?: string): string {
	const TAG_OPENERS = ['<meta', '<link', '<title', '<script']
	const VOID_TAG_OPENERS = ['<meta', '<link']
	const MAX_LOOKBACK_LINES = 5
	const MAX_SNIPPET_LINES = 10

	// Look backwards to find the tag opening if we're on an attribute line
	let actualStart = startLine
	for (let i = startLine; i >= Math.max(0, startLine - MAX_LOOKBACK_LINES); i--) {
		const candidate = lines[i] ?? ''
		if (TAG_OPENERS.some(opener => candidate.includes(opener))) {
			actualStart = i
			break
		}
	}

	// The void-element rule only applies when the opening line holds nothing but
	// <meta>/<link> tags; <title>/<script> have content and need their closing tag.
	const openingLine = lines[actualStart] ?? ''
	const voidOffsets = VOID_TAG_OPENERS.map(opener => openingLine.indexOf(opener)).filter(offset => offset >= 0)
	const hasContentTag = openingLine.includes('<title') || openingLine.includes('<script')
	const voidOnly = voidOffsets.length > 0 && !hasContentTag
	const firstVoidOffset = voidOnly ? Math.min(...voidOffsets) : 0

	// Scanner state carried across lines: are we inside a tag, a quoted value, a {...} expression?
	const scan = { inTag: false, quote: '', braceDepth: 0 }
	const scanLine = (text: string, from: number): void => {
		for (let pos = from; pos < text.length; pos++) {
			const ch = text.charAt(pos)
			if (!scan.inTag) {
				if (ch === '<') scan.inTag = true
				continue
			}
			if (scan.quote) {
				if (ch === scan.quote) scan.quote = ''
				continue
			}
			if (ch === '"' || ch === "'" || ch === '`') scan.quote = ch
			else if (ch === '{') scan.braceDepth++
			else if (ch === '}') scan.braceDepth = Math.max(0, scan.braceDepth - 1)
			else if (ch === '>' && scan.braceDepth === 0) scan.inTag = false
		}
	}

	// Collect lines until we find the closing
	const snippetLines: string[] = []
	const lastLine = Math.min(actualStart + MAX_SNIPPET_LINES, lines.length)
	for (let i = actualStart; i < lastLine; i++) {
		const line = lines[i]
		if (!line) continue
		snippetLines.push(line)

		// Check for self-closing or closing tag
		if (line.includes('/>') || line.includes('</') || (line.includes('>') && !line.includes('<'))) {
			break
		}

		if (voidOnly) {
			scanLine(line, i === actualStart ? firstVoidOffset : 0)
			// Every opened void tag is closed and the matched line is included: done
			if (i >= startLine && !scan.inTag) break
		}
	}

	return snippetLines.join('\n')
}
|
|
@@ -24,7 +24,3 @@ export { findCollectionSource, findMarkdownSourceLocation, parseMarkdownContent
|
|
|
24
24
|
|
|
25
25
|
// Snippet utilities (used by html-processor)
|
|
26
26
|
export { enhanceManifestWithSourceSnippets, extractCompleteTagSnippet, extractInnerHtmlFromSnippet, extractSourceSnippet } from './snippet-utils'
|
|
27
|
-
|
|
28
|
-
// SEO source finding
|
|
29
|
-
export { findSeoSource } from './seo-finder'
|
|
30
|
-
export type { SeoElementIdentifier, SeoSourceLocation } from './seo-finder'
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* SEO element identifier for source finding
|
|
3
|
-
*/
|
|
4
|
-
export interface SeoElementIdentifier {
|
|
5
|
-
/** Meta tag name attribute */
|
|
6
|
-
name?: string;
|
|
7
|
-
/** Meta tag property attribute (for OG/Twitter) */
|
|
8
|
-
property?: string;
|
|
9
|
-
/** Content value to match */
|
|
10
|
-
content?: string;
|
|
11
|
-
/** Canonical URL href */
|
|
12
|
-
href?: string;
|
|
13
|
-
/** JSON-LD @type value */
|
|
14
|
-
jsonLdType?: string;
|
|
15
|
-
}
|
|
16
|
-
/**
|
|
17
|
-
* Result of SEO source finding
|
|
18
|
-
*/
|
|
19
|
-
export interface SeoSourceLocation {
|
|
20
|
-
/** Path to source file relative to project root */
|
|
21
|
-
sourcePath: string;
|
|
22
|
-
/** Line number in source file (1-indexed) */
|
|
23
|
-
sourceLine: number;
|
|
24
|
-
/** Exact source code snippet */
|
|
25
|
-
sourceSnippet: string;
|
|
26
|
-
}
|
|
27
|
-
/**
|
|
28
|
-
* Find the source location for an SEO element.
|
|
29
|
-
* Searches Astro/HTML files in src/pages and src/layouts for matching SEO elements.
|
|
30
|
-
*/
|
|
31
|
-
export declare function findSeoSource(type: 'title' | 'meta' | 'canonical' | 'jsonld', identifier: SeoElementIdentifier): Promise<SeoSourceLocation | undefined>;
|
|
32
|
-
//# sourceMappingURL=seo-finder.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"seo-finder.d.ts","sourceRoot":"","sources":["../../../src/source-finder/seo-finder.ts"],"names":[],"mappings":"AAKA;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACpC,8BAA8B;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,mDAAmD;IACnD,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,6BAA6B;IAC7B,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,yBAAyB;IACzB,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,0BAA0B;IAC1B,UAAU,CAAC,EAAE,MAAM,CAAA;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC,mDAAmD;IACnD,UAAU,EAAE,MAAM,CAAA;IAClB,6CAA6C;IAC7C,UAAU,EAAE,MAAM,CAAA;IAClB,gCAAgC;IAChC,aAAa,EAAE,MAAM,CAAA;CACrB;AAED;;;GAGG;AACH,wBAAsB,aAAa,CAClC,IAAI,EAAE,OAAO,GAAG,MAAM,GAAG,WAAW,GAAG,QAAQ,EAC/C,UAAU,EAAE,oBAAoB,GAC9B,OAAO,CAAC,iBAAiB,GAAG,SAAS,CAAC,CAkBxC"}
|