@stati/core 1.6.4 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +616 -101
- package/dist/core/build.d.ts.map +1 -1
- package/dist/core/build.js +42 -6
- package/dist/core/content.d.ts.map +1 -1
- package/dist/core/content.js +1 -2
- package/dist/core/dev.d.ts.map +1 -1
- package/dist/core/dev.js +2 -5
- package/dist/core/index.d.ts +13 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +12 -0
- package/dist/core/invalidate.js +2 -2
- package/dist/core/isg/build-lock.js +1 -1
- package/dist/core/isg/deps.d.ts.map +1 -1
- package/dist/core/isg/deps.js +1 -3
- package/dist/core/isg/hash.js +1 -1
- package/dist/core/isg/index.d.ts +16 -0
- package/dist/core/isg/index.d.ts.map +1 -0
- package/dist/core/isg/index.js +22 -0
- package/dist/core/isg/manifest.js +1 -1
- package/dist/core/preview.d.ts.map +1 -1
- package/dist/core/preview.js +1 -2
- package/dist/core/templates.d.ts.map +1 -1
- package/dist/core/templates.js +4 -7
- package/dist/core/utils/index.d.ts +16 -0
- package/dist/core/utils/index.d.ts.map +1 -0
- package/dist/core/utils/index.js +22 -0
- package/dist/core/utils/partial-validation.d.ts.map +1 -1
- package/dist/core/utils/partial-validation.js +2 -1
- package/dist/index.d.ts +6 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -4
- package/dist/seo/auto-inject.d.ts +48 -0
- package/dist/seo/auto-inject.d.ts.map +1 -0
- package/dist/seo/auto-inject.js +108 -0
- package/dist/seo/generator.d.ts +77 -0
- package/dist/seo/generator.d.ts.map +1 -0
- package/dist/seo/generator.js +320 -0
- package/dist/seo/index.d.ts +12 -0
- package/dist/seo/index.d.ts.map +1 -0
- package/dist/seo/index.js +15 -0
- package/dist/seo/robots.d.ts +84 -0
- package/dist/seo/robots.d.ts.map +1 -0
- package/dist/seo/robots.js +165 -0
- package/dist/seo/sitemap.d.ts +37 -0
- package/dist/seo/sitemap.d.ts.map +1 -0
- package/dist/seo/sitemap.js +320 -0
- package/dist/seo/utils/escape-and-validation.d.ts +99 -0
- package/dist/seo/utils/escape-and-validation.d.ts.map +1 -0
- package/dist/seo/utils/escape-and-validation.js +319 -0
- package/dist/seo/utils/index.d.ts +7 -0
- package/dist/seo/utils/index.d.ts.map +1 -0
- package/dist/seo/utils/index.js +8 -0
- package/dist/seo/utils/url.d.ts +46 -0
- package/dist/seo/utils/url.d.ts.map +1 -0
- package/dist/seo/utils/url.js +66 -0
- package/dist/seo/utils.d.ts +94 -0
- package/dist/seo/utils.d.ts.map +1 -0
- package/dist/seo/utils.js +304 -0
- package/dist/types/config.d.ts +58 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/content.d.ts +181 -0
- package/dist/types/content.d.ts.map +1 -1
- package/dist/types/index.d.ts +5 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -1
- package/dist/types/seo.d.ts +69 -0
- package/dist/types/seo.d.ts.map +1 -0
- package/dist/types/seo.js +36 -0
- package/dist/types/sitemap.d.ts +94 -0
- package/dist/types/sitemap.d.ts.map +1 -0
- package/dist/types/sitemap.js +4 -0
- package/package.json +1 -1
- package/dist/core/utils/partials.d.ts +0 -24
- package/dist/core/utils/partials.d.ts.map +0 -1
- package/dist/core/utils/partials.js +0 -85
- package/dist/tests/utils/test-mocks.d.ts +0 -69
- package/dist/tests/utils/test-mocks.d.ts.map +0 -1
- package/dist/tests/utils/test-mocks.js +0 -125
- package/dist/types.d.ts +0 -543
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -1
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SEO utility functions for HTML escaping, validation, and tag detection
|
|
3
|
+
*/
|
|
4
|
+
import { URL } from 'node:url';
|
|
5
|
+
import { SEOTagType as SEOTagTypeEnum } from '../../types/seo.js';
|
|
6
|
+
/**
 * Memoization cache for escapeHtml, keyed by the raw (unescaped) input string.
 * Holds at most ESCAPE_CACHE_MAX_SIZE entries so it cannot grow without bound.
 */
const escapeHtmlCache = new Map();
const ESCAPE_CACHE_MAX_SIZE = 1000;
/**
 * Entity that replaces each character with special meaning in HTML text
 * or in quoted attribute values.
 */
const HTML_ENTITY_BY_CHAR = new Map([
    ['&', '&amp;'],
    ['<', '&lt;'],
    ['>', '&gt;'],
    ['"', '&quot;'],
    ["'", '&#39;'],
]);
/**
 * Escape HTML entities to prevent XSS attacks.
 * Results are memoized so frequently repeated strings are only escaped once.
 *
 * Cache eviction is clear-on-full (not LRU): once the cache already holds
 * ESCAPE_CACHE_MAX_SIZE entries, it is emptied before the new entry is stored.
 *
 * @param text - The text to escape (must be a string)
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by HTML entities
 *
 * @example
 * ```typescript
 * escapeHtml('<script>alert("xss")</script>');
 * // Returns: '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'
 * ```
 */
export function escapeHtml(text) {
    // An empty string is a legitimate cached value, so compare against undefined.
    const cached = escapeHtmlCache.get(text);
    if (cached !== undefined) {
        return cached;
    }
    const result = text.replace(/[&<>"']/g, (char) => HTML_ENTITY_BY_CHAR.get(char) ?? char);
    // Drop everything once the limit is reached, then start refilling.
    if (escapeHtmlCache.size >= ESCAPE_CACHE_MAX_SIZE) {
        escapeHtmlCache.clear();
    }
    escapeHtmlCache.set(text, result);
    return result;
}
|
|
56
|
+
/**
 * Recursively sanitize a structured-data value before it is emitted as JSON-LD.
 *
 * - Strings are passed through escapeHtml.
 * - Other primitives (and null) are returned unchanged.
 * - Arrays and plain objects are rebuilt with every element / own enumerable
 *   property sanitized one level deeper.
 * - Anything nested deeper than `maxDepth` is dropped: the call returns
 *   undefined after logging a warning, and undefined results are left out of
 *   the rebuilt parent array or object.
 *
 * @param data - The value to sanitize
 * @param logger - Object whose `warning(message)` method receives depth-limit warnings
 * @param depth - Current recursion depth (internal use, starts at 0)
 * @param maxDepth - Maximum allowed recursion depth (default: 50)
 * @returns The sanitized copy, or undefined when `depth` exceeds `maxDepth`
 */
export function sanitizeStructuredData(data, logger, depth = 0, maxDepth = 50) {
    // Depth guard comes first so runaway nesting is cut off regardless of value type.
    if (depth > maxDepth) {
        logger.warning(`Structured data exceeds maximum nesting depth of ${maxDepth}, removing deeply nested object`);
        return undefined;
    }
    if (typeof data === 'string') {
        return escapeHtml(data);
    }
    if (data === null || typeof data !== 'object') {
        return data;
    }
    const descend = (value) => sanitizeStructuredData(value, logger, depth + 1, maxDepth);
    if (Array.isArray(data)) {
        const kept = [];
        // forEach skips holes in sparse arrays, so they are never visited.
        data.forEach((entry) => {
            const clean = descend(entry);
            if (clean !== undefined) {
                kept.push(clean);
            }
        });
        return kept;
    }
    const output = {};
    for (const key of Object.keys(data)) {
        const clean = descend(data[key]);
        // Values removed by the depth limit (or already undefined) are omitted.
        if (clean !== undefined) {
            output[key] = clean;
        }
    }
    return output;
}
|
|
114
|
+
/**
 * Build the `content` value of a robots meta tag from SEO metadata.
 *
 * `seo.robots` may be either:
 * - a string, which is returned as-is (prefixed with `noindex, ` when
 *   `seo.noindex` is set and the string does not already contain `noindex`), or
 * - an object of flags, where each flag set to `false` contributes the
 *   matching `no…` directive and the `max*` fields contribute
 *   `max-snippet:` / `max-image-preview:` / `max-video-preview:` directives.
 *
 * @param seo - SEO metadata containing `noindex` and/or `robots`
 * @returns Comma-separated robots directives, or an empty string if none apply
 *
 * @example
 * ```typescript
 * generateRobotsContent({ noindex: true, robots: { follow: false } });
 * // Returns: 'noindex, nofollow'
 * ```
 */
export function generateRobotsContent(seo) {
    const { noindex, robots } = seo;
    // String form: pass through, only making sure the noindex flag is honoured.
    if (typeof robots === 'string') {
        const needsNoindexPrefix = noindex && !robots.includes('noindex');
        return needsNoindexPrefix ? `noindex, ${robots}` : robots;
    }
    const parts = noindex ? ['noindex'] : [];
    if (robots) {
        // Avoid a duplicate when the noindex flag already added the directive.
        if (robots.index === false && !parts.includes('noindex')) {
            parts.push('noindex');
        }
        // Flags that emit a directive only when explicitly set to false.
        const negatedFlags = [
            ['follow', 'nofollow'],
            ['archive', 'noarchive'],
            ['snippet', 'nosnippet'],
            ['imageindex', 'noimageindex'],
            ['translate', 'notranslate'],
        ];
        for (const [flag, directive] of negatedFlags) {
            if (robots[flag] === false) {
                parts.push(directive);
            }
        }
        // Numeric limits are emitted even when 0 or negative; only undefined is skipped.
        if (robots.maxSnippet !== undefined) {
            parts.push(`max-snippet:${robots.maxSnippet}`);
        }
        if (robots.maxImagePreview) {
            parts.push(`max-image-preview:${robots.maxImagePreview}`);
        }
        if (robots.maxVideoPreview !== undefined) {
            parts.push(`max-video-preview:${robots.maxVideoPreview}`);
        }
    }
    return parts.join(', ');
}
|
|
174
|
+
/**
 * Check that a string parses as an absolute URL using the http or https scheme.
 *
 * @param url - The URL string to validate
 * @returns True when the URL parses and its protocol is `http:` or `https:`;
 *          false for any other scheme or when parsing fails
 */
function isValidUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // The URL constructor throws on relative or malformed input.
        return false;
    }
    return ['http:', 'https:'].includes(parsed.protocol);
}
|
|
189
|
+
/**
 * Validate SEO metadata before processing.
 *
 * Problems that make the metadata unusable are reported as errors; likely
 * mistakes are reported as warnings. `valid` is true only when there are no errors.
 *
 * Checks performed:
 * - title length (warning below 5 or above 70 characters)
 * - description length (warning below 50 or above 160 characters)
 * - canonical URL must be an absolute http/https URL (error)
 * - Open Graph / Twitter image URL must be an absolute http/https URL or
 *   start with `/` (warning); a missing Open Graph image URL is also a warning
 * - Open Graph image width/height, when given, must be positive integers (error)
 * - serialized structured data larger than 100KB (warning)
 * - priority, when given, must be a number between 0 and 1; NaN is rejected (error)
 *
 * @param seo - SEO metadata to validate
 * @param _pageUrl - URL of the page being validated (currently unused)
 * @returns Validation result with valid flag, errors, and warnings
 *
 * @example
 * ```typescript
 * const result = validateSEOMetadata({
 *   title: 'My Page',
 *   canonical: 'invalid-url'
 * }, '/my-page');
 * // Returns: { valid: false, errors: ['Invalid canonical URL: invalid-url'], warnings: [] }
 * ```
 */
export function validateSEOMetadata(seo, _pageUrl) {
    const errors = [];
    const warnings = [];
    // Validate title length
    if (seo.title) {
        if (seo.title.length < 5) {
            warnings.push(`Title is only ${seo.title.length} characters (recommended: 50-60)`);
        }
        else if (seo.title.length > 70) {
            warnings.push(`Title is ${seo.title.length} characters (recommended: 50-60)`);
        }
    }
    // Validate description length
    if (seo.description) {
        if (seo.description.length < 50) {
            warnings.push(`Description is only ${seo.description.length} characters (recommended: 150-160)`);
        }
        else if (seo.description.length > 160) {
            warnings.push(`Description is ${seo.description.length} characters (recommended: 150-160)`);
        }
    }
    // Validate canonical URL
    if (seo.canonical && !isValidUrl(seo.canonical)) {
        errors.push(`Invalid canonical URL: ${seo.canonical}`);
    }
    // Validate Open Graph image URL and dimensions
    const ogImage = seo.openGraph?.image;
    if (ogImage) {
        const imageUrl = typeof ogImage === 'string' ? ogImage : ogImage.url;
        // An image object without a string `url` is reported as a warning
        // instead of crashing on `imageUrl.startsWith`.
        if (typeof imageUrl !== 'string' || (!isValidUrl(imageUrl) && !imageUrl.startsWith('/'))) {
            warnings.push(`Open Graph image URL may be invalid: ${imageUrl}`);
        }
        // Validate image dimensions if provided
        if (typeof ogImage !== 'string') {
            const { width, height } = ogImage;
            if (width !== undefined && (!Number.isInteger(width) || width <= 0)) {
                errors.push(`Open Graph image width must be a positive integer (got ${width})`);
            }
            if (height !== undefined && (!Number.isInteger(height) || height <= 0)) {
                errors.push(`Open Graph image height must be a positive integer (got ${height})`);
            }
        }
    }
    // Validate Twitter image URL
    if (seo.twitter?.image && !isValidUrl(seo.twitter.image) && !seo.twitter.image.startsWith('/')) {
        warnings.push(`Twitter Card image URL may be invalid: ${seo.twitter.image}`);
    }
    // Validate structured data size
    if (seo.structuredData) {
        const jsonSize = JSON.stringify(seo.structuredData).length;
        const maxSize = 100 * 1024; // 100KB limit
        if (jsonSize > maxSize) {
            warnings.push(`Structured data is ${(jsonSize / 1024).toFixed(2)}KB (recommended: <100KB)`);
        }
    }
    // Validate priority value
    if (seo.priority !== undefined) {
        const priority = seo.priority;
        // NaN has typeof 'number' and fails both range comparisons, so reject it explicitly.
        if (typeof priority !== 'number' || Number.isNaN(priority) || priority < 0 || priority > 1) {
            errors.push('Priority must be a number between 0.0 and 1.0');
        }
    }
    return { valid: errors.length === 0, errors, warnings };
}
|
|
268
|
+
/**
 * Detect which kinds of SEO tags already exist in a document's <head>, so
 * auto-injection can skip them and only generate what is missing.
 *
 * Only the content of the first `<head>…</head>` section is scanned. When no
 * such section is found a warning is written with console.warn and an empty
 * Set is returned. Matching is case-insensitive, and a `<title>` element is
 * recognised whether or not it carries attributes (e.g. `<title lang="en">`).
 *
 * @param html - The HTML content to scan
 * @returns Set of SEOTagType enum values for the tag kinds that are present
 *
 * @example
 * ```typescript
 * const html = '<head><title>My Page</title><meta name="description" content="..."></head>';
 * const existing = detectExistingSEOTags(html);
 * // Returns: Set { SEOTagType.Title, SEOTagType.Description }
 * ```
 */
export function detectExistingSEOTags(html) {
    const existingTags = new Set();
    // Restrict scanning to the <head> section.
    const headMatch = html.match(/<head[^>]*>([\s\S]*?)<\/head>/i);
    if (!headMatch) {
        console.warn('No <head> tag found in HTML, SEO auto-injection may not work correctly');
        return existingTags;
    }
    const headContent = headMatch[1];
    // An empty <head></head> cannot contain any tags.
    if (!headContent) {
        return existingTags;
    }
    // `[^>]*` also spans newlines, so attributes split over several lines still match.
    const patterns = [
        // Optional attributes are allowed; `\s` keeps elements like <titlebar> from matching.
        { regex: /<title(?:\s[^>]*)?>/i, type: SEOTagTypeEnum.Title },
        {
            regex: /<meta\s+[^>]*name\s*=\s*["']description["'][^>]*>/i,
            type: SEOTagTypeEnum.Description,
        },
        { regex: /<meta\s+[^>]*name\s*=\s*["']keywords["'][^>]*>/i, type: SEOTagTypeEnum.Keywords },
        { regex: /<meta\s+[^>]*name\s*=\s*["']author["'][^>]*>/i, type: SEOTagTypeEnum.Author },
        { regex: /<meta\s+[^>]*name\s*=\s*["']robots["'][^>]*>/i, type: SEOTagTypeEnum.Robots },
        { regex: /<link\s+[^>]*rel\s*=\s*["']canonical["'][^>]*>/i, type: SEOTagTypeEnum.Canonical },
        // Any og:* property / twitter:* name marks the whole group as present.
        { regex: /<meta\s+[^>]*property\s*=\s*["']og:/i, type: SEOTagTypeEnum.OpenGraph },
        { regex: /<meta\s+[^>]*name\s*=\s*["']twitter:/i, type: SEOTagTypeEnum.Twitter },
        {
            regex: /<script\s+[^>]*type\s*=\s*["']application\/ld\+json["'][^>]*>/i,
            type: SEOTagTypeEnum.StructuredData,
        },
    ];
    for (const { regex, type } of patterns) {
        if (regex.test(headContent)) {
            existingTags.add(type);
        }
    }
    return existingTags;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SEO utilities index
|
|
3
|
+
* @module seo/utils
|
|
4
|
+
*/
|
|
5
|
+
export { escapeHtml, generateRobotsContent, validateSEOMetadata, detectExistingSEOTags, } from './escape-and-validation.js';
|
|
6
|
+
export { normalizeUrlPath, resolveAbsoluteUrl, isValidUrl } from './url.js';
|
|
7
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/seo/utils/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EACL,UAAU,EACV,qBAAqB,EACrB,mBAAmB,EACnB,qBAAqB,GACtB,MAAM,4BAA4B,CAAC;AAGpC,OAAO,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SEO utilities index
|
|
3
|
+
* @module seo/utils
|
|
4
|
+
*/
|
|
5
|
+
// Re-export escape and validation utilities
|
|
6
|
+
export { escapeHtml, generateRobotsContent, validateSEOMetadata, detectExistingSEOTags, } from './escape-and-validation.js';
|
|
7
|
+
// Re-export URL utilities
|
|
8
|
+
export { normalizeUrlPath, resolveAbsoluteUrl, isValidUrl } from './url.js';
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL resolution and normalization utilities for SEO module
|
|
3
|
+
* @module seo/utils/url
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Normalizes a path to ensure it starts with /
|
|
7
|
+
* @param path - Path to normalize
|
|
8
|
+
* @returns Normalized path starting with /
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```typescript
|
|
12
|
+
* normalizeUrlPath('about'); // '/about'
|
|
13
|
+
* normalizeUrlPath('/about'); // '/about'
|
|
14
|
+
* ```
|
|
15
|
+
*/
|
|
16
|
+
export declare function normalizeUrlPath(path: string): string;
|
|
17
|
+
/**
|
|
18
|
+
* Resolves a relative or absolute URL to a full absolute URL
|
|
19
|
+
* @param url - Relative or absolute URL
|
|
20
|
+
* @param baseUrl - Base site URL
|
|
21
|
+
* @returns Absolute URL
|
|
22
|
+
*
|
|
23
|
+
* @example
|
|
24
|
+
* ```typescript
|
|
25
|
+
* resolveAbsoluteUrl('/about', 'https://example.com');
|
|
26
|
+
* // Returns: 'https://example.com/about'
|
|
27
|
+
*
|
|
28
|
+
* resolveAbsoluteUrl('https://other.com/page', 'https://example.com');
|
|
29
|
+
* // Returns: 'https://other.com/page'
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
export declare function resolveAbsoluteUrl(url: string, baseUrl: string): string;
|
|
33
|
+
/**
|
|
34
|
+
* Validates a URL string
|
|
35
|
+
* @param url - URL to validate
|
|
36
|
+
* @returns true if valid URL with http/https protocol
|
|
37
|
+
*
|
|
38
|
+
* @example
|
|
39
|
+
* ```typescript
|
|
40
|
+
* isValidUrl('https://example.com'); // true
|
|
41
|
+
* isValidUrl('not-a-url'); // false
|
|
42
|
+
* isValidUrl('ftp://example.com'); // false (only http/https)
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
export declare function isValidUrl(url: string): boolean;
|
|
46
|
+
//# sourceMappingURL=url.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url.d.ts","sourceRoot":"","sources":["../../../src/seo/utils/url.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAavE;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAO/C"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL resolution and normalization utilities for SEO module
|
|
3
|
+
* @module seo/utils/url
|
|
4
|
+
*/
|
|
5
|
+
import { URL } from 'node:url';
|
|
6
|
+
/**
 * Ensure a path begins with a leading slash.
 * @param path - Path to normalize
 * @returns The path unchanged when it already starts with `/`, otherwise the
 *          path with `/` prepended
 *
 * @example
 * ```typescript
 * normalizeUrlPath('about');  // '/about'
 * normalizeUrlPath('/about'); // '/about'
 * ```
 */
export function normalizeUrlPath(path) {
    if (path.startsWith('/')) {
        return path;
    }
    return `/${path}`;
}
|
|
20
|
+
/**
 * Resolves a relative or absolute URL to a full absolute URL.
 *
 * - URLs that already start with an http/https scheme (matched
 *   case-insensitively) are returned unchanged.
 * - Protocol-relative URLs (`//host/path`) get the scheme of `baseUrl`
 *   (or `https:` when `baseUrl` has no `scheme://` prefix).
 * - Anything else is treated as a path: it is given a leading `/` and
 *   appended to `baseUrl` with one trailing `/` removed from the base.
 *
 * @param url - Relative or absolute URL
 * @param baseUrl - Base site URL
 * @returns Absolute URL
 *
 * @example
 * ```typescript
 * resolveAbsoluteUrl('/about', 'https://example.com');
 * // Returns: 'https://example.com/about'
 *
 * resolveAbsoluteUrl('https://other.com/page', 'https://example.com');
 * // Returns: 'https://other.com/page'
 *
 * resolveAbsoluteUrl('//cdn.example.com/a.png', 'https://example.com');
 * // Returns: 'https://cdn.example.com/a.png'
 * ```
 */
export function resolveAbsoluteUrl(url, baseUrl) {
    // Already absolute. URL schemes are case-insensitive, so `HTTPS://` counts too.
    if (/^https?:\/\//i.test(url)) {
        return url;
    }
    // Protocol-relative reference: it names its own host, so only the scheme is borrowed.
    if (url.startsWith('//')) {
        const baseScheme = /^([a-z][a-z0-9+.-]*:)\/\//i.exec(baseUrl);
        return `${baseScheme ? baseScheme[1] : 'https:'}${url}`;
    }
    // Ensure baseUrl doesn't end with /
    const cleanBaseUrl = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl;
    // Ensure url starts with /
    const path = normalizeUrlPath(url);
    return `${cleanBaseUrl}${path}`;
}
|
|
46
|
+
/**
 * Validates a URL string.
 * @param url - URL to validate
 * @returns true when the string parses as an absolute URL whose protocol is
 *          http or https; false otherwise (including when parsing throws)
 *
 * @example
 * ```typescript
 * isValidUrl('https://example.com'); // true
 * isValidUrl('not-a-url');           // false
 * isValidUrl('ftp://example.com');   // false (only http/https)
 * ```
 */
export function isValidUrl(url) {
    let protocol;
    try {
        ({ protocol } = new URL(url));
    }
    catch {
        // Relative or malformed input makes the URL constructor throw.
        return false;
    }
    return protocol === 'http:' || protocol === 'https:';
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SEO utility functions for HTML escaping, validation, and tag detection
|
|
3
|
+
*/
|
|
4
|
+
import type { SEOMetadata } from '../types/content.js';
|
|
5
|
+
import type { SEOValidationResult, SEOTagType } from '../types/seo.js';
|
|
6
|
+
/**
|
|
7
|
+
* Escape HTML entities to prevent XSS attacks.
|
|
8
|
+
* Uses memoization for performance with frequently repeated strings.
|
|
9
|
+
*
|
|
10
|
+
* Uses a bounded cache with clear-on-full eviction (not LRU): when the cache is full, it's cleared
|
|
11
|
+
* and the new entry is added. This prevents unbounded memory growth while
|
|
12
|
+
* still providing caching benefits for repeated strings.
|
|
13
|
+
*
|
|
14
|
+
* @param text - The text to escape
|
|
15
|
+
* @returns HTML-safe string with special characters escaped
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* escapeHtml('<script>alert("xss")</script>');
|
|
20
|
+
* // Returns: '&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;'
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
export declare function escapeHtml(text: string): string;
|
|
24
|
+
/**
|
|
25
|
+
* Sanitize structured data to prevent XSS attacks and ensure safe JSON-LD output.
|
|
26
|
+
* Recursively processes objects and arrays, escaping string values and enforcing depth limits.
|
|
27
|
+
*
|
|
28
|
+
* @param data - The data to sanitize
|
|
29
|
+
* @param depth - Current recursion depth (internal use)
|
|
30
|
+
* @param maxDepth - Maximum allowed recursion depth (default: 50)
|
|
31
|
+
* @returns Sanitized data safe for JSON-LD output
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const data = {
|
|
36
|
+
* name: '<script>alert("xss")</script>',
|
|
37
|
+
* nested: { value: 'test' }
|
|
38
|
+
* };
|
|
39
|
+
* sanitizeStructuredData(data);
|
|
40
|
+
* // Returns: { name: '&lt;script&gt;...', nested: { value: 'test' } }
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
export declare function sanitizeStructuredData(data: unknown, depth?: number, maxDepth?: number): unknown;
|
|
44
|
+
/**
|
|
45
|
+
* Generate robots meta tag content from SEO metadata and robots configuration.
|
|
46
|
+
* Combines noindex flag and robots directives into a comma-separated string.
|
|
47
|
+
*
|
|
48
|
+
* @param seo - SEO metadata containing robots configuration
|
|
49
|
+
* @returns Comma-separated robots directives, or empty string if none
|
|
50
|
+
*
|
|
51
|
+
* @example
|
|
52
|
+
* ```typescript
|
|
53
|
+
* generateRobotsContent({ noindex: true, robots: { follow: false } });
|
|
54
|
+
* // Returns: 'noindex, nofollow'
|
|
55
|
+
* ```
|
|
56
|
+
*/
|
|
57
|
+
export declare function generateRobotsContent(seo: SEOMetadata): string;
|
|
58
|
+
/**
|
|
59
|
+
* Validate SEO metadata before processing.
|
|
60
|
+
* Checks for common issues like invalid URLs, improper lengths, and malformed data.
|
|
61
|
+
*
|
|
62
|
+
* @param seo - SEO metadata to validate
|
|
63
|
+
* @param _pageUrl - URL of the page being validated (for context in error messages)
|
|
64
|
+
* @returns Validation result with valid flag, errors, and warnings
|
|
65
|
+
*
|
|
66
|
+
* @example
|
|
67
|
+
* ```typescript
|
|
68
|
+
* const result = validateSEOMetadata({
|
|
69
|
+
* title: 'My Page',
|
|
70
|
+
* canonical: 'invalid-url'
|
|
71
|
+
* }, '/my-page');
|
|
72
|
+
* // Returns: { valid: false, errors: ['Invalid canonical URL...'], warnings: [] }
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
export declare function validateSEOMetadata(seo: SEOMetadata, _pageUrl: string): SEOValidationResult;
|
|
76
|
+
/**
|
|
77
|
+
* Detect existing SEO tags in HTML to avoid duplication during auto-injection.
|
|
78
|
+
* Uses enhanced regex patterns to handle multi-line attributes and edge cases.
|
|
79
|
+
*
|
|
80
|
+
* Returns a Set of SEOTagType enum values indicating which tag types are already present.
|
|
81
|
+
* This allows for granular control: only missing tags will be generated.
|
|
82
|
+
*
|
|
83
|
+
* @param html - The HTML content to scan
|
|
84
|
+
* @returns Set of SEOTagType enum values for existing tags
|
|
85
|
+
*
|
|
86
|
+
* @example
|
|
87
|
+
* ```typescript
|
|
88
|
+
* const html = '<head><title>My Page</title><meta name="description" content="..."></head>';
|
|
89
|
+
* const existing = detectExistingSEOTags(html);
|
|
90
|
+
* // Returns: Set { SEOTagType.Title, SEOTagType.Description }
|
|
91
|
+
* ```
|
|
92
|
+
*/
|
|
93
|
+
export declare function detectExistingSEOTags(html: string): Set<SEOTagType>;
|
|
94
|
+
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/seo/utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAgB,MAAM,qBAAqB,CAAC;AACrE,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAUvE;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA4B/C;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,sBAAsB,CACpC,IAAI,EAAE,OAAO,EACb,KAAK,GAAE,MAAU,EACjB,QAAQ,GAAE,MAAW,GACpB,OAAO,CA0BT;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,WAAW,GAAG,MAAM,CAiD9D;AAiBD;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,GAAG,mBAAmB,CAwE3F;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,CAuCnE"}
|