@hkdigital/lib-core 0.5.13 → 0.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
# Meta Utilities
|
|
2
|
+
|
|
3
|
+
SEO and meta tag utilities for robots.txt and sitemap.xml generation.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This module provides functions for generating robots.txt and sitemap.xml
|
|
8
|
+
files dynamically based on configuration. The utilities are used by the
|
|
9
|
+
`routes/(meta)` endpoints but can also be used independently in your own
|
|
10
|
+
server routes.
|
|
11
|
+
|
|
12
|
+
## Modules
|
|
13
|
+
|
|
14
|
+
### Robots
|
|
15
|
+
|
|
16
|
+
Generate robots.txt content with host filtering and sitemap references.
|
|
17
|
+
|
|
18
|
+
```javascript
|
|
19
|
+
import { generateRobotsTxt, isHostAllowed } from '$lib/meta/robots.js';
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
### Sitemap
|
|
23
|
+
|
|
24
|
+
Generate sitemap.xml content from route configurations.
|
|
25
|
+
|
|
26
|
+
```javascript
|
|
27
|
+
import { generateSitemap } from '$lib/meta/sitemap.js';
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
### Robots.txt Generation
|
|
33
|
+
|
|
34
|
+
The `generateRobotsTxt()` function creates robots.txt content based on the
|
|
35
|
+
request URL and configuration.
|
|
36
|
+
|
|
37
|
+
**Basic usage:**
|
|
38
|
+
|
|
39
|
+
```javascript
|
|
40
|
+
import { generateRobotsTxt } from '$lib/meta/robots.js';
|
|
41
|
+
|
|
42
|
+
export const GET = async ({ url }) => {
|
|
43
|
+
const config = {
|
|
44
|
+
allowedHosts: ['example.com', 'www.example.com'],
|
|
45
|
+
disallowedPaths: ['/admin', '/api']
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
const robotsTxt = generateRobotsTxt(url, config);
|
|
49
|
+
return new Response(robotsTxt, {
|
|
50
|
+
headers: { 'Content-Type': 'text/plain' }
|
|
51
|
+
});
|
|
52
|
+
};
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
**Configuration options:**
|
|
56
|
+
|
|
57
|
+
```javascript
|
|
58
|
+
/**
|
|
59
|
+
* @typedef {Object} RobotsConfig
|
|
60
|
+
* @property {string[] | '*'} [allowedHosts]
|
|
61
|
+
* Allowed host patterns. Use '*' or omit to allow all hosts.
|
|
62
|
+
* Supports wildcards (e.g., '*.example.com')
|
|
63
|
+
* @property {string[]} [disallowedPaths]
|
|
64
|
+
* Paths to block from indexing (e.g., '/admin', '/api/*')
|
|
65
|
+
*/
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Host filtering:**
|
|
69
|
+
|
|
70
|
+
```javascript
|
|
71
|
+
// Allow only production domain
|
|
72
|
+
const config = {
|
|
73
|
+
allowedHosts: ['example.com']
|
|
74
|
+
};
|
|
75
|
+
|
|
76
|
+
// example.com → User-agent: *\nAllow: /\nSitemap: ...
|
|
77
|
+
// test.example.com → User-agent: *\nDisallow: /
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Wildcard patterns:**
|
|
81
|
+
|
|
82
|
+
```javascript
|
|
83
|
+
// Allow the apex domain and all of its subdomains ('*.example.com' alone does not match 'example.com')
|
|
84
|
+
const config = {
|
|
85
|
+
allowedHosts: ['example.com', '*.example.com']
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
// example.com → allowed
|
|
89
|
+
// test.example.com → allowed
|
|
90
|
+
// staging.example.com → allowed
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Path blocking:**
|
|
94
|
+
|
|
95
|
+
```javascript
|
|
96
|
+
// Block specific paths
|
|
97
|
+
const config = {
|
|
98
|
+
allowedHosts: ['example.com'],
|
|
99
|
+
disallowedPaths: ['/admin', '/api', '/private/*']
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
// Generates:
|
|
103
|
+
// User-agent: *
|
|
104
|
+
// Allow: /
|
|
105
|
+
// Disallow: /admin
|
|
106
|
+
// Disallow: /api
|
|
107
|
+
// Disallow: /private/*
|
|
108
|
+
// Sitemap: https://example.com/sitemap.xml
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Sitemap reference:**
|
|
112
|
+
|
|
113
|
+
Sitemap reference is always included for allowed hosts:
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
User-agent: *
|
|
117
|
+
Allow: /
|
|
118
|
+
Sitemap: https://example.com/sitemap.xml
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Sitemap.xml Generation
|
|
122
|
+
|
|
123
|
+
The `generateSitemap()` function creates sitemap.xml content from route
|
|
124
|
+
configurations.
|
|
125
|
+
|
|
126
|
+
**Basic usage:**
|
|
127
|
+
|
|
128
|
+
```javascript
|
|
129
|
+
import { generateSitemap } from '$lib/meta/sitemap.js';
|
|
130
|
+
|
|
131
|
+
export const GET = async ({ url }) => {
|
|
132
|
+
const routes = ['/', '/about', '/contact'];
|
|
133
|
+
|
|
134
|
+
const sitemap = generateSitemap(url.origin, routes);
|
|
135
|
+
return new Response(sitemap, {
|
|
136
|
+
headers: { 'Content-Type': 'application/xml' }
|
|
137
|
+
});
|
|
138
|
+
};
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Route configuration:**
|
|
142
|
+
|
|
143
|
+
```javascript
|
|
144
|
+
/**
|
|
145
|
+
* @typedef {string | SitemapRouteObject} SitemapRoute
|
|
146
|
+
* Route can be a simple string path or an object with details
|
|
147
|
+
*/
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* @typedef {Object} SitemapRouteObject
|
|
151
|
+
* @property {string} path - Route path (e.g., '/about')
|
|
152
|
+
* @property {number} [priority] - Priority (0.0 to 1.0)
|
|
153
|
+
* @property {'always'|'hourly'|'daily'|'weekly'|'monthly'|'yearly'|'never'}
|
|
154
|
+
* [changefreq] - Change frequency
|
|
155
|
+
*/
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**Simple format (recommended):**
|
|
159
|
+
|
|
160
|
+
```javascript
|
|
161
|
+
const routes = [
|
|
162
|
+
'/', // priority: 1.0, changefreq: 'daily'
|
|
163
|
+
'/about', // priority: 0.8, changefreq: 'weekly'
|
|
164
|
+
'/contact' // priority: 0.8, changefreq: 'weekly'
|
|
165
|
+
];
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**Advanced format with custom settings:**
|
|
169
|
+
|
|
170
|
+
```javascript
|
|
171
|
+
const routes = [
|
|
172
|
+
'/',
|
|
173
|
+
'/about',
|
|
174
|
+
{ path: '/blog', priority: 0.9, changefreq: 'daily' },
|
|
175
|
+
{ path: '/legal', priority: 0.3, changefreq: 'yearly' }
|
|
176
|
+
];
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**Mixed format:**
|
|
180
|
+
|
|
181
|
+
```javascript
|
|
182
|
+
const routes = [
|
|
183
|
+
'/',
|
|
184
|
+
'/about',
|
|
185
|
+
{ path: '/blog', priority: 0.9, changefreq: 'daily' },
|
|
186
|
+
'/contact'
|
|
187
|
+
];
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
**Default values:**
|
|
191
|
+
|
|
192
|
+
- Root path (`/`): priority `1.0`, changefreq `daily`
|
|
193
|
+
- Other paths: priority `0.8`, changefreq `weekly`
|
|
194
|
+
- Root path is always included (added automatically if missing)
|
|
195
|
+
|
|
196
|
+
## Helper Functions
|
|
197
|
+
|
|
198
|
+
### isHostAllowed()
|
|
199
|
+
|
|
200
|
+
Check if a hostname matches the allowed hosts configuration.
|
|
201
|
+
|
|
202
|
+
```javascript
|
|
203
|
+
import { isHostAllowed } from '$lib/meta/robots.js';
|
|
204
|
+
|
|
205
|
+
// Exact match
|
|
206
|
+
isHostAllowed('example.com', ['example.com']); // true
|
|
207
|
+
isHostAllowed('test.example.com', ['example.com']); // false
|
|
208
|
+
|
|
209
|
+
// Wildcard match
|
|
210
|
+
isHostAllowed('test.example.com', ['*.example.com']); // true
|
|
211
|
+
isHostAllowed('example.com', ['*.example.com']); // false
|
|
212
|
+
|
|
213
|
+
// Allow all
|
|
214
|
+
isHostAllowed('anything.com', '*'); // true
|
|
215
|
+
isHostAllowed('anything.com', undefined); // true
|
|
216
|
+
|
|
217
|
+
// Multiple patterns
|
|
218
|
+
isHostAllowed('example.com', ['example.com', '*.staging.com']); // true
|
|
219
|
+
isHostAllowed('app.staging.com', ['example.com', '*.staging.com']); // true
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
**Case insensitive:**
|
|
223
|
+
|
|
224
|
+
```javascript
|
|
225
|
+
isHostAllowed('Example.COM', ['example.com']); // true
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
**String or array:**
|
|
229
|
+
|
|
230
|
+
```javascript
|
|
231
|
+
// Single string
|
|
232
|
+
isHostAllowed('example.com', 'example.com'); // true
|
|
233
|
+
|
|
234
|
+
// Array
|
|
235
|
+
isHostAllowed('example.com', ['example.com']); // true
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Real-World Examples
|
|
239
|
+
|
|
240
|
+
### Production-only indexing
|
|
241
|
+
|
|
242
|
+
```javascript
|
|
243
|
+
// Only allow production domain to be indexed
|
|
244
|
+
const robotsConfig = {
|
|
245
|
+
allowedHosts: ['mysite.com', 'www.mysite.com'],
|
|
246
|
+
disallowedPaths: ['/admin', '/api']
|
|
247
|
+
};
|
|
248
|
+
|
|
249
|
+
// Production: mysite.com → Allow + Sitemap
|
|
250
|
+
// Staging: staging.mysite.com → Disallow
|
|
251
|
+
// Development: localhost → Disallow
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Staging and production indexing
|
|
255
|
+
|
|
256
|
+
```javascript
|
|
257
|
+
// Allow both production and staging subdomains
|
|
258
|
+
const robotsConfig = {
|
|
259
|
+
allowedHosts: ['mysite.com', '*.mysite.com'],
|
|
260
|
+
disallowedPaths: ['/admin', '/api']
|
|
261
|
+
};
|
|
262
|
+
|
|
263
|
+
// Production: mysite.com → Allow + Sitemap
|
|
264
|
+
// Staging: staging.mysite.com → Allow + Sitemap
|
|
265
|
+
// Development: localhost → Disallow
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Allow all hosts (development)
|
|
269
|
+
|
|
270
|
+
```javascript
|
|
271
|
+
// Allow all hosts - useful during development
|
|
272
|
+
const robotsConfig = {
|
|
273
|
+
allowedHosts: '*',
|
|
274
|
+
disallowedPaths: ['/admin']
|
|
275
|
+
};
|
|
276
|
+
|
|
277
|
+
// All hosts → Allow + Sitemap
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Complex sitemap configuration
|
|
281
|
+
|
|
282
|
+
```javascript
|
|
283
|
+
const routes = [
|
|
284
|
+
'/',
|
|
285
|
+
'/about',
|
|
286
|
+
'/contact',
|
|
287
|
+
|
|
288
|
+
// Blog updated frequently
|
|
289
|
+
{ path: '/blog', priority: 0.9, changefreq: 'daily' },
|
|
290
|
+
|
|
291
|
+
// Legal pages rarely change
|
|
292
|
+
{ path: '/privacy', priority: 0.3, changefreq: 'yearly' },
|
|
293
|
+
{ path: '/terms', priority: 0.3, changefreq: 'yearly' },
|
|
294
|
+
|
|
295
|
+
// Documentation moderately important
|
|
296
|
+
{ path: '/docs', priority: 0.7, changefreq: 'monthly' }
|
|
297
|
+
];
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## Integration with Routes
|
|
301
|
+
|
|
302
|
+
These utilities are used by the `routes/(meta)` endpoints:
|
|
303
|
+
|
|
304
|
+
### robots.txt endpoint
|
|
305
|
+
|
|
306
|
+
```javascript
|
|
307
|
+
// routes/(meta)/robots.txt/+server.js
|
|
308
|
+
import { text } from '@sveltejs/kit';
|
|
309
|
+
import { generateRobotsTxt } from '$lib/meta/robots.js';
|
|
310
|
+
import { robotsConfig } from '../config.js';
|
|
311
|
+
|
|
312
|
+
export const GET = async ({ url }) => {
|
|
313
|
+
const robotsTxt = generateRobotsTxt(url, robotsConfig);
|
|
314
|
+
return text(robotsTxt);
|
|
315
|
+
};
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
### sitemap.xml endpoint
|
|
319
|
+
|
|
320
|
+
```javascript
|
|
321
|
+
// routes/(meta)/sitemap.xml/+server.js
|
|
322
|
+
import { generateSitemap } from '$lib/meta/sitemap.js';
|
|
323
|
+
import { siteRoutes } from '../config.js';
|
|
324
|
+
|
|
325
|
+
export const GET = async ({ url }) => {
|
|
326
|
+
const sitemap = generateSitemap(url.origin, siteRoutes);
|
|
327
|
+
|
|
328
|
+
return new Response(sitemap, {
|
|
329
|
+
headers: {
|
|
330
|
+
'Content-Type': 'application/xml',
|
|
331
|
+
'Cache-Control': 'max-age=0, s-maxage=3600'
|
|
332
|
+
}
|
|
333
|
+
});
|
|
334
|
+
};
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Reverse Proxy Configuration
|
|
338
|
+
|
|
339
|
+
If your app is deployed behind a reverse proxy (nginx, Cloudflare, etc.),
|
|
340
|
+
ensure your SvelteKit adapter is configured to trust proxy headers for
|
|
341
|
+
correct origin detection:
|
|
342
|
+
|
|
343
|
+
```javascript
|
|
344
|
+
// svelte.config.js
|
|
345
|
+
import adapter from '@sveltejs/adapter-node';
|
|
346
|
+
|
|
347
|
+
export default {
|
|
348
|
+
kit: {
|
|
349
|
+
adapter: adapter({
|
|
350
|
+
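// NOTE(review): intended to trust X-Forwarded-* headers from the proxy, but adapter-node's documented options are only `out`, `precompress` and `envPrefix` — confirm `trustProxy` is honored; proxy headers are normally enabled via the PROTOCOL_HEADER / HOST_HEADER environment variables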
|
|
351
|
+
trustProxy: true
|
|
352
|
+
})
|
|
353
|
+
}
|
|
354
|
+
};
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
Without this, `url.origin` may be `http://localhost` instead of your actual
|
|
358
|
+
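domain, and the sitemap directive will point to the wrong URL. With `@sveltejs/adapter-node`, the origin is determined at runtime from the `ORIGIN` environment variable (e.g. `ORIGIN=https://example.com`) or from `PROTOCOL_HEADER=x-forwarded-proto` and `HOST_HEADER=x-forwarded-host` when running behind a trusted proxy.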
|
|
359
|
+
|
|
360
|
+
## Testing
|
|
361
|
+
|
|
362
|
+
Unit tests are included for all functions:
|
|
363
|
+
|
|
364
|
+
```bash
|
|
365
|
+
# Run meta utility tests
|
|
366
|
+
pnpm test:file src/lib/meta/
|
|
367
|
+
|
|
368
|
+
# Test coverage includes:
|
|
369
|
+
# - Host pattern matching (exact, wildcard, multiple)
|
|
370
|
+
# - Robots.txt generation (allowed/blocked hosts, paths, sitemap)
|
|
371
|
+
# - Sitemap generation (simple/advanced routes, defaults, mixed formats)
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
## Type Definitions
|
|
375
|
+
|
|
376
|
+
TypeScript-style JSDoc type definitions are available:
|
|
377
|
+
|
|
378
|
+
```javascript
|
|
379
|
+
// robots.js
|
|
380
|
+
import './robots/typedef.js'; // RobotsConfig
|
|
381
|
+
|
|
382
|
+
// sitemap.js
|
|
383
|
+
import './sitemap/typedef.js'; // SitemapRoute, SitemapRouteObject
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
See `typedef.js` files in each subdirectory for complete type definitions.
|
|
@@ -74,8 +74,8 @@ export function generateRobotsTxt(url, config = {}) {
|
|
|
74
74
|
});
|
|
75
75
|
}
|
|
76
76
|
|
|
77
|
-
//
|
|
78
|
-
if (url.origin
|
|
77
|
+
// Always add sitemap reference
|
|
78
|
+
if (url.origin) {
|
|
79
79
|
content += `\nSitemap: ${url.origin}/sitemap.xml`;
|
|
80
80
|
}
|
|
81
81
|
|
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* Supports wildcards (e.g., '*.example.com')
|
|
6
6
|
* @property {string[]} [disallowedPaths]
|
|
7
7
|
* Paths to block from indexing (e.g., '/admin', '/api/*')
|
|
8
|
-
* @property {boolean} [includeSitemap]
|
|
9
|
-
* Include sitemap reference in robots.txt (default: true)
|
|
10
8
|
*/
|
|
11
9
|
|
|
12
10
|
export default {};
|