easy-sitemap-generator 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -45
- package/bin/cli.js +37 -15
- package/example.js +11 -0
- package/index.d.ts +29 -10
- package/index.js +1 -6
- package/lib/sitemapGenerator.js +167 -85
- package/package.json +67 -58
- package/services/axios.js +9 -7
- package/utils/chalk.js +13 -0
- package/utils/xml.js +21 -21
- package/example/index.js +0 -7
- package/utils/kleur.js +0 -15
package/README.md
CHANGED
|
@@ -1,45 +1,68 @@
|
|
|
1
|
-
# 🗺️ Easy Sitemap.xml generator
|
|
2
|
-
|
|
3
|
-
Improve your search engine rankings effortlessly! All you need is Node.js installed and this module.
|
|
4
|
-
|
|
5
|
-
<a href="https://www.npmjs.com/package/easy-sitemap-generator" target="_blank" title="easy-sitemap-generator - npm" style="text-decoration:none">
|
|
6
|
-
<img src="https://img.shields.io/npm/dt/easy-sitemap-generator.svg?maxAge=3600" alt="The number of downloads">
|
|
7
|
-
<img src="https://img.shields.io/github/issues/sefinek/easy-sitemap-generator" alt="Issues">
|
|
8
|
-
<img src="https://img.shields.io/github/last-commit/sefinek/easy-sitemap-generator" alt="Last commit">
|
|
9
|
-
<img src="https://img.shields.io/github/commit-activity/w/sefinek/easy-sitemap-generator" alt="Commit activity">
|
|
10
|
-
<img src="https://img.shields.io/github/languages/code-size/sefinek/easy-sitemap-generator" alt="Code size">
|
|
11
|
-
</a>
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
sitemap
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
1
|
+
# 🗺️ Easy Sitemap.xml generator
|
|
2
|
+
A free and easy-to-use `sitemap.xml` generator with no restrictions for your website.
|
|
3
|
+
Improve your search engine rankings effortlessly! All you need is Node.js installed and this module.
|
|
4
|
+
|
|
5
|
+
<a href="https://www.npmjs.com/package/easy-sitemap-generator" target="_blank" title="easy-sitemap-generator - npm" style="text-decoration:none">
|
|
6
|
+
<img src="https://img.shields.io/npm/dt/easy-sitemap-generator.svg?maxAge=3600" alt="The number of downloads">
|
|
7
|
+
<img src="https://img.shields.io/github/issues/sefinek/easy-sitemap-generator" alt="Issues">
|
|
8
|
+
<img src="https://img.shields.io/github/last-commit/sefinek/easy-sitemap-generator" alt="Last commit">
|
|
9
|
+
<img src="https://img.shields.io/github/commit-activity/w/sefinek/easy-sitemap-generator" alt="Commit activity">
|
|
10
|
+
<img src="https://img.shields.io/github/languages/code-size/sefinek/easy-sitemap-generator" alt="Code size">
|
|
11
|
+
</a>
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## π€ How to use it?
|
|
15
|
+
### CLI (recommended)
|
|
16
|
+
```bash
|
|
17
|
+
npm install easy-sitemap-generator -g
|
|
18
|
+
sitemap --url=https://example.com
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
#### `--domain`
|
|
22
|
+
If you're generating the sitemap from a local server, use `--domain` to replace the crawled host with your production domain in the output:
|
|
23
|
+
```bash
|
|
24
|
+
sitemap --url=http://localhost:3000 --domain=https://example.com
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
#### `--concurrency`
|
|
28
|
+
Controls how many pages are fetched in parallel. Defaults to `3`:
|
|
29
|
+
```bash
|
|
30
|
+
sitemap --url=https://example.com --concurrency=5
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
#### Aliases
|
|
34
|
+
| sitemap-gen | sitemap-generator | generate-sitemap |
|
|
35
|
+
|-------------|-------------------|------------------|
|
|
36
|
+
|
|
37
|
+
### Script
|
|
38
|
+
This package is ESM-only.
|
|
39
|
+
|
|
40
|
+
```js
|
|
41
|
+
import { generateSitemap } from 'easy-sitemap-generator';
|
|
42
|
+
|
|
43
|
+
(async () => {
|
|
44
|
+
const content = await generateSitemap('https://example.com', {
|
|
45
|
+
destination: 'sitemap.xml', // Optional, defaults to './sitemap.xml'
|
|
46
|
+
domain: 'https://example.com', // Optional, replaces the crawled host in the output
|
|
47
|
+
concurrency: 3, // Optional, defaults to 3
|
|
48
|
+
});
|
|
49
|
+
console.log(content);
|
|
50
|
+
})();
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
## βοΈ Sample generated file
|
|
55
|
+
https://sefinek.net/sitemap.xml
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
## π Why do I need this?
|
|
59
|
+
Indexing bots, such as Google, often check the sitemap.xml file by making a `GET /sitemap.xml` request to find subpages of your website.
|
|
60
|
+
This can improve your site’s visibility in search engine results. Sitemap files are a standard feature and can be found on every web server.
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
## π Important
|
|
64
|
+
Before running the script or executing the `sitemap` CLI command, make sure you have a stable internet connection. Also, disconnect from any proxy or VPN if you're connected.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
## π License
|
|
68
|
+
Licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
|
package/bin/cli.js
CHANGED
|
@@ -1,16 +1,38 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
})
|
|
2
|
+
|
|
3
|
+
import { parseArgs } from 'node:util';
|
|
4
|
+
import { logError } from '../utils/chalk.js';
|
|
5
|
+
import { generateSitemap } from '../lib/sitemapGenerator.js';
|
|
6
|
+
|
|
7
|
+
let values;
|
|
8
|
+
try {
|
|
9
|
+
({ values } = parseArgs({
|
|
10
|
+
options: {
|
|
11
|
+
url: { type: 'string' },
|
|
12
|
+
domain: { type: 'string' },
|
|
13
|
+
concurrency: { type: 'string' },
|
|
14
|
+
},
|
|
15
|
+
}));
|
|
16
|
+
} catch (err) {
|
|
17
|
+
logError(err.message);
|
|
18
|
+
process.exit(1);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (!values.url) {
|
|
22
|
+
logError('No URL provided. Use: sitemap --url=<YOUR-DOMAIN>');
|
|
23
|
+
process.exit(1);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
let concurrency;
|
|
27
|
+
if (values.concurrency) {
|
|
28
|
+
concurrency = Number(values.concurrency);
|
|
29
|
+
if (!Number.isInteger(concurrency) || concurrency < 1) {
|
|
30
|
+
logError('Invalid --concurrency value. It must be a positive integer.');
|
|
31
|
+
process.exit(1);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
generateSitemap(values.url, { domain: values.domain, concurrency }).catch(err => {
|
|
36
|
+
logError(err);
|
|
37
|
+
process.exit(2);
|
|
38
|
+
});
|
package/example.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { generateSitemap, version } from './lib/sitemapGenerator.js';
|
|
2
|
+
|
|
3
|
+
(async () => {
|
|
4
|
+
const content = await generateSitemap('https://sefinek.net', {
|
|
5
|
+
destination: 'sitemap.xml',
|
|
6
|
+
concurrency: 3,
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
console.log(content);
|
|
10
|
+
console.log('Module version:', version);
|
|
11
|
+
})();
|
package/index.d.ts
CHANGED
|
@@ -1,10 +1,29 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
export interface GenerateOptions {
|
|
2
|
+
/**
|
|
3
|
+
* Path to save the sitemap file.
|
|
4
|
+
*
|
|
5
|
+
* @default ./sitemap.xml
|
|
6
|
+
*/
|
|
7
|
+
destination?: string;
|
|
8
|
+
|
|
9
|
+
/** Domain to use in the generated `<loc>` URLs instead of `url`. Useful when crawling a local server but publishing under a different domain. */
|
|
10
|
+
domain?: string;
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Number of pages to fetch in parallel.
|
|
14
|
+
*
|
|
15
|
+
* @default 3
|
|
16
|
+
*/
|
|
17
|
+
concurrency?: number;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Generates a sitemap for the given URL and saves it to a file.
|
|
22
|
+
*
|
|
23
|
+
* @param url - The base URL to generate the sitemap for.
|
|
24
|
+
* @param options - Optional settings for the generated sitemap.
|
|
25
|
+
* @returns A promise that resolves with the contents of the generated sitemap.
|
|
26
|
+
*/
|
|
27
|
+
export function generateSitemap(url: string, options?: GenerateOptions): Promise<string>;
|
|
28
|
+
|
|
29
|
+
export const version: string;
|
package/index.js
CHANGED
package/lib/sitemapGenerator.js
CHANGED
|
@@ -1,19 +1,40 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const IGNORED_PATTERNS = [
|
|
9
|
-
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { JSDOM } from 'jsdom';
|
|
4
|
+
import { axios, version } from '../services/axios.js';
|
|
5
|
+
import { escapeXml, normalizeUrl, calculatePriority } from '../utils/xml.js';
|
|
6
|
+
import { logInfo, logSuccess, logError, logWarning } from '../utils/chalk.js';
|
|
7
|
+
|
|
8
|
+
const IGNORED_PATTERNS = [
|
|
9
|
+
'cdn-cgi', '?referrer=', '&referrer=', '/signin/v2/usernamerecovery', '/lifecycle/flows/signup', 'join?return_to=',
|
|
10
|
+
'PHPSESSID=', 'JSESSIONID=', 'ASPSESSIONID', 'sessionid=', 'session_id=', '?sid=', '&sid=', 'phpsessid=',
|
|
11
|
+
];
|
|
12
|
+
const STATIC_EXTENSIONS = new Set([
|
|
13
|
+
'.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico', '.bmp', '.avif', '.tiff',
|
|
14
|
+
'.css', '.js', '.mjs', '.map', '.json', '.txt', '.csv', '.xml',
|
|
15
|
+
'.woff', '.woff2', '.ttf', '.eot', '.otf',
|
|
16
|
+
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip', '.rar', '.7z', '.gz', '.tar',
|
|
17
|
+
'.mp3', '.mp4', '.webm', '.avi', '.mov', '.wav', '.ogg', '.flac',
|
|
18
|
+
]);
|
|
19
|
+
const BASE_DELAY = 14_000;
|
|
20
|
+
const DEFAULT_CONCURRENCY = 3;
|
|
10
21
|
const MAX_URLS = 50000;
|
|
22
|
+
const MAX_SITEMAP_BYTES = 50 * 1024 * 1024;
|
|
23
|
+
const MAX_LOC_LENGTH = 2048;
|
|
24
|
+
|
|
25
|
+
const hasStaticExtension = pathname => {
|
|
26
|
+
const lastDot = pathname.lastIndexOf('.');
|
|
27
|
+
if (lastDot === -1) return false;
|
|
28
|
+
return STATIC_EXTENSIONS.has(pathname.slice(lastDot).toLowerCase());
|
|
29
|
+
};
|
|
11
30
|
|
|
12
31
|
const shouldIncludeUrl = (url, baseUrl, baseOrigin, urlOrigin = null) => {
|
|
13
32
|
if (!url.startsWith(baseUrl)) return false;
|
|
14
33
|
if (IGNORED_PATTERNS.some(pattern => url.includes(pattern))) return false;
|
|
15
34
|
try {
|
|
16
|
-
|
|
35
|
+
const parsedUrl = new URL(url);
|
|
36
|
+
if ((urlOrigin ?? parsedUrl.origin) !== baseOrigin) return false;
|
|
37
|
+
return !hasStaticExtension(parsedUrl.pathname);
|
|
17
38
|
} catch {
|
|
18
39
|
return false;
|
|
19
40
|
}
|
|
@@ -25,30 +46,26 @@ const nowIso = () => formatIso(new Date());
|
|
|
25
46
|
|
|
26
47
|
const fetchUrl = async (url, retries = 0) => {
|
|
27
48
|
try {
|
|
28
|
-
logInfoStart(`GET ${url}`);
|
|
29
|
-
|
|
30
49
|
const res = await axios.get(url);
|
|
31
50
|
if (res.status === 200) {
|
|
32
51
|
return res;
|
|
33
52
|
} else {
|
|
34
|
-
process.stdout.write('\n');
|
|
35
53
|
logWarning(`Non-200 status code (${res.status}) for URL: ${url}. Skipping...`);
|
|
36
54
|
return null;
|
|
37
55
|
}
|
|
38
56
|
} catch (err) {
|
|
39
|
-
process.stdout.write('\n');
|
|
40
57
|
if (err.response) {
|
|
41
58
|
const statusCode = err.response.status;
|
|
42
59
|
if (statusCode === 429) {
|
|
43
60
|
const delayTime = BASE_DELAY * (2 ** retries);
|
|
44
|
-
logWarning(`429: Rate limit hit! Retrying in ${(delayTime / 1000).toFixed(2)}s... (Attempt ${retries + 1})`);
|
|
61
|
+
logWarning(`429: Rate limit hit for ${url}! Retrying in ${(delayTime / 1000).toFixed(2)}s... (Attempt ${retries + 1})`);
|
|
45
62
|
await delay(delayTime);
|
|
46
63
|
return fetchUrl(url, retries + 1);
|
|
47
64
|
} else if (statusCode === 404) {
|
|
48
|
-
logWarning(
|
|
65
|
+
logWarning(`404: Not Found - ${url}`);
|
|
49
66
|
return null;
|
|
50
67
|
} else {
|
|
51
|
-
logError(`${statusCode}: Failed to fetch! Skipping...`);
|
|
68
|
+
logError(`${statusCode}: Failed to fetch ${url}! Skipping...`);
|
|
52
69
|
return null;
|
|
53
70
|
}
|
|
54
71
|
} else {
|
|
@@ -58,131 +75,196 @@ const fetchUrl = async (url, retries = 0) => {
|
|
|
58
75
|
}
|
|
59
76
|
};
|
|
60
77
|
|
|
61
|
-
const crawl = async (startUrl, baseUrl, baseOrigin, visitedUrls) => {
|
|
78
|
+
const crawl = async (startUrl, baseUrl, baseOrigin, visitedUrls, concurrency = DEFAULT_CONCURRENCY) => {
|
|
79
|
+
concurrency = Math.max(1, Math.floor(concurrency));
|
|
80
|
+
|
|
62
81
|
const queued = new Set();
|
|
63
82
|
const queue = [];
|
|
64
83
|
|
|
65
84
|
const enqueue = url => {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
85
|
+
const normalized = normalizeUrl(url);
|
|
86
|
+
if (!queued.has(normalized)) {
|
|
87
|
+
queued.add(normalized);
|
|
88
|
+
queue.push(normalized);
|
|
69
89
|
}
|
|
70
90
|
};
|
|
71
91
|
|
|
72
|
-
enqueue(
|
|
73
|
-
|
|
74
|
-
while (queue.length > 0) {
|
|
75
|
-
const normalizedUrl = queue.shift();
|
|
92
|
+
enqueue(startUrl);
|
|
76
93
|
|
|
94
|
+
const processUrl = async normalizedUrl => {
|
|
77
95
|
const res = await fetchUrl(normalizedUrl);
|
|
78
|
-
if (!res)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
96
|
+
if (!res) return;
|
|
97
|
+
|
|
98
|
+
let dom;
|
|
99
|
+
try {
|
|
100
|
+
dom = new JSDOM(res.data);
|
|
101
|
+
const { document } = dom.window;
|
|
102
|
+
|
|
103
|
+
const canonicalEl = document.querySelector('link[rel="canonical"]');
|
|
104
|
+
if (canonicalEl) {
|
|
105
|
+
try {
|
|
106
|
+
const canonical = new URL(canonicalEl.getAttribute('href'), baseUrl);
|
|
107
|
+
canonical.hash = '';
|
|
108
|
+
if (canonical.href !== normalizedUrl && shouldIncludeUrl(canonical.href, baseUrl, baseOrigin, canonical.origin)) {
|
|
109
|
+
logInfo(`GET ${normalizedUrl} (canonical → ${canonical.href}, skipped)`);
|
|
110
|
+
enqueue(canonical.href);
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
} catch {
|
|
114
|
+
// ...
|
|
93
115
|
}
|
|
94
|
-
} catch {
|
|
95
|
-
// ...
|
|
96
116
|
}
|
|
97
|
-
}
|
|
98
117
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
118
|
+
const links = new Set();
|
|
119
|
+
for (const link of document.querySelectorAll('a[href]')) {
|
|
120
|
+
try {
|
|
121
|
+
const resolved = new URL(link.getAttribute('href'), baseUrl);
|
|
122
|
+
resolved.hash = '';
|
|
123
|
+
if (shouldIncludeUrl(resolved.href, baseUrl, baseOrigin, resolved.origin)) links.add(resolved.href);
|
|
124
|
+
} catch {
|
|
125
|
+
// ...
|
|
126
|
+
}
|
|
107
127
|
}
|
|
108
|
-
}
|
|
109
128
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
129
|
+
const rawLastMod = res.headers['last-modified']
|
|
130
|
+
?? document.querySelector('meta[property="article:modified_time"]')?.getAttribute('content')
|
|
131
|
+
?? document.querySelector('meta[name="last-modified"]')?.getAttribute('content');
|
|
113
132
|
|
|
114
|
-
|
|
133
|
+
let lastmod = null;
|
|
134
|
+
if (rawLastMod) {
|
|
135
|
+
const parsedLastMod = new Date(rawLastMod);
|
|
136
|
+
if (!Number.isNaN(parsedLastMod.getTime())) lastmod = formatIso(parsedLastMod);
|
|
137
|
+
}
|
|
115
138
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
});
|
|
139
|
+
visitedUrls.set(normalizedUrl, {
|
|
140
|
+
url: normalizedUrl,
|
|
141
|
+
lastmod,
|
|
142
|
+
priority: calculatePriority(normalizedUrl, baseUrl),
|
|
143
|
+
});
|
|
122
144
|
|
|
123
|
-
|
|
145
|
+
logInfo(`GET ${normalizedUrl} (${links.size} urls)`);
|
|
124
146
|
|
|
125
|
-
|
|
126
|
-
|
|
147
|
+
for (const link of links) enqueue(link);
|
|
148
|
+
} catch (err) {
|
|
149
|
+
logError(`Failed to process ${normalizedUrl}: ${err.message}. Skipping...`);
|
|
150
|
+
} finally {
|
|
151
|
+
dom?.window?.close();
|
|
152
|
+
}
|
|
153
|
+
};
|
|
154
|
+
|
|
155
|
+
await new Promise(resolve => {
|
|
156
|
+
let active = 0;
|
|
157
|
+
|
|
158
|
+
const dispatch = () => {
|
|
159
|
+
if (queue.length === 0 && active === 0) {
|
|
160
|
+
resolve();
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
while (active < concurrency && queue.length > 0) {
|
|
165
|
+
const normalizedUrl = queue.shift();
|
|
166
|
+
active++;
|
|
167
|
+
processUrl(normalizedUrl).finally(() => {
|
|
168
|
+
active--;
|
|
169
|
+
dispatch();
|
|
170
|
+
});
|
|
171
|
+
}
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
dispatch();
|
|
175
|
+
});
|
|
127
176
|
};
|
|
128
177
|
|
|
178
|
+
const buildUrlEntry = ({ url, lastmod, priority }) => ` <url>
|
|
179
|
+
<loc>${escapeXml(url)}</loc>${lastmod ? `
|
|
180
|
+
<lastmod>${lastmod}</lastmod>` : ''}
|
|
181
|
+
<priority>${priority.toFixed(2)}</priority>
|
|
182
|
+
</url>`;
|
|
183
|
+
|
|
129
184
|
const buildSitemapContent = urls => `<?xml version="1.0" encoding="UTF-8"?>
|
|
130
185
|
<!-- Generated by https://github.com/sefinek/easy-sitemap-generator v${version} at ${nowIso()} -->
|
|
131
186
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
|
|
132
|
-
${urls.map((
|
|
133
|
-
<loc>${escapeXml(url)}</loc>
|
|
134
|
-
<lastmod>${lastmod}</lastmod>
|
|
135
|
-
<priority>${priority.toFixed(2)}</priority>
|
|
136
|
-
</url>`).join('\n')}
|
|
187
|
+
${urls.map(buildUrlEntry).join('\n')}
|
|
137
188
|
</urlset>`;
|
|
138
189
|
|
|
139
190
|
const buildIndexContent = sitemapLocs => `<?xml version="1.0" encoding="UTF-8"?>
|
|
140
191
|
<!-- Generated by https://github.com/sefinek/easy-sitemap-generator v${version} at ${nowIso()} -->
|
|
141
|
-
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
|
192
|
+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd">
|
|
142
193
|
${sitemapLocs.map(({ loc, lastmod }) => ` <sitemap>
|
|
143
194
|
<loc>${escapeXml(loc)}</loc>
|
|
144
195
|
<lastmod>${lastmod}</lastmod>
|
|
145
196
|
</sitemap>`).join('\n')}
|
|
146
197
|
</sitemapindex>`;
|
|
147
198
|
|
|
148
|
-
const
|
|
199
|
+
const chunkUrls = urls => {
|
|
200
|
+
const shellBytes = Buffer.byteLength(buildSitemapContent([]), 'utf8');
|
|
201
|
+
const chunks = [];
|
|
202
|
+
let current = [];
|
|
203
|
+
let currentBytes = shellBytes;
|
|
204
|
+
|
|
205
|
+
for (const entry of urls) {
|
|
206
|
+
const entryBytes = Buffer.byteLength(buildUrlEntry(entry), 'utf8') + 1; // +1 for the joining newline
|
|
207
|
+
|
|
208
|
+
if (current.length > 0 && (current.length >= MAX_URLS || currentBytes + entryBytes > MAX_SITEMAP_BYTES)) {
|
|
209
|
+
chunks.push(current);
|
|
210
|
+
current = [];
|
|
211
|
+
currentBytes = shellBytes;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
current.push(entry);
|
|
215
|
+
currentBytes += entryBytes;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
if (current.length > 0) chunks.push(current);
|
|
219
|
+
return chunks;
|
|
220
|
+
};
|
|
221
|
+
|
|
222
|
+
const generateSitemap = async (baseUrl, { destination = 'sitemap.xml', domain = null, concurrency = DEFAULT_CONCURRENCY } = {}) => {
|
|
149
223
|
logInfo(`Starting crawl for base URL: ${baseUrl}`);
|
|
150
224
|
|
|
151
225
|
const { origin: baseOrigin } = new URL(baseUrl);
|
|
226
|
+
const targetOrigin = domain ? new URL(domain).origin : baseOrigin;
|
|
152
227
|
const visitedUrls = new Map();
|
|
153
|
-
await crawl(baseUrl, baseUrl, baseOrigin, visitedUrls);
|
|
228
|
+
await crawl(baseUrl, baseUrl, baseOrigin, visitedUrls, concurrency);
|
|
154
229
|
|
|
155
230
|
logInfo(`Generating sitemap with ${visitedUrls.size} URLs...`);
|
|
156
231
|
|
|
157
232
|
const urls = Array.from(visitedUrls.values())
|
|
158
|
-
.filter(entry =>
|
|
159
|
-
|
|
233
|
+
.filter(entry => {
|
|
234
|
+
if (entry.url.length > MAX_LOC_LENGTH) {
|
|
235
|
+
logWarning(`URL exceeds ${MAX_LOC_LENGTH} characters and was skipped: ${entry.url}`);
|
|
236
|
+
return false;
|
|
237
|
+
}
|
|
238
|
+
return true;
|
|
239
|
+
})
|
|
240
|
+
.sort((a, b) => b.priority - a.priority)
|
|
241
|
+
.map(entry => targetOrigin === baseOrigin ? entry : { ...entry, url: targetOrigin + entry.url.slice(baseOrigin.length) });
|
|
160
242
|
|
|
161
243
|
const output = path.resolve(destination);
|
|
162
|
-
|
|
244
|
+
const chunks = chunkUrls(urls);
|
|
245
|
+
if (chunks.length <= 1) {
|
|
163
246
|
const content = buildSitemapContent(urls);
|
|
164
247
|
await fs.writeFile(output, content, 'utf8');
|
|
165
248
|
logSuccess(`Sitemap generated at ${output}`);
|
|
166
249
|
return content;
|
|
167
250
|
}
|
|
168
251
|
|
|
169
|
-
logWarning(`Found ${urls.length} URLs — exceeds the ${MAX_URLS}
|
|
252
|
+
logWarning(`Found ${urls.length} URLs — exceeds the sitemap protocol limits (${MAX_URLS.toLocaleString()} URLs / 50MB). Splitting into ${chunks.length} sitemap files...`);
|
|
170
253
|
|
|
171
254
|
const ext = path.extname(destination);
|
|
172
255
|
const base = path.basename(destination, ext);
|
|
173
256
|
const dir = path.dirname(output);
|
|
174
257
|
const timestamp = nowIso();
|
|
175
258
|
|
|
176
|
-
const
|
|
177
|
-
|
|
178
|
-
for (let i = 0, part = 1; i < urls.length; i += MAX_URLS, part++) {
|
|
259
|
+
const sitemapLocs = await Promise.all(chunks.map(async (chunk, i) => {
|
|
260
|
+
const part = i + 1;
|
|
179
261
|
const filename = `${base}-${part}${ext}`;
|
|
180
262
|
const filepath = path.join(dir, filename);
|
|
181
|
-
const content = buildSitemapContent(
|
|
263
|
+
const content = buildSitemapContent(chunk);
|
|
182
264
|
await fs.writeFile(filepath, content, 'utf8');
|
|
183
|
-
logSuccess(`Sitemap part ${part}/${
|
|
184
|
-
|
|
185
|
-
}
|
|
265
|
+
logSuccess(`Sitemap part ${part}/${chunks.length} written to ${filepath}`);
|
|
266
|
+
return { loc: `${targetOrigin}/${filename}`, lastmod: timestamp };
|
|
267
|
+
}));
|
|
186
268
|
|
|
187
269
|
const indexContent = buildIndexContent(sitemapLocs);
|
|
188
270
|
await fs.writeFile(output, indexContent, 'utf8');
|
|
@@ -191,4 +273,4 @@ const generate = async (baseUrl, destination = 'sitemap.xml') => {
|
|
|
191
273
|
return indexContent;
|
|
192
274
|
};
|
|
193
275
|
|
|
194
|
-
|
|
276
|
+
export { generateSitemap, version };
|
package/package.json
CHANGED
|
@@ -1,58 +1,67 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "easy-sitemap-generator",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Easy and free sitemap.xml file generator without any restrictions for your website.",
|
|
5
|
-
"keywords": [
|
|
6
|
-
"sitemap",
|
|
7
|
-
"sitemap-generator",
|
|
8
|
-
"sitemap-xml",
|
|
9
|
-
"website"
|
|
10
|
-
],
|
|
11
|
-
"homepage": "https://github.com/sefinek/easy-sitemap-generator#readme",
|
|
12
|
-
"bugs": {
|
|
13
|
-
"url": "https://github.com/sefinek/easy-sitemap-generator/issues"
|
|
14
|
-
},
|
|
15
|
-
"repository": {
|
|
16
|
-
"type": "git",
|
|
17
|
-
"url": "git+https://github.com/sefinek/easy-sitemap-generator.git"
|
|
18
|
-
},
|
|
19
|
-
"license": "MIT",
|
|
20
|
-
"author": "Sefinek <contact@sefinek.net> (https://sefinek.net)",
|
|
21
|
-
"type": "
|
|
22
|
-
"main": "index.js",
|
|
23
|
-
"types": "index.d.ts",
|
|
24
|
-
"bin": {
|
|
25
|
-
"generate-sitemap": "bin/cli.js",
|
|
26
|
-
"sitemap": "bin/cli.js",
|
|
27
|
-
"sitemap-gen": "bin/cli.js",
|
|
28
|
-
"sitemap-generator": "bin/cli.js"
|
|
29
|
-
},
|
|
30
|
-
"directories": {
|
|
31
|
-
"lib": "lib",
|
|
32
|
-
"
|
|
33
|
-
},
|
|
34
|
-
"files": [
|
|
35
|
-
"bin",
|
|
36
|
-
"example",
|
|
37
|
-
"lib",
|
|
38
|
-
"utils",
|
|
39
|
-
"services",
|
|
40
|
-
"index.d.ts",
|
|
41
|
-
"index.js",
|
|
42
|
-
"LICENSE",
|
|
43
|
-
"README.md"
|
|
44
|
-
],
|
|
45
|
-
"scripts": {
|
|
46
|
-
"m": "ncu -u && npm install && npm update",
|
|
47
|
-
"test": "
|
|
48
|
-
},
|
|
49
|
-
"dependencies": {
|
|
50
|
-
"axios": "^1.
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
},
|
|
54
|
-
"devDependencies": {
|
|
55
|
-
"@eslint/js": "^10.0.1",
|
|
56
|
-
"
|
|
57
|
-
|
|
58
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "easy-sitemap-generator",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Easy and free sitemap.xml file generator without any restrictions for your website.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"sitemap",
|
|
7
|
+
"sitemap-generator",
|
|
8
|
+
"sitemap-xml",
|
|
9
|
+
"website"
|
|
10
|
+
],
|
|
11
|
+
"homepage": "https://github.com/sefinek/easy-sitemap-generator#readme",
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/sefinek/easy-sitemap-generator/issues"
|
|
14
|
+
},
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "git+https://github.com/sefinek/easy-sitemap-generator.git"
|
|
18
|
+
},
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"author": "Sefinek <contact@sefinek.net> (https://sefinek.net)",
|
|
21
|
+
"type": "module",
|
|
22
|
+
"main": "index.js",
|
|
23
|
+
"types": "index.d.ts",
|
|
24
|
+
"bin": {
|
|
25
|
+
"generate-sitemap": "bin/cli.js",
|
|
26
|
+
"sitemap": "bin/cli.js",
|
|
27
|
+
"sitemap-gen": "bin/cli.js",
|
|
28
|
+
"sitemap-generator": "bin/cli.js"
|
|
29
|
+
},
|
|
30
|
+
"directories": {
|
|
31
|
+
"lib": "lib",
|
|
32
|
+
"test": "tests"
|
|
33
|
+
},
|
|
34
|
+
"files": [
|
|
35
|
+
"bin",
|
|
36
|
+
"example.js",
|
|
37
|
+
"lib",
|
|
38
|
+
"utils",
|
|
39
|
+
"services",
|
|
40
|
+
"index.d.ts",
|
|
41
|
+
"index.js",
|
|
42
|
+
"LICENSE",
|
|
43
|
+
"README.md"
|
|
44
|
+
],
|
|
45
|
+
"scripts": {
|
|
46
|
+
"m": "ncu -u && npm install && npm update",
|
|
47
|
+
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
|
|
48
|
+
},
|
|
49
|
+
"dependencies": {
|
|
50
|
+
"axios": "^1.17.0",
|
|
51
|
+
"chalk": "^5.6.2",
|
|
52
|
+
"jsdom": "^29.1.1"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@eslint/js": "^10.0.1",
|
|
56
|
+
"@types/jest": "^30.0.0",
|
|
57
|
+
"@types/node": "^25.9.3",
|
|
58
|
+
"globals": "^17.6.0",
|
|
59
|
+
"jest": "^30.4.2"
|
|
60
|
+
},
|
|
61
|
+
"engines": {
|
|
62
|
+
"node": ">=24.9"
|
|
63
|
+
},
|
|
64
|
+
"allowScripts": {
|
|
65
|
+
"unrs-resolver@1.12.2": true
|
|
66
|
+
}
|
|
67
|
+
}
|
package/services/axios.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { createRequire } from 'node:module';
|
|
3
|
+
|
|
4
|
+
const { version } = createRequire(import.meta.url)('../package.json');
|
|
5
|
+
|
|
6
|
+
axios.defaults.headers.common['User-Agent'] = `Mozilla/5.0 (compatible; EasySitemapGen/${version}; +https://github.com/sefinek/easy-sitemap-generator)`;
|
|
7
|
+
axios.defaults.timeout = 24000;
|
|
8
|
+
|
|
9
|
+
export { axios, version };
|
package/utils/chalk.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
|
|
3
|
+
const P_INFO = chalk.blue.bold('[INFO]: ');
|
|
4
|
+
const P_SUCCESS = chalk.green.bold('[SUCCESS]: ');
|
|
5
|
+
const P_ERROR = chalk.red.bold('[ERROR]: ');
|
|
6
|
+
const P_WARN = chalk.yellow.bold('[WARN]: ');
|
|
7
|
+
|
|
8
|
+
const logInfo = msg => console.log(P_INFO + msg);
|
|
9
|
+
const logSuccess = msg => console.log(P_SUCCESS + msg);
|
|
10
|
+
const logError = msg => console.error(P_ERROR + msg);
|
|
11
|
+
const logWarning = msg => console.warn(P_WARN + msg);
|
|
12
|
+
|
|
13
|
+
export { logInfo, logSuccess, logError, logWarning };
|
package/utils/xml.js
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
const XML_ESCAPE = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;',
|
|
2
|
-
const escapeXml = str => str.replace(/[&<>"']/g, ch => XML_ESCAPE[ch]);
|
|
3
|
-
|
|
4
|
-
const normalizeUrl = url => {
|
|
5
|
-
const parsedUrl = new URL(url);
|
|
6
|
-
parsedUrl.hash = '';
|
|
7
|
-
return parsedUrl.toString();
|
|
8
|
-
};
|
|
9
|
-
|
|
10
|
-
const calculatePriority = (url, baseUrl) => {
|
|
11
|
-
const path = url.replace(baseUrl, '').split('/').filter(Boolean);
|
|
12
|
-
const depth = path.length;
|
|
13
|
-
const hasQuery = url.includes('?');
|
|
14
|
-
|
|
15
|
-
if (depth === 0) return 1.0;
|
|
16
|
-
if (depth === 1) return 0.85;
|
|
17
|
-
if (depth === 2) return hasQuery ? 0.54 : 0.74;
|
|
18
|
-
return hasQuery ? 0.34 : 0.44;
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
|
|
1
|
+
const XML_ESCAPE = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', '\'': '&apos;' };
|
|
2
|
+
const escapeXml = str => str.replace(/[&<>"']/g, ch => XML_ESCAPE[ch]);
|
|
3
|
+
|
|
4
|
+
const normalizeUrl = url => {
|
|
5
|
+
const parsedUrl = new URL(url);
|
|
6
|
+
parsedUrl.hash = '';
|
|
7
|
+
return parsedUrl.toString();
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
const calculatePriority = (url, baseUrl) => {
|
|
11
|
+
const path = url.replace(baseUrl, '').split('/').filter(Boolean);
|
|
12
|
+
const depth = path.length;
|
|
13
|
+
const hasQuery = url.includes('?');
|
|
14
|
+
|
|
15
|
+
if (depth === 0) return 1.0;
|
|
16
|
+
if (depth === 1) return 0.85;
|
|
17
|
+
if (depth === 2) return hasQuery ? 0.54 : 0.74;
|
|
18
|
+
return hasQuery ? 0.34 : 0.44;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
export { escapeXml, normalizeUrl, calculatePriority };
|
package/example/index.js
DELETED
package/utils/kleur.js
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
const kleur = require('kleur');
|
|
2
|
-
|
|
3
|
-
const P_INFO = kleur.blue().bold('[INFO]: ');
|
|
4
|
-
const P_SUCCESS = kleur.green().bold('[SUCCESS]: ');
|
|
5
|
-
const P_ERROR = kleur.red().bold('[ERROR]: ');
|
|
6
|
-
const P_WARN = kleur.yellow().bold('[WARN]: ');
|
|
7
|
-
|
|
8
|
-
const logInfo = msg => console.log(P_INFO + msg);
|
|
9
|
-
const logSuccess = msg => console.log(P_SUCCESS + msg);
|
|
10
|
-
const logError = msg => console.error(P_ERROR + msg);
|
|
11
|
-
const logWarning = msg => console.warn(P_WARN + msg);
|
|
12
|
-
const logInfoStart = msg => process.stdout.write(P_INFO + msg);
|
|
13
|
-
const logInfoAppend = msg => process.stdout.write(`\r\x1b[K${P_INFO}${msg}\n`);
|
|
14
|
-
|
|
15
|
-
module.exports = { logInfo, logSuccess, logError, logWarning, logInfoStart, logInfoAppend };
|