easy-sitemap-generator 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,45 +1,68 @@
1
- # 🗺️ Easy Sitemap.xml generator
2
- Finally! A free and easy-to-use `sitemap.xml` generator with no restrictions for your website.
3
- Improve your search engine rankings effortlessly! All you need is Node.js installed and this module. Good luck!
4
-
5
- <a href="https://www.npmjs.com/package/easy-sitemap-generator" target="_blank" title="easy-sitemap-generator - npm" style="text-decoration:none">
6
- <img src="https://img.shields.io/npm/dt/easy-sitemap-generator.svg?maxAge=3600" alt="The number of downloads">
7
- <img src="https://img.shields.io/github/issues/sefinek/easy-sitemap-generator" alt="Issues">
8
- <img src="https://img.shields.io/github/last-commit/sefinek/easy-sitemap-generator" alt="Last commit">
9
- <img src="https://img.shields.io/github/commit-activity/w/sefinek/easy-sitemap-generator" alt="Commit activity">
10
- <img src="https://img.shields.io/github/languages/code-size/sefinek/easy-sitemap-generator" alt="Code size">
11
- </a>
12
-
13
- ## 🤔 How to use it?
14
- ### CLI (recommended)
15
- ```bash
16
- npm install easy-sitemap-generator -g
17
- sitemap --url=https://example.com
18
- ```
19
-
20
- #### Aliases
21
- | sitemap-gen | sitemap-generator | generate-sitemap |
22
- |-------------|-------------------|------------------|
23
-
24
- ### Script
25
- ```js
26
- const sitemap = require('easy-sitemap-generator');
27
-
28
- (async () => {
29
- const content = await sitemap.generate('https://example.com');
30
- console.log(content);
31
- })();
32
- ```
33
-
34
- ## ✔️ Sample generated file
35
- https://sefinek.net/sitemap.xml
36
-
37
- ## 👀 Why do I need this?
38
- Indexing bots, such as Google, often check the sitemap.xml file by making a `GET /sitemap.xml` request to find subpages of your website.
39
- This can improve your site’s visibility in search engine results. Sitemap files are a standard feature and can be found on every web server.
40
-
41
- ## 😉 Important
42
- Before running the script or executing the `sitemap` CLI command, make sure you have a stable internet connection. Also, disconnect from any proxy or VPN if you're connected.
43
-
44
- ## 📘 License
45
- Licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
1
+ # 🗺️ Easy Sitemap.xml generator
2
+ A free and easy-to-use `sitemap.xml` generator with no restrictions for your website.
3
+ Improve your search engine rankings effortlessly! All you need is Node.js installed and this module.
4
+
5
+ <a href="https://www.npmjs.com/package/easy-sitemap-generator" target="_blank" title="easy-sitemap-generator - npm" style="text-decoration:none">
6
+ <img src="https://img.shields.io/npm/dt/easy-sitemap-generator.svg?maxAge=3600" alt="The number of downloads">
7
+ <img src="https://img.shields.io/github/issues/sefinek/easy-sitemap-generator" alt="Issues">
8
+ <img src="https://img.shields.io/github/last-commit/sefinek/easy-sitemap-generator" alt="Last commit">
9
+ <img src="https://img.shields.io/github/commit-activity/w/sefinek/easy-sitemap-generator" alt="Commit activity">
10
+ <img src="https://img.shields.io/github/languages/code-size/sefinek/easy-sitemap-generator" alt="Code size">
11
+ </a>
12
+
13
+
14
+ ## 🤔 How to use it?
15
+ ### CLI (recommended)
16
+ ```bash
17
+ npm install easy-sitemap-generator -g
18
+ sitemap --url=https://example.com
19
+ ```
20
+
21
+ #### `--domain`
22
+ If you're generating the sitemap from a local server, use `--domain` to replace the crawled host with your production domain in the output:
23
+ ```bash
24
+ sitemap --url=http://localhost:3000 --domain=https://example.com
25
+ ```
26
+
27
+ #### `--concurrency`
28
+ Controls how many pages are fetched in parallel. Defaults to `3`:
29
+ ```bash
30
+ sitemap --url=https://example.com --concurrency=5
31
+ ```
32
+
33
+ #### Aliases
34
+ | sitemap-gen | sitemap-generator | generate-sitemap |
35
+ |-------------|-------------------|------------------|
36
+
37
+ ### Script
38
+ This package is ESM-only.
39
+
40
+ ```js
41
+ import { generateSitemap } from 'easy-sitemap-generator';
42
+
43
+ (async () => {
44
+ const content = await generateSitemap('https://example.com', {
45
+ destination: 'sitemap.xml', // Optional, defaults to './sitemap.xml'
46
+ domain: 'https://example.com', // Optional, replaces the crawled host in the output
47
+ concurrency: 3, // Optional, defaults to 3
48
+ });
49
+ console.log(content);
50
+ })();
51
+ ```
52
+
53
+
54
+ ## ✔️ Sample generated file
55
+ https://sefinek.net/sitemap.xml
56
+
57
+
58
+ ## 👀 Why do I need this?
59
+ Indexing bots, such as Google, often check the sitemap.xml file by making a `GET /sitemap.xml` request to find subpages of your website.
60
+ This can improve your site’s visibility in search engine results. Sitemap files are a standard feature and can be found on every web server.
61
+
62
+
63
+ ## 😉 Important
64
+ Before running the script or executing the `sitemap` CLI command, make sure you have a stable internet connection. Also, disconnect from any proxy or VPN if you're connected.
65
+
66
+
67
+ ## 📘 License
68
+ Licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
package/bin/cli.js CHANGED
@@ -1,16 +1,38 @@
1
1
  #!/usr/bin/env node
2
-
3
- const { logError } = require('../utils/kleur.js');
4
- const { generate } = require('../lib/sitemapGenerator.js');
5
-
6
- const args = process.argv.slice(2);
7
- const urlArg = args.find(arg => arg.startsWith('--url='));
8
- if (!urlArg) {
9
- logError('No URL provided. Use: sitemap --url=<YOUR-DOMAIN>');
10
- process.exit(1);
11
- }
12
-
13
- generate(urlArg.slice('--url='.length)).catch(err => {
14
- logError(err);
15
- process.exit(2);
16
- });
2
+
3
+ import { parseArgs } from 'node:util';
4
+ import { logError } from '../utils/chalk.js';
5
+ import { generateSitemap } from '../lib/sitemapGenerator.js';
6
+
7
+ let values;
8
+ try {
9
+ ({ values } = parseArgs({
10
+ options: {
11
+ url: { type: 'string' },
12
+ domain: { type: 'string' },
13
+ concurrency: { type: 'string' },
14
+ },
15
+ }));
16
+ } catch (err) {
17
+ logError(err.message);
18
+ process.exit(1);
19
+ }
20
+
21
+ if (!values.url) {
22
+ logError('No URL provided. Use: sitemap --url=<YOUR-DOMAIN>');
23
+ process.exit(1);
24
+ }
25
+
26
+ let concurrency;
27
+ if (values.concurrency) {
28
+ concurrency = Number(values.concurrency);
29
+ if (!Number.isInteger(concurrency) || concurrency < 1) {
30
+ logError('Invalid --concurrency value. It must be a positive integer.');
31
+ process.exit(1);
32
+ }
33
+ }
34
+
35
+ generateSitemap(values.url, { domain: values.domain, concurrency }).catch(err => {
36
+ logError(err);
37
+ process.exit(2);
38
+ });
package/example.js ADDED
@@ -0,0 +1,11 @@
1
+ import { generateSitemap, version } from './lib/sitemapGenerator.js';
2
+
3
+ (async () => {
4
+ const content = await generateSitemap('https://sefinek.net', {
5
+ destination: 'sitemap.xml',
6
+ concurrency: 3,
7
+ });
8
+
9
+ console.log(content);
10
+ console.log('Module version:', version);
11
+ })();
package/index.d.ts CHANGED
@@ -1,10 +1,29 @@
1
- /**
2
- * Generates a sitemap for the given URL and saves it to a file.
3
- *
4
- * @param url - The base URL to generate the sitemap for.
5
- * @param destination - Optional path to save the sitemap file. Defaults to: `./sitemap.xml`
6
- * @returns A promise that resolves with the contents of the generated sitemap.
7
- */
8
- export function generate(url: string, destination?: string): Promise<string>;
9
-
10
- export const version: string;
1
+ export interface GenerateOptions {
2
+ /**
3
+ * Path to save the sitemap file.
4
+ *
5
+ * @default ./sitemap.xml
6
+ */
7
+ destination?: string;
8
+
9
+ /** Domain to use in the generated `<loc>` URLs instead of `url`. Useful when crawling a local server but publishing under a different domain. */
10
+ domain?: string;
11
+
12
+ /**
13
+ * Number of pages to fetch in parallel.
14
+ *
15
+ * @default 3
16
+ */
17
+ concurrency?: number;
18
+ }
19
+
20
+ /**
21
+ * Generates a sitemap for the given URL and saves it to a file.
22
+ *
23
+ * @param url - The base URL to generate the sitemap for.
24
+ * @param options - Optional settings for the generated sitemap.
25
+ * @returns A promise that resolves with the contents of the generated sitemap.
26
+ */
27
+ export function generateSitemap(url: string, options?: GenerateOptions): Promise<string>;
28
+
29
+ export const version: string;
package/index.js CHANGED
@@ -1,6 +1 @@
1
- const { generate, version } = require('./lib/sitemapGenerator.js');
2
-
3
- module.exports = {
4
- generate,
5
- version,
6
- };
1
+ export { generateSitemap, version } from './lib/sitemapGenerator.js';
@@ -1,19 +1,40 @@
1
- const fs = require('node:fs/promises');
2
- const path = require('node:path');
3
- const { JSDOM } = require('jsdom');
4
- const { axios, version } = require('../services/axios.js');
5
- const { escapeXml, normalizeUrl, calculatePriority } = require('../utils/xml.js');
6
- const { logInfo, logSuccess, logError, logWarning, logInfoStart, logInfoAppend } = require('../utils/kleur.js');
7
-
8
- const IGNORED_PATTERNS = ['cdn-cgi', '?referrer=', '&referrer=', '/signin/v2/usernamerecovery', '/lifecycle/flows/signup', 'join?return_to='];
9
- const BASE_DELAY = 12000;
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { JSDOM } from 'jsdom';
4
+ import { axios, version } from '../services/axios.js';
5
+ import { escapeXml, normalizeUrl, calculatePriority } from '../utils/xml.js';
6
+ import { logInfo, logSuccess, logError, logWarning } from '../utils/chalk.js';
7
+
8
+ const IGNORED_PATTERNS = [
9
+ 'cdn-cgi', '?referrer=', '&referrer=', '/signin/v2/usernamerecovery', '/lifecycle/flows/signup', 'join?return_to=',
10
+ 'PHPSESSID=', 'JSESSIONID=', 'ASPSESSIONID', 'sessionid=', 'session_id=', '?sid=', '&sid=', 'phpsessid=',
11
+ ];
12
+ const STATIC_EXTENSIONS = new Set([
13
+ '.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico', '.bmp', '.avif', '.tiff',
14
+ '.css', '.js', '.mjs', '.map', '.json', '.txt', '.csv', '.xml',
15
+ '.woff', '.woff2', '.ttf', '.eot', '.otf',
16
+ '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip', '.rar', '.7z', '.gz', '.tar',
17
+ '.mp3', '.mp4', '.webm', '.avi', '.mov', '.wav', '.ogg', '.flac',
18
+ ]);
19
+ const BASE_DELAY = 14_000;
20
+ const DEFAULT_CONCURRENCY = 3;
10
21
  const MAX_URLS = 50000;
22
+ const MAX_SITEMAP_BYTES = 50 * 1024 * 1024;
23
+ const MAX_LOC_LENGTH = 2048;
24
+
25
+ const hasStaticExtension = pathname => {
26
+ const lastDot = pathname.lastIndexOf('.');
27
+ if (lastDot === -1) return false;
28
+ return STATIC_EXTENSIONS.has(pathname.slice(lastDot).toLowerCase());
29
+ };
11
30
 
12
31
  const shouldIncludeUrl = (url, baseUrl, baseOrigin, urlOrigin = null) => {
13
32
  if (!url.startsWith(baseUrl)) return false;
14
33
  if (IGNORED_PATTERNS.some(pattern => url.includes(pattern))) return false;
15
34
  try {
16
- return (urlOrigin ?? new URL(url).origin) === baseOrigin;
35
+ const parsedUrl = new URL(url);
36
+ if ((urlOrigin ?? parsedUrl.origin) !== baseOrigin) return false;
37
+ return !hasStaticExtension(parsedUrl.pathname);
17
38
  } catch {
18
39
  return false;
19
40
  }
@@ -25,30 +46,26 @@ const nowIso = () => formatIso(new Date());
25
46
 
26
47
  const fetchUrl = async (url, retries = 0) => {
27
48
  try {
28
- logInfoStart(`GET ${url}`);
29
-
30
49
  const res = await axios.get(url);
31
50
  if (res.status === 200) {
32
51
  return res;
33
52
  } else {
34
- process.stdout.write('\n');
35
53
  logWarning(`Non-200 status code (${res.status}) for URL: ${url}. Skipping...`);
36
54
  return null;
37
55
  }
38
56
  } catch (err) {
39
- process.stdout.write('\n');
40
57
  if (err.response) {
41
58
  const statusCode = err.response.status;
42
59
  if (statusCode === 429) {
43
60
  const delayTime = BASE_DELAY * (2 ** retries);
44
- logWarning(`429: Rate limit hit! Retrying in ${(delayTime / 1000).toFixed(2)}s... (Attempt ${retries + 1})`);
61
+ logWarning(`429: Rate limit hit for ${url}! Retrying in ${(delayTime / 1000).toFixed(2)}s... (Attempt ${retries + 1})`);
45
62
  await delay(delayTime);
46
63
  return fetchUrl(url, retries + 1);
47
64
  } else if (statusCode === 404) {
48
- logWarning('404: Not Found');
65
+ logWarning(`404: Not Found - ${url}`);
49
66
  return null;
50
67
  } else {
51
- logError(`${statusCode}: Failed to fetch! Skipping...`);
68
+ logError(`${statusCode}: Failed to fetch ${url}! Skipping...`);
52
69
  return null;
53
70
  }
54
71
  } else {
@@ -58,131 +75,196 @@ const fetchUrl = async (url, retries = 0) => {
58
75
  }
59
76
  };
60
77
 
61
- const crawl = async (startUrl, baseUrl, baseOrigin, visitedUrls) => {
78
+ const crawl = async (startUrl, baseUrl, baseOrigin, visitedUrls, concurrency = DEFAULT_CONCURRENCY) => {
79
+ concurrency = Math.max(1, Math.floor(concurrency));
80
+
62
81
  const queued = new Set();
63
82
  const queue = [];
64
83
 
65
84
  const enqueue = url => {
66
- if (!queued.has(url)) {
67
- queued.add(url);
68
- queue.push(url);
85
+ const normalized = normalizeUrl(url);
86
+ if (!queued.has(normalized)) {
87
+ queued.add(normalized);
88
+ queue.push(normalized);
69
89
  }
70
90
  };
71
91
 
72
- enqueue(normalizeUrl(startUrl));
73
-
74
- while (queue.length > 0) {
75
- const normalizedUrl = queue.shift();
92
+ enqueue(startUrl);
76
93
 
94
+ const processUrl = async normalizedUrl => {
77
95
  const res = await fetchUrl(normalizedUrl);
78
- if (!res) continue;
79
-
80
- const dom = new JSDOM(res.data);
81
- const { document } = dom.window;
82
-
83
- const canonicalEl = document.querySelector('link[rel="canonical"]');
84
- if (canonicalEl) {
85
- try {
86
- const canonical = new URL(canonicalEl.getAttribute('href'), baseUrl);
87
- canonical.hash = '';
88
- if (canonical.href !== normalizedUrl && shouldIncludeUrl(canonical.href, baseUrl, baseOrigin, canonical.origin)) {
89
- logInfoAppend(`GET ${normalizedUrl} (canonical β†’ ${canonical.href}, skipped)`);
90
- dom.window.close();
91
- enqueue(canonical.href);
92
- continue;
96
+ if (!res) return;
97
+
98
+ let dom;
99
+ try {
100
+ dom = new JSDOM(res.data);
101
+ const { document } = dom.window;
102
+
103
+ const canonicalEl = document.querySelector('link[rel="canonical"]');
104
+ if (canonicalEl) {
105
+ try {
106
+ const canonical = new URL(canonicalEl.getAttribute('href'), baseUrl);
107
+ canonical.hash = '';
108
+ if (canonical.href !== normalizedUrl && shouldIncludeUrl(canonical.href, baseUrl, baseOrigin, canonical.origin)) {
109
+ logInfo(`GET ${normalizedUrl} (canonical β†’ ${canonical.href}, skipped)`);
110
+ enqueue(canonical.href);
111
+ return;
112
+ }
113
+ } catch {
114
+ // ...
93
115
  }
94
- } catch {
95
- // ...
96
116
  }
97
- }
98
117
 
99
- const links = new Set();
100
- for (const link of document.querySelectorAll('a[href]')) {
101
- try {
102
- const resolved = new URL(link.getAttribute('href'), baseUrl);
103
- resolved.hash = '';
104
- if (shouldIncludeUrl(resolved.href, baseUrl, baseOrigin, resolved.origin)) links.add(resolved.href);
105
- } catch {
106
- // ...
118
+ const links = new Set();
119
+ for (const link of document.querySelectorAll('a[href]')) {
120
+ try {
121
+ const resolved = new URL(link.getAttribute('href'), baseUrl);
122
+ resolved.hash = '';
123
+ if (shouldIncludeUrl(resolved.href, baseUrl, baseOrigin, resolved.origin)) links.add(resolved.href);
124
+ } catch {
125
+ // ...
126
+ }
107
127
  }
108
- }
109
128
 
110
- const rawLastMod = res.headers['last-modified']
111
- ?? document.querySelector('meta[property="article:modified_time"]')?.getAttribute('content')
112
- ?? document.querySelector('meta[name="last-modified"]')?.getAttribute('content');
129
+ const rawLastMod = res.headers['last-modified']
130
+ ?? document.querySelector('meta[property="article:modified_time"]')?.getAttribute('content')
131
+ ?? document.querySelector('meta[name="last-modified"]')?.getAttribute('content');
113
132
 
114
- dom.window.close();
133
+ let lastmod = null;
134
+ if (rawLastMod) {
135
+ const parsedLastMod = new Date(rawLastMod);
136
+ if (!Number.isNaN(parsedLastMod.getTime())) lastmod = formatIso(parsedLastMod);
137
+ }
115
138
 
116
- const lastmodDate = rawLastMod ? new Date(rawLastMod) : new Date();
117
- visitedUrls.set(normalizedUrl, {
118
- url: normalizedUrl,
119
- lastmod: formatIso(lastmodDate),
120
- priority: calculatePriority(normalizedUrl, baseUrl),
121
- });
139
+ visitedUrls.set(normalizedUrl, {
140
+ url: normalizedUrl,
141
+ lastmod,
142
+ priority: calculatePriority(normalizedUrl, baseUrl),
143
+ });
122
144
 
123
- logInfoAppend(`GET ${normalizedUrl} (${links.size} urls)`);
145
+ logInfo(`GET ${normalizedUrl} (${links.size} urls)`);
124
146
 
125
- for (const link of links) enqueue(link);
126
- }
147
+ for (const link of links) enqueue(link);
148
+ } catch (err) {
149
+ logError(`Failed to process ${normalizedUrl}: ${err.message}. Skipping...`);
150
+ } finally {
151
+ dom?.window?.close();
152
+ }
153
+ };
154
+
155
+ await new Promise(resolve => {
156
+ let active = 0;
157
+
158
+ const dispatch = () => {
159
+ if (queue.length === 0 && active === 0) {
160
+ resolve();
161
+ return;
162
+ }
163
+
164
+ while (active < concurrency && queue.length > 0) {
165
+ const normalizedUrl = queue.shift();
166
+ active++;
167
+ processUrl(normalizedUrl).finally(() => {
168
+ active--;
169
+ dispatch();
170
+ });
171
+ }
172
+ };
173
+
174
+ dispatch();
175
+ });
127
176
  };
128
177
 
178
+ const buildUrlEntry = ({ url, lastmod, priority }) => ` <url>
179
+ <loc>${escapeXml(url)}</loc>${lastmod ? `
180
+ <lastmod>${lastmod}</lastmod>` : ''}
181
+ <priority>${priority.toFixed(2)}</priority>
182
+ </url>`;
183
+
129
184
  const buildSitemapContent = urls => `<?xml version="1.0" encoding="UTF-8"?>
130
185
  <!-- Generated by https://github.com/sefinek/easy-sitemap-generator v${version} at ${nowIso()} -->
131
186
  <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
132
- ${urls.map(({ url, priority, lastmod }) => ` <url>
133
- <loc>${escapeXml(url)}</loc>
134
- <lastmod>${lastmod}</lastmod>
135
- <priority>${priority.toFixed(2)}</priority>
136
- </url>`).join('\n')}
187
+ ${urls.map(buildUrlEntry).join('\n')}
137
188
  </urlset>`;
138
189
 
139
190
  const buildIndexContent = sitemapLocs => `<?xml version="1.0" encoding="UTF-8"?>
140
191
  <!-- Generated by https://github.com/sefinek/easy-sitemap-generator v${version} at ${nowIso()} -->
141
- <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
192
+ <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd">
142
193
  ${sitemapLocs.map(({ loc, lastmod }) => ` <sitemap>
143
194
  <loc>${escapeXml(loc)}</loc>
144
195
  <lastmod>${lastmod}</lastmod>
145
196
  </sitemap>`).join('\n')}
146
197
  </sitemapindex>`;
147
198
 
148
- const generate = async (baseUrl, destination = 'sitemap.xml') => {
199
+ const chunkUrls = urls => {
200
+ const shellBytes = Buffer.byteLength(buildSitemapContent([]), 'utf8');
201
+ const chunks = [];
202
+ let current = [];
203
+ let currentBytes = shellBytes;
204
+
205
+ for (const entry of urls) {
206
+ const entryBytes = Buffer.byteLength(buildUrlEntry(entry), 'utf8') + 1; // +1 for the joining newline
207
+
208
+ if (current.length > 0 && (current.length >= MAX_URLS || currentBytes + entryBytes > MAX_SITEMAP_BYTES)) {
209
+ chunks.push(current);
210
+ current = [];
211
+ currentBytes = shellBytes;
212
+ }
213
+
214
+ current.push(entry);
215
+ currentBytes += entryBytes;
216
+ }
217
+
218
+ if (current.length > 0) chunks.push(current);
219
+ return chunks;
220
+ };
221
+
222
+ const generateSitemap = async (baseUrl, { destination = 'sitemap.xml', domain = null, concurrency = DEFAULT_CONCURRENCY } = {}) => {
149
223
  logInfo(`Starting crawl for base URL: ${baseUrl}`);
150
224
 
151
225
  const { origin: baseOrigin } = new URL(baseUrl);
226
+ const targetOrigin = domain ? new URL(domain).origin : baseOrigin;
152
227
  const visitedUrls = new Map();
153
- await crawl(baseUrl, baseUrl, baseOrigin, visitedUrls);
228
+ await crawl(baseUrl, baseUrl, baseOrigin, visitedUrls, concurrency);
154
229
 
155
230
  logInfo(`Generating sitemap with ${visitedUrls.size} URLs...`);
156
231
 
157
232
  const urls = Array.from(visitedUrls.values())
158
- .filter(entry => entry.lastmod != null && entry.priority != null)
159
- .sort((a, b) => b.priority - a.priority);
233
+ .filter(entry => {
234
+ if (entry.url.length > MAX_LOC_LENGTH) {
235
+ logWarning(`URL exceeds ${MAX_LOC_LENGTH} characters and was skipped: ${entry.url}`);
236
+ return false;
237
+ }
238
+ return true;
239
+ })
240
+ .sort((a, b) => b.priority - a.priority)
241
+ .map(entry => targetOrigin === baseOrigin ? entry : { ...entry, url: targetOrigin + entry.url.slice(baseOrigin.length) });
160
242
 
161
243
  const output = path.resolve(destination);
162
- if (urls.length <= MAX_URLS) {
244
+ const chunks = chunkUrls(urls);
245
+ if (chunks.length <= 1) {
163
246
  const content = buildSitemapContent(urls);
164
247
  await fs.writeFile(output, content, 'utf8');
165
248
  logSuccess(`Sitemap generated at ${output}`);
166
249
  return content;
167
250
  }
168
251
 
169
- logWarning(`Found ${urls.length} URLs β€” exceeds the ${MAX_URLS} limit. Splitting into multiple sitemap files...`);
252
+ logWarning(`Found ${urls.length} URLs β€” exceeds the sitemap protocol limits (${MAX_URLS.toLocaleString()} URLs / 50MB). Splitting into ${chunks.length} sitemap files...`);
170
253
 
171
254
  const ext = path.extname(destination);
172
255
  const base = path.basename(destination, ext);
173
256
  const dir = path.dirname(output);
174
257
  const timestamp = nowIso();
175
258
 
176
- const totalParts = Math.ceil(urls.length / MAX_URLS);
177
- const sitemapLocs = [];
178
- for (let i = 0, part = 1; i < urls.length; i += MAX_URLS, part++) {
259
+ const sitemapLocs = await Promise.all(chunks.map(async (chunk, i) => {
260
+ const part = i + 1;
179
261
  const filename = `${base}-${part}${ext}`;
180
262
  const filepath = path.join(dir, filename);
181
- const content = buildSitemapContent(urls.slice(i, i + MAX_URLS));
263
+ const content = buildSitemapContent(chunk);
182
264
  await fs.writeFile(filepath, content, 'utf8');
183
- logSuccess(`Sitemap part ${part}/${totalParts} written to ${filepath}`);
184
- sitemapLocs.push({ loc: `${baseOrigin}/${filename}`, lastmod: timestamp });
185
- }
265
+ logSuccess(`Sitemap part ${part}/${chunks.length} written to ${filepath}`);
266
+ return { loc: `${targetOrigin}/${filename}`, lastmod: timestamp };
267
+ }));
186
268
 
187
269
  const indexContent = buildIndexContent(sitemapLocs);
188
270
  await fs.writeFile(output, indexContent, 'utf8');
@@ -191,4 +273,4 @@ const generate = async (baseUrl, destination = 'sitemap.xml') => {
191
273
  return indexContent;
192
274
  };
193
275
 
194
- module.exports = { generate, version };
276
+ export { generateSitemap, version };
package/package.json CHANGED
@@ -1,58 +1,67 @@
1
- {
2
- "name": "easy-sitemap-generator",
3
- "version": "0.2.1",
4
- "description": "Easy and free sitemap.xml file generator without any restrictions for your website.",
5
- "keywords": [
6
- "sitemap",
7
- "sitemap-generator",
8
- "sitemap-xml",
9
- "website"
10
- ],
11
- "homepage": "https://github.com/sefinek/easy-sitemap-generator#readme",
12
- "bugs": {
13
- "url": "https://github.com/sefinek/easy-sitemap-generator/issues"
14
- },
15
- "repository": {
16
- "type": "git",
17
- "url": "git+https://github.com/sefinek/easy-sitemap-generator.git"
18
- },
19
- "license": "MIT",
20
- "author": "Sefinek <contact@sefinek.net> (https://sefinek.net)",
21
- "type": "commonjs",
22
- "main": "index.js",
23
- "types": "index.d.ts",
24
- "bin": {
25
- "generate-sitemap": "bin/cli.js",
26
- "sitemap": "bin/cli.js",
27
- "sitemap-gen": "bin/cli.js",
28
- "sitemap-generator": "bin/cli.js"
29
- },
30
- "directories": {
31
- "lib": "lib",
32
- "example": "example"
33
- },
34
- "files": [
35
- "bin",
36
- "example",
37
- "lib",
38
- "utils",
39
- "services",
40
- "index.d.ts",
41
- "index.js",
42
- "LICENSE",
43
- "README.md"
44
- ],
45
- "scripts": {
46
- "m": "ncu -u && npm install && npm update",
47
- "test": "echo \"Error: no test specified\" && exit 1"
48
- },
49
- "dependencies": {
50
- "axios": "^1.16.1",
51
- "jsdom": "^29.1.1",
52
- "kleur": "^4.1.5"
53
- },
54
- "devDependencies": {
55
- "@eslint/js": "^10.0.1",
56
- "globals": "^17.6.0"
57
- }
58
- }
1
+ {
2
+ "name": "easy-sitemap-generator",
3
+ "version": "0.3.0",
4
+ "description": "Easy and free sitemap.xml file generator without any restrictions for your website.",
5
+ "keywords": [
6
+ "sitemap",
7
+ "sitemap-generator",
8
+ "sitemap-xml",
9
+ "website"
10
+ ],
11
+ "homepage": "https://github.com/sefinek/easy-sitemap-generator#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/sefinek/easy-sitemap-generator/issues"
14
+ },
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "git+https://github.com/sefinek/easy-sitemap-generator.git"
18
+ },
19
+ "license": "MIT",
20
+ "author": "Sefinek <contact@sefinek.net> (https://sefinek.net)",
21
+ "type": "module",
22
+ "main": "index.js",
23
+ "types": "index.d.ts",
24
+ "bin": {
25
+ "generate-sitemap": "bin/cli.js",
26
+ "sitemap": "bin/cli.js",
27
+ "sitemap-gen": "bin/cli.js",
28
+ "sitemap-generator": "bin/cli.js"
29
+ },
30
+ "directories": {
31
+ "lib": "lib",
32
+ "test": "tests"
33
+ },
34
+ "files": [
35
+ "bin",
36
+ "example.js",
37
+ "lib",
38
+ "utils",
39
+ "services",
40
+ "index.d.ts",
41
+ "index.js",
42
+ "LICENSE",
43
+ "README.md"
44
+ ],
45
+ "scripts": {
46
+ "m": "ncu -u && npm install && npm update",
47
+ "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
48
+ },
49
+ "dependencies": {
50
+ "axios": "^1.17.0",
51
+ "chalk": "^5.6.2",
52
+ "jsdom": "^29.1.1"
53
+ },
54
+ "devDependencies": {
55
+ "@eslint/js": "^10.0.1",
56
+ "@types/jest": "^30.0.0",
57
+ "@types/node": "^25.9.3",
58
+ "globals": "^17.6.0",
59
+ "jest": "^30.4.2"
60
+ },
61
+ "engines": {
62
+ "node": ">=24.9"
63
+ },
64
+ "allowScripts": {
65
+ "unrs-resolver@1.12.2": true
66
+ }
67
+ }
package/services/axios.js CHANGED
@@ -1,7 +1,9 @@
1
- const axios = require('axios');
2
- const { version } = require('../package.json');
3
-
4
- axios.defaults.headers.common['User-Agent'] = `Mozilla/5.0 (compatible; EasySitemapGen/${version}; +https://github.com/sefinek/easy-sitemap-generator)`;
5
- axios.defaults.timeout = 24000;
6
-
7
- module.exports = { axios, version };
1
+ import axios from 'axios';
2
+ import { createRequire } from 'node:module';
3
+
4
+ const { version } = createRequire(import.meta.url)('../package.json');
5
+
6
+ axios.defaults.headers.common['User-Agent'] = `Mozilla/5.0 (compatible; EasySitemapGen/${version}; +https://github.com/sefinek/easy-sitemap-generator)`;
7
+ axios.defaults.timeout = 24000;
8
+
9
+ export { axios, version };
package/utils/chalk.js ADDED
@@ -0,0 +1,13 @@
1
+ import chalk from 'chalk';
2
+
3
+ const P_INFO = chalk.blue.bold('[INFO]: ');
4
+ const P_SUCCESS = chalk.green.bold('[SUCCESS]: ');
5
+ const P_ERROR = chalk.red.bold('[ERROR]: ');
6
+ const P_WARN = chalk.yellow.bold('[WARN]: ');
7
+
8
+ const logInfo = msg => console.log(P_INFO + msg);
9
+ const logSuccess = msg => console.log(P_SUCCESS + msg);
10
+ const logError = msg => console.error(P_ERROR + msg);
11
+ const logWarning = msg => console.warn(P_WARN + msg);
12
+
13
+ export { logInfo, logSuccess, logError, logWarning };
package/utils/xml.js CHANGED
@@ -1,21 +1,21 @@
1
- const XML_ESCAPE = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&apos;' };
2
- const escapeXml = str => str.replace(/[&<>"']/g, ch => XML_ESCAPE[ch]);
3
-
4
- const normalizeUrl = url => {
5
- const parsedUrl = new URL(url);
6
- parsedUrl.hash = '';
7
- return parsedUrl.toString();
8
- };
9
-
10
- const calculatePriority = (url, baseUrl) => {
11
- const path = url.replace(baseUrl, '').split('/').filter(Boolean);
12
- const depth = path.length;
13
- const hasQuery = url.includes('?');
14
-
15
- if (depth === 0) return 1.0;
16
- if (depth === 1) return 0.85;
17
- if (depth === 2) return hasQuery ? 0.54 : 0.74;
18
- return hasQuery ? 0.34 : 0.44;
19
- };
20
-
21
- module.exports = { escapeXml, normalizeUrl, calculatePriority };
1
+ const XML_ESCAPE = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', '\'': '&apos;' };
2
+ const escapeXml = str => str.replace(/[&<>"']/g, ch => XML_ESCAPE[ch]);
3
+
4
+ const normalizeUrl = url => {
5
+ const parsedUrl = new URL(url);
6
+ parsedUrl.hash = '';
7
+ return parsedUrl.toString();
8
+ };
9
+
10
+ const calculatePriority = (url, baseUrl) => {
11
+ const path = url.replace(baseUrl, '').split('/').filter(Boolean);
12
+ const depth = path.length;
13
+ const hasQuery = url.includes('?');
14
+
15
+ if (depth === 0) return 1.0;
16
+ if (depth === 1) return 0.85;
17
+ if (depth === 2) return hasQuery ? 0.54 : 0.74;
18
+ return hasQuery ? 0.34 : 0.44;
19
+ };
20
+
21
+ export { escapeXml, normalizeUrl, calculatePriority };
package/example/index.js DELETED
@@ -1,7 +0,0 @@
1
- const sitemap = require('../lib/sitemapGenerator.js');
2
-
3
- (async () => {
4
- const content = await sitemap.generate('https://sefinek.net');
5
- console.log(content);
6
- console.log('Module version:', sitemap.version);
7
- })();
package/utils/kleur.js DELETED
@@ -1,15 +0,0 @@
1
- const kleur = require('kleur');
2
-
3
- const P_INFO = kleur.blue().bold('[INFO]: ');
4
- const P_SUCCESS = kleur.green().bold('[SUCCESS]: ');
5
- const P_ERROR = kleur.red().bold('[ERROR]: ');
6
- const P_WARN = kleur.yellow().bold('[WARN]: ');
7
-
8
- const logInfo = msg => console.log(P_INFO + msg);
9
- const logSuccess = msg => console.log(P_SUCCESS + msg);
10
- const logError = msg => console.error(P_ERROR + msg);
11
- const logWarning = msg => console.warn(P_WARN + msg);
12
- const logInfoStart = msg => process.stdout.write(P_INFO + msg);
13
- const logInfoAppend = msg => process.stdout.write(`\r\x1b[K${P_INFO}${msg}\n`);
14
-
15
- module.exports = { logInfo, logSuccess, logError, logWarning, logInfoStart, logInfoAppend };