easy-sitemap-generator 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +1 -1
- package/lib/sitemapGenerator.js +22 -14
- package/package.json +5 -5
- package/utils/kleur.js +4 -4
- package/utils/xml.js +3 -3
package/bin/cli.js
CHANGED
package/lib/sitemapGenerator.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
const { JSDOM } = require('jsdom');
|
|
2
|
-
const { axios, version } = require('../services/axios.js');
|
|
3
2
|
const urlModule = require('url');
|
|
4
3
|
const fs = require('fs/promises');
|
|
5
4
|
const path = require('path');
|
|
5
|
+
const { axios, version } = require('../services/axios.js');
|
|
6
6
|
const { escapeXml, normalizeUrl, calculatePriority } = require('../utils/xml.js');
|
|
7
7
|
const { logInfo, logSuccess, logError, logWarning } = require('../utils/kleur.js');
|
|
8
8
|
|
|
9
9
|
const VISITED_URLS = new Map();
|
|
10
10
|
const IGNORED_PATTERNS = ['cdn-cgi', '?referrer=', '&referrer=', '/signin/v2/usernamerecovery', '/lifecycle/flows/signup', 'join?return_to='];
|
|
11
|
-
const BASE_DELAY =
|
|
11
|
+
const BASE_DELAY = 9000;
|
|
12
12
|
|
|
13
13
|
const shouldIncludeUrl = (url, baseUrl) => !IGNORED_PATTERNS.some(pattern => url.includes(pattern)) && url.startsWith(baseUrl);
|
|
14
14
|
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
|
|
@@ -16,21 +16,27 @@ const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
|
|
|
16
16
|
const fetchUrl = async (url, retries = 0) => {
|
|
17
17
|
try {
|
|
18
18
|
logInfo(`GET ${url}`);
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
const res = await axios.get(url);
|
|
21
|
+
if (res.status === 200) {
|
|
22
|
+
return res;
|
|
23
|
+
} else {
|
|
24
|
+
logWarning(`Non-200 status code (${res.status}) for URL: ${url}. Skipping...`);
|
|
25
|
+
return null;
|
|
26
|
+
}
|
|
20
27
|
} catch (err) {
|
|
21
|
-
logError(`Error fetching URL: ${url} - ${err.message}`);
|
|
22
28
|
if (err.response) {
|
|
23
29
|
const statusCode = err.response.status;
|
|
24
30
|
if (statusCode === 429) {
|
|
25
31
|
const delayTime = BASE_DELAY * Math.pow(2, retries);
|
|
26
|
-
logWarning(`Rate limit hit
|
|
32
|
+
logWarning(`429: Rate limit hit! Retrying in ${(delayTime / 1000).toFixed(2)}s... (Attempt ${retries + 1})`);
|
|
27
33
|
await delay(delayTime);
|
|
28
34
|
return fetchUrl(url, retries + 1);
|
|
29
|
-
} else if (statusCode
|
|
30
|
-
|
|
35
|
+
} else if (statusCode === 404) {
|
|
36
|
+
logWarning('404: Not Found');
|
|
31
37
|
return null;
|
|
32
|
-
} else
|
|
33
|
-
|
|
38
|
+
} else {
|
|
39
|
+
logError(`${statusCode}: Failed to fetch! Skipping...`);
|
|
34
40
|
return null;
|
|
35
41
|
}
|
|
36
42
|
} else {
|
|
@@ -43,10 +49,11 @@ const fetchUrl = async (url, retries = 0) => {
|
|
|
43
49
|
const crawl = async (url, baseUrl) => {
|
|
44
50
|
const normalizedUrl = normalizeUrl(url);
|
|
45
51
|
if (VISITED_URLS.has(normalizedUrl)) return;
|
|
46
|
-
VISITED_URLS.set(normalizedUrl, { url: normalizedUrl });
|
|
47
52
|
|
|
48
53
|
const res = await fetchUrl(normalizedUrl);
|
|
49
|
-
if (!res) return
|
|
54
|
+
if (!res) return;
|
|
55
|
+
|
|
56
|
+
VISITED_URLS.set(normalizedUrl, { url: normalizedUrl });
|
|
50
57
|
|
|
51
58
|
const { document } = new JSDOM(res.data).window;
|
|
52
59
|
const links = Array.from(document.querySelectorAll('a[href]'))
|
|
@@ -54,7 +61,7 @@ const crawl = async (url, baseUrl) => {
|
|
|
54
61
|
.map(normalizeUrl)
|
|
55
62
|
.filter(link => shouldIncludeUrl(link, baseUrl));
|
|
56
63
|
|
|
57
|
-
logInfo(
|
|
64
|
+
logInfo(`${res.status}: Found ${links.length} urls`);
|
|
58
65
|
|
|
59
66
|
for (const link of links) {
|
|
60
67
|
await crawl(link, baseUrl);
|
|
@@ -73,17 +80,18 @@ const generateSitemap = async (baseUrl, destination = 'sitemap.xml') => {
|
|
|
73
80
|
await crawl(baseUrl, baseUrl);
|
|
74
81
|
|
|
75
82
|
logInfo(`Generating sitemap with ${VISITED_URLS.size} URLs...`);
|
|
83
|
+
// console.log(VISITED_URLS);
|
|
76
84
|
|
|
77
85
|
const urls = Array.from(VISITED_URLS.values())
|
|
78
86
|
.sort((a, b) => b.priority - a.priority);
|
|
79
87
|
|
|
80
88
|
const sitemapContent = `<?xml version="1.0" encoding="UTF-8"?>
|
|
81
|
-
<!-- Generated by https://github.com/sefinek24/easy-sitemap-generator at ${new Date().toISOString()} -->
|
|
89
|
+
<!-- Generated by https://github.com/sefinek24/easy-sitemap-generator v${version} at ${new Date().toISOString()} -->
|
|
82
90
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
|
|
83
91
|
${urls.map(({ url, priority, lastmod }) => ` <url>
|
|
84
92
|
<loc>${escapeXml(url)}</loc>
|
|
85
93
|
<lastmod>${lastmod}</lastmod>
|
|
86
|
-
<priority>${priority
|
|
94
|
+
<priority>${priority?.toFixed(2) || 0.50}</priority>
|
|
87
95
|
</url>`).join('\n')}
|
|
88
96
|
</urlset>`;
|
|
89
97
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "easy-sitemap-generator",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Easy and free sitemap.xml file generator without any restrictions for your website.",
|
|
5
5
|
"homepage": "https://github.com/sefinek24/easy-sitemap-generator#readme",
|
|
6
6
|
"bugs": {
|
|
@@ -25,12 +25,12 @@
|
|
|
25
25
|
"up": "ncu -u && npm install && npm update && npm audit fix"
|
|
26
26
|
},
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"axios": "^1.7.
|
|
29
|
-
"jsdom": "^
|
|
28
|
+
"axios": "^1.7.7",
|
|
29
|
+
"jsdom": "^25.0.1",
|
|
30
30
|
"kleur": "^4.1.5"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
|
-
"@eslint/js": "^9.
|
|
34
|
-
"globals": "^15.
|
|
33
|
+
"@eslint/js": "^9.12.0",
|
|
34
|
+
"globals": "^15.11.0"
|
|
35
35
|
}
|
|
36
36
|
}
|
package/utils/kleur.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
const kleur = require('kleur');
|
|
2
2
|
|
|
3
|
-
const logInfo = msg => console.log(kleur.blue().bold('[INFO]:')
|
|
4
|
-
const logSuccess = msg => console.log(kleur.green().bold('[SUCCESS]:')
|
|
5
|
-
const logError = msg => console.error(kleur.red().bold('[ERROR]:')
|
|
6
|
-
const logWarning = msg => console.warn(kleur.yellow().bold('[WARN]:')
|
|
3
|
+
const logInfo = msg => console.log(kleur.blue().bold('[INFO]: ') + msg);
|
|
4
|
+
const logSuccess = msg => console.log(kleur.green().bold('[SUCCESS]: ') + msg);
|
|
5
|
+
const logError = msg => console.error(kleur.red().bold('[ERROR]: ') + msg);
|
|
6
|
+
const logWarning = msg => console.warn(kleur.yellow().bold('[WARN]: ') + msg);
|
|
7
7
|
|
|
8
8
|
module.exports = { logInfo, logSuccess, logError, logWarning };
|
package/utils/xml.js
CHANGED
|
@@ -17,9 +17,9 @@ const calculatePriority = (url, baseUrl) => {
|
|
|
17
17
|
const hasQuery = url.includes('?');
|
|
18
18
|
|
|
19
19
|
if (depth === 0) return 1.0;
|
|
20
|
-
if (depth === 1) return 0.
|
|
21
|
-
if (depth === 2) return hasQuery ? 0.
|
|
22
|
-
if (depth >= 3) return hasQuery ? 0.
|
|
20
|
+
if (depth === 1) return 0.85;
|
|
21
|
+
if (depth === 2) return hasQuery ? 0.54 : 0.74;
|
|
22
|
+
if (depth >= 3) return hasQuery ? 0.34 : 0.44;
|
|
23
23
|
|
|
24
24
|
return 0.5;
|
|
25
25
|
};
|