easy-sitemap-generator 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Sefinek
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # 🗺️ Easy Sitemap.xml generator without any limits
2
+ Finally! A free and easy-to-use `sitemap.xml` generator with no restrictions for your website.
3
+ Improve your search engine rankings effortlessly! All you need is Node.js installed and this module. Good luck!
4
+
5
+ <a href="https://www.npmjs.com/package/easy-sitemap-generator" target="_blank" title="easy-sitemap-generator - npm" style="text-decoration:none">
6
+ <img src="https://img.shields.io/npm/dt/easy-sitemap-generator.svg?maxAge=3600" alt="The number of downloads">
7
+ <img src="https://img.shields.io/github/issues/sefinek24/easy-sitemap-generator" alt="Issues">
8
+ <img src="https://img.shields.io/github/last-commit/sefinek24/easy-sitemap-generator" alt="Last commit">
9
+ <img src="https://img.shields.io/github/commit-activity/w/sefinek24/easy-sitemap-generator" alt="Commit activity">
10
+ <img src="https://img.shields.io/github/languages/code-size/sefinek24/easy-sitemap-generator" alt="Code size">
11
+ </a>
12
+
13
+ ## 🤔 How to use it?
14
+ ### CLI (recommended)
15
+ ```bash
16
+ npm install easy-sitemap-generator -g
17
+ sitemap --domain=example.com
18
+ ```
19
+ #### Aliases
20
+ | sitemap-gen | sitemap-generator | generate-sitemap |
21
+ |-------------|-------------------|------------------|
22
+
23
+ ### Script
24
+ ```js
25
+ const sitemap = require('easy-sitemap-generator');
26
+
27
+ (async () => {
28
+ const content = await sitemap.generate('https://example.com');
29
+ console.log(content);
30
+ })();
31
+ ```
32
+
33
+ ## ✔️ Sample generated file
34
+ https://sefinek.net/sitemap.xml
35
+
36
+ ## 👀 Why do I need this?
37
+ Indexing bots, such as Google, often check the sitemap.xml file by making a `GET /sitemap.xml` request to find subpages of your website.
38
+ This can improve your site’s visibility in search engine results. Sitemap files are a standard feature and can be found on most websites.
39
+
40
+ ## 😉 Important
41
+ Before running the script or executing the `sitemap` CLI command, make sure you have a stable internet connection. Also, disconnect from any proxy or VPN if you're connected.
42
+
43
+ ## 📘 License
44
+ Licensed under the MIT License. See the [LICENSE](LICENSE) file for more details.
package/bin/cli.js ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env node
2
+
3
+ const { logError } = require('../utils/kleur.js');
4
+ const { generate } = require('../lib/sitemapGenerator');
5
+
6
// Prefix of the only flag this CLI understands.
const DOMAIN_FLAG = '--domain=';

const args = process.argv.slice(2);
const urlArg = args.find(arg => arg.startsWith(DOMAIN_FLAG));

// Take everything after the flag prefix (the value itself may contain "="), then strip a
// leading scheme such as "http://" or a protocol-relative "//" because HTTPS is always used.
const domain = urlArg ? urlArg.slice(DOMAIN_FLAG.length).trim().replace(/(^\w+:|^)\/\//, '') : '';

// Covers both a missing flag and an empty value (`--domain=`).
if (!domain) {
    logError('No URL provided. Use: sitemap-generator --domain=<YOUR-DOMAIN>');
    process.exit(1);
}

// Exit code 2 signals a failure during crawling or while writing the sitemap.
generate(`https://${domain}`).catch(err => {
    logError(err);
    process.exit(2);
});
package/index.d.ts ADDED
@@ -0,0 +1,16 @@
1
+ declare module 'easy-sitemap-generator' {
2
+ /**
3
+ * Generates a sitemap for the specified webpage and saves it to the given destination.
4
+ *
5
+ * @param url - The URL of the webpage for which the sitemap is to be generated.
6
+ * @param destination - Optional. The path to the file where the generated sitemap will be saved.
7
+ * If not specified, the sitemap will be saved in the same folder where the script is run (default: './sitemap.xml').
8
+ * @returns A promise that resolves to a string containing the contents of the generated sitemap.xml file.
9
+ */
10
+ export function generate(url: string, destination?: string): Promise<string>;
11
+
12
+ /**
13
+ * The current version of the `easy-sitemap-generator` module.
14
+ */
15
+ export const version: string;
16
+ }
package/index.js ADDED
@@ -0,0 +1,6 @@
1
+ const { generate, version } = require('./lib/sitemapGenerator.js');
2
+
3
+ module.exports = {
4
+ generate,
5
+ version
6
+ };
@@ -0,0 +1,105 @@
1
+ const { JSDOM } = require('jsdom');
2
+ const { axios, version } = require('../services/axios.js');
3
+ const urlModule = require('url');
4
+ const fs = require('fs/promises');
5
+ const path = require('path');
6
+ const { escapeXml, normalizeUrl, calculatePriority } = require('../utils/xml.js');
7
+ const { logInfo, logSuccess, logError, logWarning } = require('../utils/kleur.js');
8
+
9
// Normalized URLs that have already been picked up by the crawler (module-level, shared state).
const VISITED_URLS = new Set();
// A URL containing any of these substrings is never crawled or listed
// (e.g. `cdn-cgi` paths, referrer-tagged links, sign-in / sign-up flows).
const IGNORED_PATTERNS = ['cdn-cgi', '?referrer=', '&referrer=', '/signin/v2/usernamerecovery', '/lifecycle/flows/signup', 'join?return_to='];
// Base back-off, in milliseconds, used when a request is rate limited.
const BASE_DELAY = 8000;

/**
 * Decides whether a URL belongs in the crawl / sitemap.
 *
 * A URL is accepted when it contains none of the ignored patterns, starts with
 * `baseUrl`, and has the same origin (scheme + host + port) as `baseUrl`. The origin
 * check is what rejects look-alike hosts such as `https://example.com.evil.com/` or
 * `https://example.com@evil.com/`, both of which pass a plain prefix comparison.
 *
 * @param url - Absolute URL to test.
 * @param baseUrl - URL the crawl started from.
 * @returns `true` when the URL should be included, `false` otherwise (including when
 *          either argument cannot be parsed as a URL).
 */
const shouldIncludeUrl = (url, baseUrl) => {
    if (IGNORED_PATTERNS.some(pattern => url.includes(pattern))) return false;
    if (!url.startsWith(baseUrl)) return false;

    try {
        return new URL(url).origin === new URL(baseUrl).origin;
    } catch {
        // Unparseable input can never be a page of the crawled site.
        return false;
    }
};

/** Resolves after `ms` milliseconds. */
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
15
+
16
// Upper bound on HTTP 429 retries so a permanently rate-limited URL cannot stall the crawl forever.
const MAX_RATE_LIMIT_RETRIES = 5;

/**
 * Fetches a URL with the shared axios client.
 *
 * On HTTP 429 the request is retried after `BASE_DELAY * 2^retries` milliseconds, at most
 * `MAX_RATE_LIMIT_RETRIES` times. Every other failure is logged and the URL is skipped.
 *
 * @param url - URL to request.
 * @param retries - Number of rate-limit retries already performed (used by the recursion).
 * @returns The axios response, or `null` when the URL could not be fetched.
 */
const fetchUrl = async (url, retries = 0) => {
    try {
        logInfo(`GET ${url}`);

        return await axios.get(url);
    } catch (err) {
        logError(`Error fetching URL: ${url} - ${err.message}`);

        // No response object: the request never completed (DNS failure, timeout, ...).
        if (!err.response) {
            logError(`Failed to fetch ${url}. Unknown error: ${err.message}. Skipping...`);
            return null;
        }

        const statusCode = err.response.status;
        if (statusCode === 429) {
            if (retries >= MAX_RATE_LIMIT_RETRIES) {
                logError(`Failed to fetch ${url}. Still rate limited after ${retries} retries. Skipping...`);
                return null;
            }

            const delayTime = BASE_DELAY * Math.pow(2, retries);
            logWarning(`Rate limit hit. Retrying in ${(delayTime / 1000).toFixed(2)}s... (Attempt ${retries + 1})`);
            await delay(delayTime);
            return fetchUrl(url, retries + 1);
        }

        if (statusCode >= 500) {
            logError(`Failed to fetch ${url}. Status code: ${statusCode}. Skipping...`);
        } else if (statusCode >= 400) {
            logWarning(`Failed to fetch ${url}. Status code: ${statusCode}. Skipping...`);
        }

        // Explicit null for every failed status so callers always get the same "no response" value.
        return null;
    }
};
43
+
44
/**
 * Recursively crawls `url`, recording every reachable page of the site in `VISITED_URLS`.
 *
 * Links are resolved against the page they were found on, normalized, filtered with
 * `shouldIncludeUrl` and then crawled one after another.
 *
 * @param url - Page to crawl.
 * @param baseUrl - URL the crawl started from; only URLs under it are followed.
 * @returns `{ url, lastmod }` for the crawled page, or `undefined` when the page was
 *          already visited or could not be fetched.
 */
const crawl = async (url, baseUrl) => {
    const normalizedUrl = normalizeUrl(url);
    if (VISITED_URLS.has(normalizedUrl)) return;
    VISITED_URLS.add(normalizedUrl);

    const response = await fetchUrl(normalizedUrl);
    if (!response) {
        logWarning(`No response received for URL: ${normalizedUrl}`);
        return;
    }

    const { document } = new JSDOM(response.data).window;

    // A Set removes duplicate links on the same page before recursing.
    const links = new Set();
    for (const anchor of document.querySelectorAll('a[href]')) {
        let link;
        try {
            // Relative hrefs are relative to the current page, not to the crawl root.
            link = normalizeUrl(new URL(anchor.getAttribute('href'), normalizedUrl).toString());
        } catch {
            // A single malformed href must not abort the whole crawl.
            continue;
        }

        if (shouldIncludeUrl(link, baseUrl)) links.add(link);
    }

    logInfo(`Found ${links.size} urls on ${normalizedUrl}`);

    for (const link of links) {
        await crawl(link, baseUrl);
    }

    // Fall back to "now" when the Last-Modified header is missing or not a valid date;
    // calling toISOString() on an invalid Date would throw.
    const lastModified = new Date(response.headers['last-modified']);
    const lastmod = Number.isNaN(lastModified.getTime()) ? new Date() : lastModified;

    return { url: normalizedUrl, lastmod: lastmod.toISOString() };
};
68
+
69
/**
 * Crawls a website starting at `baseUrl`, builds a sitemap from the discovered URLs and
 * writes it to disk.
 *
 * @param baseUrl - Absolute URL at which the crawl starts.
 * @param destination - File path for the generated sitemap (default: 'sitemap.xml',
 *                      resolved against the current working directory).
 * @returns The XML content that was written to `destination`.
 */
const generateSitemap = async (baseUrl, destination = 'sitemap.xml') => {
    logInfo(`Starting crawl for base URL: ${baseUrl}`);

    // VISITED_URLS is module-level state: reset it so a second call in the same process does
    // not inherit URLs from an earlier run or skip pages that run has already seen.
    // NOTE(review): concurrent generate() calls still share this set.
    VISITED_URLS.clear();

    await crawl(baseUrl, baseUrl);

    logInfo(`Generating sitemap with ${VISITED_URLS.size} URLs...`);

    // Highest priority first.
    const urls = Array.from(VISITED_URLS)
        .filter(url => shouldIncludeUrl(url, baseUrl))
        .map(url => ({
            url,
            priority: calculatePriority(url, baseUrl),
            lastmod: new Date().toISOString()
        }))
        .sort((a, b) => b.priority - a.priority);

    const sitemapContent = `<?xml version="1.0" encoding="UTF-8"?>
<!-- Generated by sitemap generator - ${new Date()} -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
${urls.map(({ url, priority, lastmod }) => ` <url>
<loc>${escapeXml(url)}</loc>
<lastmod>${lastmod}</lastmod>
<priority>${priority.toFixed(2)}</priority>
</url>`).join('\n')}
</urlset>`;

    const output = path.resolve(destination);
    await fs.writeFile(output, sitemapContent, 'utf8');
    logSuccess(`Sitemap has been generated at ${output}`);

    return sitemapContent;
};
101
+
102
+ module.exports = {
103
+ generate: generateSitemap,
104
+ version
105
+ };
package/package.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "name": "easy-sitemap-generator",
3
+ "version": "0.1.0",
4
+ "description": "Free and easy sitemap.xml file generator without any restrictions for your website. Try now!",
5
+ "homepage": "https://github.com/sefinek24/easy-sitemap-generator#readme",
6
+ "bugs": {
7
+ "url": "https://github.com/sefinek24/easy-sitemap-generator/issues"
8
+ },
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+https://github.com/sefinek24/easy-sitemap-generator.git"
12
+ },
13
+ "license": "MIT",
14
+ "author": "Sefinek <contact@nekosia.cat> (https://sefinek.net)",
15
+ "main": "index.js",
16
+ "typings": "index.d.ts",
17
+ "bin": {
18
+ "generate-sitemap": "./bin/cli.js",
19
+ "sitemap": "./bin/cli.js",
20
+ "sitemap-gen": "./bin/cli.js",
21
+ "sitemap-generator": "./bin/cli.js"
22
+ },
23
+ "scripts": {
24
+ "test": "jest test/index.test.js",
25
+ "up": "ncu -u && npm install && npm update && npm audit fix"
26
+ },
27
+ "dependencies": {
28
+ "axios": "^1.7.3",
29
+ "jsdom": "^24.1.1",
30
+ "kleur": "^4.1.5"
31
+ },
32
+ "devDependencies": {
33
+ "@eslint/js": "^9.9.0",
34
+ "globals": "^15.9.0"
35
+ }
36
+ }
@@ -0,0 +1,7 @@
1
+ const axios = require('axios');
2
+ const { version } = require('../package.json');
3
+
4
// Timeout, in milliseconds, applied to every request made through axios.
const REQUEST_TIMEOUT_MS = 24000;
// User-Agent sent with every request; embeds the package version and the project URL.
const USER_AGENT = `Mozilla/5.0 (compatible; EasySitemapGen/${version}; +https://github.com/sefinek24/easy-sitemap-generator)`;

// These assignments change the defaults of the shared axios module itself.
axios.defaults.timeout = REQUEST_TIMEOUT_MS;
axios.defaults.headers.common['User-Agent'] = USER_AGENT;
6
+
7
+ module.exports = { axios, version };
package/utils/kleur.js ADDED
@@ -0,0 +1,8 @@
1
+ const kleur = require('kleur');
2
+
3
/** Prints an informational message to stdout behind a bold blue `[INFO]:` tag. */
function logInfo(msg) {
    console.log(kleur.blue().bold('[INFO]:'), msg);
}

/** Prints a success message to stdout behind a bold green `[SUCCESS]:` tag. */
function logSuccess(msg) {
    console.log(kleur.green().bold('[SUCCESS]:'), msg);
}

/** Prints an error message to stderr behind a bold red `[ERROR]:` tag. */
function logError(msg) {
    console.error(kleur.red().bold('[ERROR]:'), msg);
}

/** Prints a warning via `console.warn` behind a bold yellow `[WARN]:` tag. */
function logWarning(msg) {
    console.warn(kleur.yellow().bold('[WARN]:'), msg);
}
7
+
8
+ module.exports = { logInfo, logSuccess, logError, logWarning };
package/utils/xml.js ADDED
@@ -0,0 +1,27 @@
1
// Replacement for each of the five characters escaped in the sitemap output.
const XML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', '\'': '&apos;' };

/**
 * Escapes `&`, `<`, `>`, `"` and `'` so the string can be embedded in XML.
 *
 * @param str - Text to escape.
 * @returns The escaped text.
 */
const escapeXml = str => str.replace(/[&<>"']/g, char => XML_ENTITIES[char]);
7
+
8
/**
 * Returns the canonical string form of a URL with its fragment (`#...`) removed.
 *
 * @param url - Absolute URL; an unparseable value makes the `URL` constructor throw.
 * @returns The serialized URL without a hash.
 */
const normalizeUrl = url => Object.assign(new URL(url), { hash: '' }).href;
13
+
14
/**
 * Derives a sitemap priority from how deep a URL sits below the base URL.
 *
 * Depth is the number of non-empty `/`-separated segments left after removing `baseUrl`
 * from `url`; from depth 2 onwards a URL containing `?` is ranked lower.
 *
 * @param url - Absolute URL of the page.
 * @param baseUrl - URL the crawl started from.
 * @returns A priority between 0.31 and 1.0.
 */
const calculatePriority = (url, baseUrl) => {
    const segments = url.replace(baseUrl, '').split('/').filter(segment => Boolean(segment));
    const queried = url.includes('?');

    switch (segments.length) {
        case 0:
            return 1.0;
        case 1:
            return 0.81;
        case 2:
            return queried ? 0.51 : 0.71;
        default:
            return queried ? 0.31 : 0.41;
    }
};
26
+
27
+ module.exports = { escapeXml, normalizeUrl, calculatePriority };