@akotliar/sitemap-qa 1.0.0-alpha.5 → 1.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +245 -372
- package/dist/index.js.map +1 -1
- package/dist/reporters/templates/partials/finding.hbs +20 -0
- package/dist/reporters/templates/partials/header.hbs +9 -0
- package/dist/reporters/templates/partials/summary.hbs +22 -0
- package/dist/reporters/templates/report.hbs +293 -0
- package/package.json +4 -1
package/dist/index.js
CHANGED
|
@@ -6,7 +6,7 @@ import { Command as Command3 } from "commander";
|
|
|
6
6
|
// src/commands/analyze.ts
|
|
7
7
|
import { Command } from "commander";
|
|
8
8
|
import chalk3 from "chalk";
|
|
9
|
-
import
|
|
9
|
+
import path3 from "path";
|
|
10
10
|
import fs4 from "fs/promises";
|
|
11
11
|
|
|
12
12
|
// src/config/loader.ts
|
|
@@ -94,7 +94,12 @@ var ConfigLoader = class {
|
|
|
94
94
|
static DEFAULT_CONFIG_PATH = "sitemap-qa.yaml";
|
|
95
95
|
static load(configPath) {
|
|
96
96
|
const targetPath = configPath || path.join(process.cwd(), this.DEFAULT_CONFIG_PATH);
|
|
97
|
-
let userConfig = {
|
|
97
|
+
let userConfig = {
|
|
98
|
+
acceptable_patterns: [],
|
|
99
|
+
policies: [],
|
|
100
|
+
outputFormat: "all",
|
|
101
|
+
enforceDomainConsistency: true
|
|
102
|
+
};
|
|
98
103
|
if (fs.existsSync(targetPath)) {
|
|
99
104
|
try {
|
|
100
105
|
const fileContent = fs.readFileSync(targetPath, "utf8");
|
|
@@ -106,15 +111,18 @@ var ConfigLoader = class {
|
|
|
106
111
|
console.error(chalk.yellow(` - ${issue.path.join(".")}: ${issue.message}`));
|
|
107
112
|
});
|
|
108
113
|
process.exit(2);
|
|
114
|
+
return DEFAULT_POLICIES;
|
|
109
115
|
}
|
|
110
116
|
userConfig = result.data;
|
|
111
117
|
} catch (error) {
|
|
112
118
|
console.error(chalk.red("Failed to load configuration:"), error);
|
|
113
119
|
process.exit(2);
|
|
120
|
+
return DEFAULT_POLICIES;
|
|
114
121
|
}
|
|
115
122
|
} else if (configPath) {
|
|
116
123
|
console.error(chalk.red(`Error: Configuration file not found at ${targetPath}`));
|
|
117
124
|
process.exit(2);
|
|
125
|
+
return DEFAULT_POLICIES;
|
|
118
126
|
}
|
|
119
127
|
return this.mergeConfigs(DEFAULT_POLICIES, userConfig);
|
|
120
128
|
}
|
|
@@ -148,9 +156,81 @@ var ConfigLoader = class {
|
|
|
148
156
|
|
|
149
157
|
// src/core/discovery.ts
|
|
150
158
|
import { fetch } from "undici";
|
|
159
|
+
import { Readable } from "stream";
|
|
160
|
+
|
|
161
|
+
// src/core/xml-parser.ts
|
|
151
162
|
import { XMLParser } from "fast-xml-parser";
|
|
152
|
-
|
|
163
|
+
import { gunzipSync } from "zlib";
|
|
164
|
+
var StreamingXmlParser = class {
|
|
153
165
|
parser;
|
|
166
|
+
lastParsedXml;
|
|
167
|
+
constructor() {
|
|
168
|
+
this.parser = new XMLParser({
|
|
169
|
+
ignoreAttributes: false,
|
|
170
|
+
attributeNamePrefix: "@_",
|
|
171
|
+
// Ensure we always get arrays for sitemap and url tags
|
|
172
|
+
isArray: (name) => name === "sitemap" || name === "url",
|
|
173
|
+
removeNSPrefix: true
|
|
174
|
+
});
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Parses an XML stream and yields typed entries as they are found.
|
|
178
|
+
* Generator-first design allows consumers to process entries without pre-collecting.
|
|
179
|
+
*/
|
|
180
|
+
async *parse(stream) {
|
|
181
|
+
const xmlData = typeof stream === "string" ? stream : await this.streamToString(stream);
|
|
182
|
+
this.lastParsedXml = xmlData;
|
|
183
|
+
const jsonObj = this.parser.parse(xmlData);
|
|
184
|
+
if (jsonObj.sitemapindex?.sitemap) {
|
|
185
|
+
const sitemaps = jsonObj.sitemapindex.sitemap;
|
|
186
|
+
for (const sitemap of sitemaps) {
|
|
187
|
+
if (sitemap?.loc) {
|
|
188
|
+
yield { type: "sitemap", loc: sitemap.loc };
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
if (jsonObj.urlset?.url) {
|
|
193
|
+
const urls = jsonObj.urlset.url;
|
|
194
|
+
for (const url of urls) {
|
|
195
|
+
if (url?.loc) {
|
|
196
|
+
yield {
|
|
197
|
+
type: "url",
|
|
198
|
+
loc: url.loc,
|
|
199
|
+
lastmod: url.lastmod,
|
|
200
|
+
changefreq: url.changefreq,
|
|
201
|
+
priority: url.priority
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Get the last parsed XML data (useful to avoid re-fetching).
|
|
209
|
+
*/
|
|
210
|
+
getLastParsedXml() {
|
|
211
|
+
return this.lastParsedXml;
|
|
212
|
+
}
|
|
213
|
+
async streamToString(stream) {
|
|
214
|
+
const chunks = [];
|
|
215
|
+
for await (const chunk of stream) {
|
|
216
|
+
chunks.push(Buffer.from(chunk));
|
|
217
|
+
}
|
|
218
|
+
const buffer = Buffer.concat(chunks);
|
|
219
|
+
if (buffer.length >= 2 && buffer[0] === 31 && buffer[1] === 139) {
|
|
220
|
+
try {
|
|
221
|
+
const decompressed = gunzipSync(buffer);
|
|
222
|
+
return decompressed.toString("utf8");
|
|
223
|
+
} catch (error) {
|
|
224
|
+
throw new Error(`Failed to decompress gzipped content: ${error}`);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
return buffer.toString("utf8");
|
|
228
|
+
}
|
|
229
|
+
};
|
|
230
|
+
|
|
231
|
+
// src/core/discovery.ts
|
|
232
|
+
var DiscoveryService = class {
|
|
233
|
+
xmlParser;
|
|
154
234
|
visited = /* @__PURE__ */ new Set();
|
|
155
235
|
STANDARD_PATHS = [
|
|
156
236
|
"/sitemap.xml",
|
|
@@ -160,10 +240,7 @@ var DiscoveryService = class {
|
|
|
160
240
|
"/sitemap.xml.gz"
|
|
161
241
|
];
|
|
162
242
|
constructor() {
|
|
163
|
-
this.
|
|
164
|
-
ignoreAttributes: false,
|
|
165
|
-
attributeNamePrefix: "@_"
|
|
166
|
-
});
|
|
243
|
+
this.xmlParser = new StreamingXmlParser();
|
|
167
244
|
}
|
|
168
245
|
/**
|
|
169
246
|
* Attempts to find sitemaps for a given base website URL.
|
|
@@ -185,9 +262,9 @@ var DiscoveryService = class {
|
|
|
185
262
|
} catch (e) {
|
|
186
263
|
}
|
|
187
264
|
if (sitemaps.size === 0) {
|
|
188
|
-
for (const
|
|
265
|
+
for (const path5 of this.STANDARD_PATHS) {
|
|
189
266
|
try {
|
|
190
|
-
const sitemapUrl = `${origin}${
|
|
267
|
+
const sitemapUrl = `${origin}${path5}`;
|
|
191
268
|
const response = await fetch(sitemapUrl, { method: "HEAD" });
|
|
192
269
|
if (response.status === 200) {
|
|
193
270
|
sitemaps.add(sitemapUrl);
|
|
@@ -211,17 +288,39 @@ var DiscoveryService = class {
|
|
|
211
288
|
try {
|
|
212
289
|
const response = await fetch(currentUrl);
|
|
213
290
|
if (response.status !== 200) continue;
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
291
|
+
let isIndex = false;
|
|
292
|
+
let isLeaf = false;
|
|
293
|
+
const childSitemaps = [];
|
|
294
|
+
let xmlData;
|
|
295
|
+
let source;
|
|
296
|
+
if (response.body) {
|
|
297
|
+
const nodeStream = Readable.fromWeb(response.body);
|
|
298
|
+
source = nodeStream;
|
|
299
|
+
} else {
|
|
300
|
+
xmlData = await response.text();
|
|
301
|
+
source = xmlData;
|
|
302
|
+
}
|
|
303
|
+
for await (const entry of this.xmlParser.parse(source)) {
|
|
304
|
+
if (entry.type === "sitemap") {
|
|
305
|
+
isIndex = true;
|
|
306
|
+
childSitemaps.push(entry.loc);
|
|
307
|
+
} else if (entry.type === "url") {
|
|
308
|
+
isLeaf = true;
|
|
222
309
|
}
|
|
223
|
-
}
|
|
224
|
-
|
|
310
|
+
}
|
|
311
|
+
if (isIndex) {
|
|
312
|
+
for (const loc of childSitemaps) {
|
|
313
|
+
queue.push(loc);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
if (!xmlData) {
|
|
317
|
+
xmlData = this.xmlParser.getLastParsedXml() || "";
|
|
318
|
+
}
|
|
319
|
+
if (isLeaf || !isIndex && xmlData.includes("<urlset")) {
|
|
320
|
+
yield {
|
|
321
|
+
url: currentUrl,
|
|
322
|
+
xmlData
|
|
323
|
+
};
|
|
225
324
|
}
|
|
226
325
|
} catch (error) {
|
|
227
326
|
console.error(`Failed to fetch or parse sitemap at ${currentUrl}:`, error);
|
|
@@ -231,54 +330,78 @@ var DiscoveryService = class {
|
|
|
231
330
|
};
|
|
232
331
|
|
|
233
332
|
// src/core/parser.ts
|
|
234
|
-
import {
|
|
333
|
+
import { Readable as Readable2 } from "stream";
|
|
235
334
|
import { fetch as fetch2 } from "undici";
|
|
236
335
|
var SitemapParser = class {
|
|
237
|
-
|
|
336
|
+
xmlParser;
|
|
238
337
|
constructor() {
|
|
239
|
-
this.
|
|
240
|
-
ignoreAttributes: false,
|
|
241
|
-
attributeNamePrefix: "@_"
|
|
242
|
-
});
|
|
338
|
+
this.xmlParser = new StreamingXmlParser();
|
|
243
339
|
}
|
|
244
340
|
/**
|
|
245
341
|
* Parses a leaf sitemap and yields SitemapUrl objects.
|
|
246
|
-
*
|
|
247
|
-
*
|
|
248
|
-
*
|
|
249
|
-
*
|
|
250
|
-
*
|
|
342
|
+
* Uses the shared StreamingXmlParser for consistent and efficient parsing.
|
|
343
|
+
*
|
|
344
|
+
* @param sitemapUrlOrData - Accepts one of three input types:
|
|
345
|
+
* - `string`: A URL string. The method will fetch the sitemap from this URL.
|
|
346
|
+
* Use this when you need to fetch a sitemap from a remote location.
|
|
347
|
+
* - `{ type: 'xmlData'; url: string; xmlData: string }`: An object with a URL and pre-fetched XML data.
|
|
348
|
+
* Use this when you already have the XML content (e.g., from a cache or file)
|
|
349
|
+
* and want to avoid an additional HTTP request.
|
|
350
|
+
* - `{ type: 'stream'; url: string; stream: ReadableStream | Readable }`: An object with a URL and a stream.
|
|
351
|
+
* Accepts either a Web ReadableStream or Node.js Readable stream.
|
|
352
|
+
* Use this when you have a stream source (e.g., from a streaming HTTP response)
|
|
353
|
+
* that should be consumed and parsed. Web streams are converted to Node.js Readable internally.
|
|
354
|
+
*
|
|
355
|
+
* @yields {SitemapUrl} Parsed sitemap URL entries containing `loc` (URL), `source` (sitemap URL),
|
|
356
|
+
* optional metadata (`lastmod`, `changefreq`, `priority`), and a `risks` array (initialized as empty,
|
|
357
|
+
* populated later in the processing pipeline). Other properties like `ignored`/`ignoredBy` are not
|
|
358
|
+
* set by this method and may be added by downstream processors.
|
|
251
359
|
*/
|
|
252
360
|
async *parse(sitemapUrlOrData) {
|
|
253
|
-
|
|
361
|
+
const sitemapUrl = typeof sitemapUrlOrData === "string" ? sitemapUrlOrData : sitemapUrlOrData.url;
|
|
254
362
|
try {
|
|
255
|
-
let
|
|
363
|
+
let source;
|
|
256
364
|
if (typeof sitemapUrlOrData === "string") {
|
|
257
365
|
const response = await fetch2(sitemapUrl);
|
|
258
|
-
|
|
366
|
+
if (response.status !== 200) throw new Error(`Failed to fetch sitemap at ${sitemapUrl}: HTTP ${response.status}`);
|
|
367
|
+
if (response.body) {
|
|
368
|
+
source = Readable2.fromWeb(response.body);
|
|
369
|
+
} else {
|
|
370
|
+
source = await response.text();
|
|
371
|
+
}
|
|
372
|
+
} else if (sitemapUrlOrData.type === "stream") {
|
|
373
|
+
if (sitemapUrlOrData.stream instanceof Readable2) {
|
|
374
|
+
source = sitemapUrlOrData.stream;
|
|
375
|
+
} else {
|
|
376
|
+
source = Readable2.fromWeb(sitemapUrlOrData.stream);
|
|
377
|
+
}
|
|
259
378
|
} else {
|
|
260
|
-
|
|
379
|
+
source = sitemapUrlOrData.xmlData;
|
|
380
|
+
source = sitemapUrlOrData.xmlData;
|
|
261
381
|
}
|
|
262
|
-
const
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
priority: url.priority,
|
|
273
|
-
risks: []
|
|
274
|
-
};
|
|
275
|
-
}
|
|
382
|
+
for await (const entry of this.xmlParser.parse(source)) {
|
|
383
|
+
if (entry.type === "url") {
|
|
384
|
+
yield {
|
|
385
|
+
loc: entry.loc,
|
|
386
|
+
source: sitemapUrl,
|
|
387
|
+
lastmod: entry.lastmod,
|
|
388
|
+
changefreq: entry.changefreq,
|
|
389
|
+
priority: entry.priority,
|
|
390
|
+
risks: []
|
|
391
|
+
};
|
|
276
392
|
}
|
|
277
393
|
}
|
|
278
394
|
} catch (error) {
|
|
279
395
|
console.error(`Failed to parse sitemap at ${sitemapUrl}:`, error);
|
|
280
396
|
}
|
|
281
397
|
}
|
|
398
|
+
async streamToString(stream) {
|
|
399
|
+
const chunks = [];
|
|
400
|
+
for await (const chunk of stream) {
|
|
401
|
+
chunks.push(Buffer.from(chunk));
|
|
402
|
+
}
|
|
403
|
+
return Buffer.concat(chunks).toString("utf8");
|
|
404
|
+
}
|
|
282
405
|
};
|
|
283
406
|
|
|
284
407
|
// src/core/extractor.ts
|
|
@@ -327,7 +450,8 @@ var ExtractorService = class {
|
|
|
327
450
|
for (const startUrl of startUrls) {
|
|
328
451
|
for await (const discovered of this.discovery.discover(startUrl)) {
|
|
329
452
|
this.discoveredSitemaps.add(discovered.url);
|
|
330
|
-
|
|
453
|
+
const parserInput = { type: "xmlData", url: discovered.url, xmlData: discovered.xmlData };
|
|
454
|
+
for await (const urlObj of this.parser.parse(parserInput)) {
|
|
331
455
|
const normalized = this.normalizeUrl(urlObj.loc);
|
|
332
456
|
if (!this.seenUrls.has(normalized)) {
|
|
333
457
|
this.seenUrls.add(normalized);
|
|
@@ -469,343 +593,92 @@ var JsonReporter = class {
|
|
|
469
593
|
|
|
470
594
|
// src/reporters/html-reporter.ts
|
|
471
595
|
import fs3 from "fs/promises";
|
|
596
|
+
import path2 from "path";
|
|
597
|
+
import { fileURLToPath } from "url";
|
|
598
|
+
import Handlebars from "handlebars";
|
|
599
|
+
var __filename2 = fileURLToPath(import.meta.url);
|
|
600
|
+
var __dirname2 = path2.dirname(__filename2);
|
|
472
601
|
var HtmlReporter = class {
|
|
473
602
|
outputPath;
|
|
474
603
|
constructor(outputPath = "sitemap-qa-report.html") {
|
|
475
604
|
this.outputPath = outputPath;
|
|
605
|
+
Handlebars.registerHelper("json", (context) => {
|
|
606
|
+
return JSON.stringify(context);
|
|
607
|
+
});
|
|
476
608
|
}
|
|
477
609
|
async generate(data) {
|
|
478
|
-
const
|
|
479
|
-
|
|
610
|
+
const partialsDir = path2.join(__dirname2, "templates", "partials");
|
|
611
|
+
try {
|
|
612
|
+
const partialFiles = await fs3.readdir(partialsDir);
|
|
613
|
+
for (const file of partialFiles) {
|
|
614
|
+
if (file.endsWith(".hbs")) {
|
|
615
|
+
const partialName = path2.basename(file, ".hbs");
|
|
616
|
+
const partialSource = await fs3.readFile(path2.join(partialsDir, file), "utf8");
|
|
617
|
+
Handlebars.registerPartial(partialName, partialSource);
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
} catch (error) {
|
|
621
|
+
console.warn("Could not load partials:", error);
|
|
622
|
+
}
|
|
623
|
+
const templatePath = path2.join(__dirname2, "templates", "report.hbs");
|
|
624
|
+
const templateSource = await fs3.readFile(templatePath, "utf8");
|
|
625
|
+
const template = Handlebars.compile(templateSource);
|
|
626
|
+
const templateData = this.prepareTemplateData(data);
|
|
627
|
+
const html = template(templateData);
|
|
480
628
|
await fs3.writeFile(this.outputPath, html, "utf8");
|
|
481
629
|
console.log(`HTML report generated at ${this.outputPath}`);
|
|
482
630
|
}
|
|
483
|
-
|
|
484
|
-
const
|
|
631
|
+
prepareTemplateData(data) {
|
|
632
|
+
const duration = ((data.endTime.getTime() - data.startTime.getTime()) / 1e3).toFixed(1);
|
|
633
|
+
const timestamp = data.endTime.toLocaleString();
|
|
634
|
+
const categoriesMap = {};
|
|
485
635
|
for (const urlObj of data.urlsWithRisks) {
|
|
486
636
|
for (const risk of urlObj.risks) {
|
|
487
|
-
if (!
|
|
488
|
-
|
|
637
|
+
if (!categoriesMap[risk.category]) {
|
|
638
|
+
categoriesMap[risk.category] = {};
|
|
489
639
|
}
|
|
490
|
-
if (!
|
|
491
|
-
|
|
640
|
+
if (!categoriesMap[risk.category][risk.pattern]) {
|
|
641
|
+
categoriesMap[risk.category][risk.pattern] = {
|
|
492
642
|
reason: risk.reason,
|
|
493
643
|
urls: []
|
|
494
644
|
};
|
|
495
645
|
}
|
|
496
|
-
|
|
646
|
+
categoriesMap[risk.category][risk.pattern].urls.push(urlObj.loc);
|
|
497
647
|
}
|
|
498
648
|
}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
background-color: var(--bg-dark);
|
|
533
|
-
color: white;
|
|
534
|
-
padding: 40px 20px;
|
|
535
|
-
text-align: left;
|
|
536
|
-
}
|
|
537
|
-
.container {
|
|
538
|
-
max-width: 1200px;
|
|
539
|
-
margin: 0 auto;
|
|
540
|
-
padding: 0 20px;
|
|
541
|
-
}
|
|
542
|
-
header h1 { margin: 0; font-size: 24px; }
|
|
543
|
-
header .meta { margin-top: 10px; color: #94a3b8; font-size: 14px; }
|
|
544
|
-
|
|
545
|
-
.summary-grid {
|
|
546
|
-
display: grid;
|
|
547
|
-
grid-template-columns: repeat(5, 1fr);
|
|
548
|
-
border-bottom: 1px solid var(--border);
|
|
549
|
-
margin-bottom: 40px;
|
|
550
|
-
}
|
|
551
|
-
.summary-card {
|
|
552
|
-
padding: 30px 20px;
|
|
553
|
-
text-align: center;
|
|
554
|
-
border-right: 1px solid var(--border);
|
|
555
|
-
}
|
|
556
|
-
.summary-card:last-child { border-right: none; }
|
|
557
|
-
.summary-card h3 {
|
|
558
|
-
margin: 0;
|
|
559
|
-
font-size: 12px;
|
|
560
|
-
text-transform: uppercase;
|
|
561
|
-
color: var(--text-muted);
|
|
562
|
-
letter-spacing: 0.05em;
|
|
563
|
-
}
|
|
564
|
-
.summary-card p {
|
|
565
|
-
margin: 10px 0 0;
|
|
566
|
-
font-size: 32px;
|
|
567
|
-
font-weight: 700;
|
|
568
|
-
color: var(--text-main);
|
|
569
|
-
}
|
|
570
|
-
.summary-card.highlight p { color: var(--danger); }
|
|
571
|
-
|
|
572
|
-
details {
|
|
573
|
-
margin-bottom: 20px;
|
|
574
|
-
border: 1px solid var(--border);
|
|
575
|
-
border-radius: 8px;
|
|
576
|
-
overflow: hidden;
|
|
577
|
-
}
|
|
578
|
-
summary {
|
|
579
|
-
padding: 15px 20px;
|
|
580
|
-
background-color: #fff;
|
|
581
|
-
cursor: pointer;
|
|
582
|
-
font-weight: 600;
|
|
583
|
-
display: flex;
|
|
584
|
-
justify-content: space-between;
|
|
585
|
-
align-items: center;
|
|
586
|
-
list-style: none;
|
|
587
|
-
}
|
|
588
|
-
summary::-webkit-details-marker { display: none; }
|
|
589
|
-
summary::after {
|
|
590
|
-
content: '\u25B6';
|
|
591
|
-
font-size: 12px;
|
|
592
|
-
color: var(--text-muted);
|
|
593
|
-
transition: transform 0.2s;
|
|
594
|
-
}
|
|
595
|
-
details[open] summary::after { transform: rotate(90deg); }
|
|
596
|
-
|
|
597
|
-
.category-section {
|
|
598
|
-
border: 1px solid var(--warning);
|
|
599
|
-
border-radius: 8px;
|
|
600
|
-
margin-bottom: 20px;
|
|
601
|
-
}
|
|
602
|
-
.category-header {
|
|
603
|
-
padding: 15px 20px;
|
|
604
|
-
background-color: #fffbeb;
|
|
605
|
-
color: var(--warning);
|
|
606
|
-
font-weight: 600;
|
|
607
|
-
cursor: pointer;
|
|
608
|
-
display: flex;
|
|
609
|
-
justify-content: space-between;
|
|
610
|
-
align-items: center;
|
|
611
|
-
}
|
|
612
|
-
.category-content {
|
|
613
|
-
padding: 20px;
|
|
614
|
-
background-color: #fff;
|
|
615
|
-
}
|
|
616
|
-
|
|
617
|
-
.finding-group {
|
|
618
|
-
border: 1px solid var(--border);
|
|
619
|
-
border-radius: 8px;
|
|
620
|
-
padding: 20px;
|
|
621
|
-
margin-bottom: 20px;
|
|
622
|
-
}
|
|
623
|
-
.finding-header {
|
|
624
|
-
display: flex;
|
|
625
|
-
align-items: center;
|
|
626
|
-
gap: 10px;
|
|
627
|
-
margin-bottom: 10px;
|
|
628
|
-
}
|
|
629
|
-
.finding-header h4 { margin: 0; font-size: 16px; }
|
|
630
|
-
.badge {
|
|
631
|
-
background-color: var(--primary);
|
|
632
|
-
color: white;
|
|
633
|
-
padding: 2px 8px;
|
|
634
|
-
border-radius: 12px;
|
|
635
|
-
font-size: 12px;
|
|
636
|
-
}
|
|
637
|
-
.finding-description {
|
|
638
|
-
color: var(--text-muted);
|
|
639
|
-
font-size: 14px;
|
|
640
|
-
margin-bottom: 20px;
|
|
641
|
-
}
|
|
642
|
-
|
|
643
|
-
.url-list {
|
|
644
|
-
background-color: var(--bg-light);
|
|
645
|
-
border-radius: 4px;
|
|
646
|
-
padding: 15px;
|
|
647
|
-
margin-bottom: 15px;
|
|
648
|
-
}
|
|
649
|
-
.url-item {
|
|
650
|
-
font-family: monospace;
|
|
651
|
-
font-size: 13px;
|
|
652
|
-
padding: 8px 12px;
|
|
653
|
-
background: white;
|
|
654
|
-
border: 1px solid var(--border);
|
|
655
|
-
border-radius: 4px;
|
|
656
|
-
margin-bottom: 8px;
|
|
657
|
-
white-space: nowrap;
|
|
658
|
-
overflow: hidden;
|
|
659
|
-
text-overflow: ellipsis;
|
|
660
|
-
}
|
|
661
|
-
.url-item:last-child { margin-bottom: 0; }
|
|
662
|
-
|
|
663
|
-
.more-count {
|
|
664
|
-
font-size: 12px;
|
|
665
|
-
color: var(--text-muted);
|
|
666
|
-
font-style: italic;
|
|
667
|
-
margin-bottom: 15px;
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
.btn {
|
|
671
|
-
display: inline-flex;
|
|
672
|
-
align-items: center;
|
|
673
|
-
gap: 8px;
|
|
674
|
-
background-color: var(--primary);
|
|
675
|
-
color: white;
|
|
676
|
-
padding: 8px 16px;
|
|
677
|
-
border-radius: 6px;
|
|
678
|
-
text-decoration: none;
|
|
679
|
-
font-size: 13px;
|
|
680
|
-
font-weight: 500;
|
|
681
|
-
}
|
|
682
|
-
.btn:hover { opacity: 0.9; }
|
|
683
|
-
|
|
684
|
-
footer {
|
|
685
|
-
text-align: center;
|
|
686
|
-
padding: 40px;
|
|
687
|
-
color: var(--text-muted);
|
|
688
|
-
font-size: 12px;
|
|
689
|
-
border-top: 1px solid var(--border);
|
|
690
|
-
margin-top: 40px;
|
|
691
|
-
}
|
|
692
|
-
</style>
|
|
693
|
-
</head>
|
|
694
|
-
<body>
|
|
695
|
-
<header>
|
|
696
|
-
<div class="container">
|
|
697
|
-
<h1>Sitemap Analysis</h1>
|
|
698
|
-
<div class="meta">
|
|
699
|
-
<div>${esc(data.rootUrl)}</div>
|
|
700
|
-
<div>${esc(timestamp)}</div>
|
|
701
|
-
</div>
|
|
702
|
-
</div>
|
|
703
|
-
</header>
|
|
704
|
-
|
|
705
|
-
<div class="summary-grid">
|
|
706
|
-
<div class="summary-card">
|
|
707
|
-
<h3>Sitemaps</h3>
|
|
708
|
-
<p>${data.discoveredSitemaps.length}</p>
|
|
709
|
-
</div>
|
|
710
|
-
<div class="summary-card">
|
|
711
|
-
<h3>URLs Analyzed</h3>
|
|
712
|
-
<p>${data.totalUrls.toLocaleString()}</p>
|
|
713
|
-
</div>
|
|
714
|
-
<div class="summary-card highlight">
|
|
715
|
-
<h3>Issues Found</h3>
|
|
716
|
-
<p>${data.totalRisks}</p>
|
|
717
|
-
</div>
|
|
718
|
-
<div class="summary-card">
|
|
719
|
-
<h3>URLs Ignored</h3>
|
|
720
|
-
<p>${data.ignoredUrls.length}</p>
|
|
721
|
-
</div>
|
|
722
|
-
<div class="summary-card">
|
|
723
|
-
<h3>Scan Time</h3>
|
|
724
|
-
<p>${duration}s</p>
|
|
725
|
-
</div>
|
|
726
|
-
</div>
|
|
727
|
-
|
|
728
|
-
<div class="container">
|
|
729
|
-
<details>
|
|
730
|
-
<summary>Sitemaps Discovered (${data.discoveredSitemaps.length})</summary>
|
|
731
|
-
<div style="padding: 20px; background: var(--bg-light);">
|
|
732
|
-
${data.discoveredSitemaps.map((s) => `<div class="url-item">${esc(s)}</div>`).join("")}
|
|
733
|
-
</div>
|
|
734
|
-
</details>
|
|
735
|
-
|
|
736
|
-
${data.ignoredUrls.length > 0 ? `
|
|
737
|
-
<details>
|
|
738
|
-
<summary>Ignored URLs (${data.ignoredUrls.length})</summary>
|
|
739
|
-
<div style="padding: 20px; background: var(--bg-light);">
|
|
740
|
-
${data.ignoredUrls.map((u) => {
|
|
741
|
-
const suppressedRisks = u.risks.length > 0 ? ` <span style="color: var(--danger); font-size: 11px; font-weight: bold;">[Suppressed Risks: ${[...new Set(u.risks.map((r) => r.category))].map(esc).join(", ")}]</span>` : "";
|
|
742
|
-
const ignoredBy = u.ignoredBy ?? "Unknown";
|
|
743
|
-
return `<div class="url-item" title="Ignored by: ${esc(ignoredBy)}">${esc(u.loc)} <span style="color: var(--text-muted); font-size: 11px;">(by ${esc(ignoredBy)})</span>${suppressedRisks}</div>`;
|
|
744
|
-
}).join("")}
|
|
745
|
-
</div>
|
|
746
|
-
</details>
|
|
747
|
-
` : ""}
|
|
748
|
-
|
|
749
|
-
${Object.entries(categories).map(([category, findings]) => {
|
|
750
|
-
const totalCategoryUrls = Object.values(findings).reduce((acc, f) => acc + f.urls.length, 0);
|
|
751
|
-
return `
|
|
752
|
-
<div class="category-section">
|
|
753
|
-
<div class="category-header">
|
|
754
|
-
<span>${esc(category)} (${totalCategoryUrls} URLs)</span>
|
|
755
|
-
<span>\u25BC</span>
|
|
756
|
-
</div>
|
|
757
|
-
<div class="category-content">
|
|
758
|
-
${Object.entries(findings).map(([pattern, finding]) => `
|
|
759
|
-
<div class="finding-group">
|
|
760
|
-
<div class="finding-header">
|
|
761
|
-
<h4>${esc(pattern)}</h4>
|
|
762
|
-
<span class="badge">${finding.urls.length} URLs</span>
|
|
763
|
-
</div>
|
|
764
|
-
<div class="finding-description">
|
|
765
|
-
${esc(finding.reason)}
|
|
766
|
-
</div>
|
|
767
|
-
<div class="url-list">
|
|
768
|
-
${finding.urls.slice(0, 3).map((url) => `
|
|
769
|
-
<div class="url-item">${esc(url)}</div>
|
|
770
|
-
`).join("")}
|
|
771
|
-
</div>
|
|
772
|
-
${finding.urls.length > 3 ? `
|
|
773
|
-
<div class="more-count">... and ${finding.urls.length - 3} more</div>
|
|
774
|
-
` : ""}
|
|
775
|
-
<a href="#" class="btn" onclick="downloadUrls(${JSON.stringify(pattern).replace(/"/g, """)}, ${JSON.stringify(finding.urls).replace(/"/g, """)})">
|
|
776
|
-
\u{1F4E5} Download All ${finding.urls.length} URLs
|
|
777
|
-
</a>
|
|
778
|
-
</div>
|
|
779
|
-
`).join("")}
|
|
780
|
-
</div>
|
|
781
|
-
</div>
|
|
782
|
-
`;
|
|
783
|
-
}).join("")}
|
|
784
|
-
</div>
|
|
785
|
-
|
|
786
|
-
<footer>
|
|
787
|
-
Generated by sitemap-qa v1.0.0
|
|
788
|
-
</footer>
|
|
789
|
-
|
|
790
|
-
<script>
|
|
791
|
-
function downloadUrls(name, urls) {
|
|
792
|
-
const blob = new Blob([urls.join('\\n')], { type: 'text/plain' });
|
|
793
|
-
const url = window.URL.createObjectURL(blob);
|
|
794
|
-
const a = document.createElement('a');
|
|
795
|
-
a.href = url;
|
|
796
|
-
a.download = \`\${name.replace(/[^a-z0-9]/gi, '_').toLowerCase()}_urls.txt\`;
|
|
797
|
-
document.body.appendChild(a);
|
|
798
|
-
a.click();
|
|
799
|
-
window.URL.revokeObjectURL(url);
|
|
800
|
-
document.body.removeChild(a);
|
|
801
|
-
}
|
|
802
|
-
</script>
|
|
803
|
-
</body>
|
|
804
|
-
</html>
|
|
805
|
-
`;
|
|
806
|
-
}
|
|
807
|
-
escapeHtml(str) {
|
|
808
|
-
return str.replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'");
|
|
649
|
+
const categories = Object.entries(categoriesMap).map(([name, findingsMap]) => {
|
|
650
|
+
const findings = Object.entries(findingsMap).map(([pattern, finding]) => ({
|
|
651
|
+
pattern,
|
|
652
|
+
urls: finding.urls,
|
|
653
|
+
reason: finding.reason,
|
|
654
|
+
displayUrls: finding.urls.slice(0, 3),
|
|
655
|
+
moreCount: finding.urls.length > 3 ? finding.urls.length - 3 : 0
|
|
656
|
+
}));
|
|
657
|
+
const totalUrls = findings.reduce((acc, f) => acc + f.urls.length, 0);
|
|
658
|
+
return {
|
|
659
|
+
name,
|
|
660
|
+
totalUrls,
|
|
661
|
+
findings
|
|
662
|
+
};
|
|
663
|
+
});
|
|
664
|
+
const ignoredUrls = data.ignoredUrls.map((u) => {
|
|
665
|
+
const suppressedCategories = u.risks.length > 0 ? [...new Set(u.risks.map((r) => r.category))].join(", ") : void 0;
|
|
666
|
+
return {
|
|
667
|
+
loc: u.loc,
|
|
668
|
+
ignoredBy: u.ignoredBy ?? "Unknown",
|
|
669
|
+
suppressedCategories
|
|
670
|
+
};
|
|
671
|
+
});
|
|
672
|
+
return {
|
|
673
|
+
rootUrl: data.rootUrl,
|
|
674
|
+
timestamp,
|
|
675
|
+
discoveredSitemaps: data.discoveredSitemaps,
|
|
676
|
+
totalUrls: data.totalUrls.toLocaleString(),
|
|
677
|
+
totalRisks: data.totalRisks,
|
|
678
|
+
ignoredUrls,
|
|
679
|
+
duration,
|
|
680
|
+
categories
|
|
681
|
+
};
|
|
809
682
|
}
|
|
810
683
|
};
|
|
811
684
|
|
|
@@ -853,11 +726,11 @@ var analyzeCommand = new Command("analyze").description("Analyze a sitemap for p
|
|
|
853
726
|
const reporters = [new ConsoleReporter()];
|
|
854
727
|
await fs4.mkdir(outDir, { recursive: true });
|
|
855
728
|
if (outputFormat === "json" || outputFormat === "all") {
|
|
856
|
-
const jsonPath =
|
|
729
|
+
const jsonPath = path3.join(outDir, "sitemap-qa-report.json");
|
|
857
730
|
reporters.push(new JsonReporter(jsonPath));
|
|
858
731
|
}
|
|
859
732
|
if (outputFormat === "html" || outputFormat === "all") {
|
|
860
|
-
const htmlPath =
|
|
733
|
+
const htmlPath = path3.join(outDir, "sitemap-qa-report.html");
|
|
861
734
|
reporters.push(new HtmlReporter(htmlPath));
|
|
862
735
|
}
|
|
863
736
|
for (const reporter of reporters) {
|
|
@@ -877,7 +750,7 @@ var analyzeCommand = new Command("analyze").description("Analyze a sitemap for p
|
|
|
877
750
|
// src/commands/init.ts
|
|
878
751
|
import { Command as Command2 } from "commander";
|
|
879
752
|
import fs5 from "fs";
|
|
880
|
-
import
|
|
753
|
+
import path4 from "path";
|
|
881
754
|
import chalk4 from "chalk";
|
|
882
755
|
var DEFAULT_CONFIG = `# sitemap-qa configuration
|
|
883
756
|
# This file defines the risk categories and patterns to monitor.
|
|
@@ -933,7 +806,7 @@ policies:
|
|
|
933
806
|
reason: "Archive or database backup files exposed."
|
|
934
807
|
`;
|
|
935
808
|
var initCommand = new Command2("init").description("Initialize a default sitemap-qa.yaml configuration file").action(() => {
|
|
936
|
-
const configPath =
|
|
809
|
+
const configPath = path4.join(process.cwd(), "sitemap-qa.yaml");
|
|
937
810
|
if (fs5.existsSync(configPath)) {
|
|
938
811
|
console.error(chalk4.red(`Error: ${configPath} already exists.`));
|
|
939
812
|
process.exit(1);
|