@nuxtjs/sitemap 7.4.0 → 7.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/200.html +2 -2
- package/dist/client/404.html +2 -2
- package/dist/client/_nuxt/builds/latest.json +1 -1
- package/dist/client/_nuxt/builds/meta/096c9e6a-a9a8-483d-bd1d-fed5634b0d08.json +1 -0
- package/dist/client/index.html +2 -2
- package/dist/module.d.ts +1 -1
- package/dist/module.json +1 -1
- package/dist/module.mjs +4 -120
- package/dist/runtime/server/sitemap/urlset/sources.js +3 -2
- package/dist/runtime/types.d.ts +1 -0
- package/dist/shared/sitemap.DR3_6qqU.mjs +212 -0
- package/dist/utils.d.mts +28 -0
- package/dist/utils.d.ts +28 -0
- package/dist/utils.mjs +368 -0
- package/package.json +15 -7
- package/dist/client/_nuxt/builds/meta/048e1f4c-a575-4a94-bbab-d777afe4c585.json +0 -1
- package/dist/content.cjs +0 -48
- package/dist/content.d.cts +0 -232
- package/dist/module.cjs +0 -1334
- package/dist/module.d.cts +0 -10
- package/dist/runtime/server/sitemap/utils/extractSitemapXML.d.ts +0 -2
- package/dist/runtime/server/sitemap/utils/extractSitemapXML.js +0 -75
package/dist/client/200.html
CHANGED
|
@@ -9,5 +9,5 @@
|
|
|
9
9
|
<link rel="prefetch" as="script" crossorigin href="/__sitemap__/devtools/_nuxt/cqJZcoo0.js">
|
|
10
10
|
<link rel="prefetch" as="style" crossorigin href="/__sitemap__/devtools/_nuxt/error-500.DbX9fggi.css">
|
|
11
11
|
<link rel="prefetch" as="script" crossorigin href="/__sitemap__/devtools/_nuxt/SQMF8ibg.js">
|
|
12
|
-
<script type="module" src="/__sitemap__/devtools/_nuxt/ChEizYIG.js" crossorigin></script></head><body><div id="__nuxt"></div><div id="teleports"></div><script type="application/json" data-nuxt-data="nuxt-app" data-ssr="false" id="__NUXT_DATA__">[{"prerenderedAt":1,"serverRendered":2},
|
|
13
|
-
<script>window.__NUXT__={};window.__NUXT__.config={public:{},app:{baseURL:"/__sitemap__/devtools",buildId:"
|
|
12
|
+
<script type="module" src="/__sitemap__/devtools/_nuxt/ChEizYIG.js" crossorigin></script></head><body><div id="__nuxt"></div><div id="teleports"></div><script type="application/json" data-nuxt-data="nuxt-app" data-ssr="false" id="__NUXT_DATA__">[{"prerenderedAt":1,"serverRendered":2},1750240316763,false]</script>
|
|
13
|
+
<script>window.__NUXT__={};window.__NUXT__.config={public:{},app:{baseURL:"/__sitemap__/devtools",buildId:"096c9e6a-a9a8-483d-bd1d-fed5634b0d08",buildAssetsDir:"/_nuxt/",cdnURL:""}}</script></body></html>
|
package/dist/client/404.html
CHANGED
|
@@ -9,5 +9,5 @@
|
|
|
9
9
|
<link rel="prefetch" as="script" crossorigin href="/__sitemap__/devtools/_nuxt/cqJZcoo0.js">
|
|
10
10
|
<link rel="prefetch" as="style" crossorigin href="/__sitemap__/devtools/_nuxt/error-500.DbX9fggi.css">
|
|
11
11
|
<link rel="prefetch" as="script" crossorigin href="/__sitemap__/devtools/_nuxt/SQMF8ibg.js">
|
|
12
|
-
<script type="module" src="/__sitemap__/devtools/_nuxt/ChEizYIG.js" crossorigin></script></head><body><div id="__nuxt"></div><div id="teleports"></div><script type="application/json" data-nuxt-data="nuxt-app" data-ssr="false" id="__NUXT_DATA__">[{"prerenderedAt":1,"serverRendered":2},
|
|
13
|
-
<script>window.__NUXT__={};window.__NUXT__.config={public:{},app:{baseURL:"/__sitemap__/devtools",buildId:"
|
|
12
|
+
<script type="module" src="/__sitemap__/devtools/_nuxt/ChEizYIG.js" crossorigin></script></head><body><div id="__nuxt"></div><div id="teleports"></div><script type="application/json" data-nuxt-data="nuxt-app" data-ssr="false" id="__NUXT_DATA__">[{"prerenderedAt":1,"serverRendered":2},1750240316764,false]</script>
|
|
13
|
+
<script>window.__NUXT__={};window.__NUXT__.config={public:{},app:{baseURL:"/__sitemap__/devtools",buildId:"096c9e6a-a9a8-483d-bd1d-fed5634b0d08",buildAssetsDir:"/_nuxt/",cdnURL:""}}</script></body></html>
|
package/dist/client/_nuxt/builds/latest.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"id":"
|
|
1
|
+
{"id":"096c9e6a-a9a8-483d-bd1d-fed5634b0d08","timestamp":1750240314599}
|
package/dist/client/_nuxt/builds/meta/096c9e6a-a9a8-483d-bd1d-fed5634b0d08.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"id":"096c9e6a-a9a8-483d-bd1d-fed5634b0d08","timestamp":1750240314599,"matcher":{"static":{},"wildcard":{},"dynamic":{}},"prerendered":[]}
|
package/dist/client/index.html
CHANGED
|
@@ -9,5 +9,5 @@
|
|
|
9
9
|
<link rel="prefetch" as="script" crossorigin href="/__sitemap__/devtools/_nuxt/cqJZcoo0.js">
|
|
10
10
|
<link rel="prefetch" as="style" crossorigin href="/__sitemap__/devtools/_nuxt/error-500.DbX9fggi.css">
|
|
11
11
|
<link rel="prefetch" as="script" crossorigin href="/__sitemap__/devtools/_nuxt/SQMF8ibg.js">
|
|
12
|
-
<script type="module" src="/__sitemap__/devtools/_nuxt/ChEizYIG.js" crossorigin></script></head><body><div id="__nuxt"></div><div id="teleports"></div><script type="application/json" data-nuxt-data="nuxt-app" data-ssr="false" id="__NUXT_DATA__">[{"prerenderedAt":1,"serverRendered":2},
|
|
13
|
-
<script>window.__NUXT__={};window.__NUXT__.config={public:{},app:{baseURL:"/__sitemap__/devtools",buildId:"
|
|
12
|
+
<script type="module" src="/__sitemap__/devtools/_nuxt/ChEizYIG.js" crossorigin></script></head><body><div id="__nuxt"></div><div id="teleports"></div><script type="application/json" data-nuxt-data="nuxt-app" data-ssr="false" id="__NUXT_DATA__">[{"prerenderedAt":1,"serverRendered":2},1750240316764,false]</script>
|
|
13
|
+
<script>window.__NUXT__={};window.__NUXT__.config={public:{},app:{baseURL:"/__sitemap__/devtools",buildId:"096c9e6a-a9a8-483d-bd1d-fed5634b0d08",buildAssetsDir:"/_nuxt/",cdnURL:""}}</script></body></html>
|
package/dist/module.d.ts
CHANGED
package/dist/module.json
CHANGED
package/dist/module.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { useNuxt, loadNuxtModuleInstance, createResolver, addTemplate, extendPages, tryUseNuxt, defineNuxtModule, useLogger, hasNuxtModule, getNuxtModuleVersion, hasNuxtModuleCompatibility, addServerImports, addServerPlugin, resolveModule, addServerHandler, addPrerenderRoutes } from '@nuxt/kit';
|
|
2
|
-
import { withHttps, withBase,
|
|
2
|
+
import { withHttps, withBase, joinURL, withTrailingSlash, withoutLeadingSlash, withLeadingSlash, withoutTrailingSlash } from 'ufo';
|
|
3
3
|
import { withSiteUrl, installNuxtSiteConfig } from 'nuxt-site-config/kit';
|
|
4
4
|
import { defu } from 'defu';
|
|
5
5
|
import { readPackageJSON } from 'pkg-types';
|
|
@@ -10,7 +10,9 @@ import { provider, env } from 'std-env';
|
|
|
10
10
|
import { mkdir, writeFile } from 'node:fs/promises';
|
|
11
11
|
import { join } from 'node:path';
|
|
12
12
|
import chalk from 'chalk';
|
|
13
|
+
import { p as parseHtmlExtractSitemapMeta } from './shared/sitemap.DR3_6qqU.mjs';
|
|
13
14
|
import { normaliseDate } from '../dist/runtime/server/sitemap/urlset/normalise.js';
|
|
15
|
+
import 'ultrahtml';
|
|
14
16
|
|
|
15
17
|
async function resolveUrls(urls, ctx) {
|
|
16
18
|
try {
|
|
@@ -272,124 +274,6 @@ function resolveNitroPreset(nitroConfig) {
|
|
|
272
274
|
return preset.replace("_", "-");
|
|
273
275
|
}
|
|
274
276
|
|
|
275
|
-
const videoRegex = /<video[^>]*>([\s\S]*?)<\/video>/g;
|
|
276
|
-
const videoSrcRegex = /<video[^>]*\ssrc="([^"]+)"/;
|
|
277
|
-
const videoPosterRegex = /<video[^>]*\sposter="([^"]+)"/;
|
|
278
|
-
const videoTitleRegex = /<video[^>]*\sdata-title="([^"]+)"/;
|
|
279
|
-
const videoDescriptionRegex = /<video[^>]*\sdata-description="([^"]+)"/;
|
|
280
|
-
const videoPlayerLocRegex = /<video[^>]*\sdata-player-loc="([^"]+)"/;
|
|
281
|
-
const videoDurationRegex = /<video[^>]*\sdata-duration="([^"]+)"/;
|
|
282
|
-
const videoExpirationDateRegex = /<video[^>]*\sdata-expiration-date="([^"]+)"/;
|
|
283
|
-
const videoRatingRegex = /<video[^>]*\sdata-rating="([^"]+)"/;
|
|
284
|
-
const videoViewCountRegex = /<video[^>]*\sdata-view-count="([^"]+)"/;
|
|
285
|
-
const videoPublicationDateRegex = /<video[^>]*\sdata-publication-date="([^"]+)"/;
|
|
286
|
-
const videoFamilyFriendlyRegex = /<video[^>]*\sdata-family-friendly="([^"]+)"/;
|
|
287
|
-
const videoRequiresSubscriptionRegex = /<video[^>]*\sdata-requires-subscription="([^"]+)"/;
|
|
288
|
-
const videoLiveRegex = /<video[^>]*\sdata-live="([^"]+)"/;
|
|
289
|
-
const videoTagRegex = /<video[^>]*\sdata-tag="([^"]+)"/;
|
|
290
|
-
const sourceRegex = /<source[^>]*\ssrc="([^"]+)"/g;
|
|
291
|
-
function extractSitemapMetaFromHtml(html, options) {
|
|
292
|
-
options = options || { images: true, videos: true, lastmod: true, alternatives: true };
|
|
293
|
-
const payload = {};
|
|
294
|
-
const resolveUrl = options?.resolveUrl || ((s) => s);
|
|
295
|
-
const mainRegex = /<main[^>]*>([\s\S]*?)<\/main>/;
|
|
296
|
-
const mainMatch = mainRegex.exec(html);
|
|
297
|
-
if (options?.images) {
|
|
298
|
-
const images = /* @__PURE__ */ new Set();
|
|
299
|
-
if (mainMatch?.[1] && mainMatch[1].includes("<img")) {
|
|
300
|
-
const imgRegex = /<img\s+(?:[^>]*?\s)?src=["']((?!data:|blob:|file:)[^"']+?)["'][^>]*>/gi;
|
|
301
|
-
let match;
|
|
302
|
-
while ((match = imgRegex.exec(mainMatch[1])) !== null) {
|
|
303
|
-
if (match.index === imgRegex.lastIndex)
|
|
304
|
-
imgRegex.lastIndex++;
|
|
305
|
-
const url = resolveUrl(match[1]);
|
|
306
|
-
images.add(url);
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
if (images.size > 0)
|
|
310
|
-
payload.images = [...images].map((i) => ({ loc: i }));
|
|
311
|
-
}
|
|
312
|
-
if (options?.videos) {
|
|
313
|
-
const videos = [];
|
|
314
|
-
if (mainMatch?.[1] && mainMatch[1].includes("<video")) {
|
|
315
|
-
let videoMatch;
|
|
316
|
-
while ((videoMatch = videoRegex.exec(mainMatch[1])) !== null) {
|
|
317
|
-
const videoContent = videoMatch[1];
|
|
318
|
-
const videoTag = videoMatch[0];
|
|
319
|
-
const content_loc = (videoSrcRegex.exec(videoTag) || [])[1] || "";
|
|
320
|
-
const thumbnail_loc = (videoPosterRegex.exec(videoTag) || [])[1] || "";
|
|
321
|
-
const title = (videoTitleRegex.exec(videoTag) || [])[1] || "";
|
|
322
|
-
const description = (videoDescriptionRegex.exec(videoTag) || [])[1] || "";
|
|
323
|
-
const videoObj = {
|
|
324
|
-
content_loc,
|
|
325
|
-
thumbnail_loc,
|
|
326
|
-
title,
|
|
327
|
-
description
|
|
328
|
-
};
|
|
329
|
-
const player_loc = (videoPlayerLocRegex.exec(videoTag) || [])[1];
|
|
330
|
-
if (player_loc) videoObj.player_loc = player_loc;
|
|
331
|
-
const duration = (videoDurationRegex.exec(videoTag) || [])[1];
|
|
332
|
-
if (duration) videoObj.duration = Number.parseInt(duration, 10);
|
|
333
|
-
const expiration_date = (videoExpirationDateRegex.exec(videoTag) || [])[1];
|
|
334
|
-
if (expiration_date) videoObj.expiration_date = expiration_date;
|
|
335
|
-
const rating = (videoRatingRegex.exec(videoTag) || [])[1];
|
|
336
|
-
if (rating) videoObj.rating = Number.parseFloat(rating);
|
|
337
|
-
const view_count = (videoViewCountRegex.exec(videoTag) || [])[1];
|
|
338
|
-
if (view_count) videoObj.view_count = Number.parseInt(view_count, 10);
|
|
339
|
-
const publication_date = (videoPublicationDateRegex.exec(videoTag) || [])[1];
|
|
340
|
-
if (publication_date) videoObj.publication_date = publication_date;
|
|
341
|
-
const family_friendly = (videoFamilyFriendlyRegex.exec(videoTag) || [])[1];
|
|
342
|
-
if (family_friendly) videoObj.family_friendly = family_friendly;
|
|
343
|
-
const requires_subscription = (videoRequiresSubscriptionRegex.exec(videoTag) || [])[1];
|
|
344
|
-
if (requires_subscription) videoObj.requires_subscription = requires_subscription;
|
|
345
|
-
const live = (videoLiveRegex.exec(videoTag) || [])[1];
|
|
346
|
-
if (live) videoObj.live = live;
|
|
347
|
-
const tag = (videoTagRegex.exec(videoTag) || [])[1];
|
|
348
|
-
if (tag) videoObj.tag = tag;
|
|
349
|
-
const sources = [];
|
|
350
|
-
let sourceMatch;
|
|
351
|
-
while ((sourceMatch = sourceRegex.exec(videoContent)) !== null) {
|
|
352
|
-
sources.push(sourceMatch[1]);
|
|
353
|
-
}
|
|
354
|
-
if (sources.length > 0) {
|
|
355
|
-
videos.push(...sources.map((source) => {
|
|
356
|
-
if (videoObj.thumbnail_loc) {
|
|
357
|
-
videoObj.thumbnail_loc = resolveUrl(String(videoObj.thumbnail_loc));
|
|
358
|
-
}
|
|
359
|
-
return {
|
|
360
|
-
...videoObj,
|
|
361
|
-
content_loc: resolveUrl(source)
|
|
362
|
-
};
|
|
363
|
-
}));
|
|
364
|
-
} else {
|
|
365
|
-
videos.push(videoObj);
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
}
|
|
369
|
-
const validVideos = videos.filter((v) => {
|
|
370
|
-
return v.content_loc && v.thumbnail_loc && v.title && v.description;
|
|
371
|
-
});
|
|
372
|
-
if (validVideos.length > 0) {
|
|
373
|
-
payload.videos = validVideos;
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
|
-
if (options?.lastmod) {
|
|
377
|
-
const articleModifiedTime = html.match(/<meta[^>]+property="article:modified_time"[^>]+content="([^"]+)"/)?.[1] || html.match(/<meta[^>]+content="([^"]+)"[^>]+property="article:modified_time"/)?.[1];
|
|
378
|
-
if (articleModifiedTime)
|
|
379
|
-
payload.lastmod = articleModifiedTime;
|
|
380
|
-
}
|
|
381
|
-
if (options?.alternatives) {
|
|
382
|
-
const alternatives = (html.match(/<link[^>]+rel="alternate"[^>]+>/g) || []).map((a) => {
|
|
383
|
-
const href = a.match(/href="([^"]+)"/)?.[1];
|
|
384
|
-
const hreflang = a.match(/hreflang="([^"]+)"/)?.[1];
|
|
385
|
-
return { hreflang, href: parseURL(href).pathname };
|
|
386
|
-
}).filter((a) => a.hreflang && a.href);
|
|
387
|
-
if (alternatives?.length && (alternatives.length > 1 || alternatives?.[0].hreflang !== "x-default"))
|
|
388
|
-
payload.alternatives = alternatives;
|
|
389
|
-
}
|
|
390
|
-
return payload;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
277
|
function formatPrerenderRoute(route) {
|
|
394
278
|
let str = ` \u251C\u2500 ${route.route} (${route.generateTimeMS}ms)`;
|
|
395
279
|
if (route.error) {
|
|
@@ -453,7 +337,7 @@ function setupPrerenderHandler(_options, nuxt = useNuxt()) {
|
|
|
453
337
|
route._sitemap._sitemap = _sitemap;
|
|
454
338
|
}
|
|
455
339
|
}
|
|
456
|
-
route._sitemap = defu(
|
|
340
|
+
route._sitemap = defu(parseHtmlExtractSitemapMeta(html, {
|
|
457
341
|
images: options.discoverImages,
|
|
458
342
|
videos: options.discoverVideos,
|
|
459
343
|
// TODO configurable?
|
package/dist/runtime/server/sitemap/urlset/sources.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { getRequestHost } from "h3";
|
|
2
2
|
import { defu } from "defu";
|
|
3
3
|
import { parseURL } from "ufo";
|
|
4
|
-
import { extractSitemapXML } from "../utils/extractSitemapXML.js";
|
|
5
4
|
import { logger } from "../../../utils-pure.js";
|
|
6
5
|
async function tryFetchWithFallback(url, options, event) {
|
|
7
6
|
const isExternalUrl = !url.startsWith("/");
|
|
@@ -78,7 +77,9 @@ export async function fetchDataSource(input, event) {
|
|
|
78
77
|
if (typeof res === "object") {
|
|
79
78
|
urls = res.urls || res;
|
|
80
79
|
} else if (typeof res === "string" && parseURL(url).pathname.endsWith(".xml")) {
|
|
81
|
-
|
|
80
|
+
const { parseSitemapXml } = await import("@nuxtjs/sitemap/utils");
|
|
81
|
+
const result = parseSitemapXml(res);
|
|
82
|
+
urls = result.urls;
|
|
82
83
|
}
|
|
83
84
|
return {
|
|
84
85
|
...input,
|
package/dist/runtime/types.d.ts
CHANGED
package/dist/shared/sitemap.DR3_6qqU.mjs
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { parseURL } from 'ufo';
|
|
2
|
+
import { parse, walkSync, ELEMENT_NODE } from 'ultrahtml';
|
|
3
|
+
|
|
4
|
+
function isValidUrl(url) {
|
|
5
|
+
if (!url || typeof url !== "string") return false;
|
|
6
|
+
const trimmed = url.trim();
|
|
7
|
+
if (!trimmed) return false;
|
|
8
|
+
if (trimmed.startsWith("data:") || trimmed.startsWith("blob:") || trimmed.startsWith("file:")) {
|
|
9
|
+
return false;
|
|
10
|
+
}
|
|
11
|
+
try {
|
|
12
|
+
const parsed = parseURL(trimmed);
|
|
13
|
+
return !!(parsed.protocol && parsed.host) || !!parsed.pathname;
|
|
14
|
+
} catch {
|
|
15
|
+
return false;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
function isValidString(value) {
|
|
19
|
+
return typeof value === "string" && value.trim().length > 0;
|
|
20
|
+
}
|
|
21
|
+
function sanitizeString(value) {
|
|
22
|
+
if (!isValidString(value)) return "";
|
|
23
|
+
return String(value).trim().replace(/[\x00-\x1F\x7F-\x9F]/g, "");
|
|
24
|
+
}
|
|
25
|
+
function isValidDate(dateString) {
|
|
26
|
+
if (!dateString) return false;
|
|
27
|
+
const date = new Date(dateString);
|
|
28
|
+
return !Number.isNaN(date.getTime()) && date.getFullYear() > 1900 && date.getFullYear() < 3e3;
|
|
29
|
+
}
|
|
30
|
+
function parseHtmlExtractSitemapMeta(html, options) {
|
|
31
|
+
options = options || { images: true, videos: true, lastmod: true, alternatives: true };
|
|
32
|
+
const payload = {};
|
|
33
|
+
const resolveUrl = options?.resolveUrl || ((s) => s);
|
|
34
|
+
let doc;
|
|
35
|
+
try {
|
|
36
|
+
doc = parse(html);
|
|
37
|
+
} catch (error) {
|
|
38
|
+
console.warn("Failed to parse HTML:", error);
|
|
39
|
+
return payload;
|
|
40
|
+
}
|
|
41
|
+
let mainElement = null;
|
|
42
|
+
const images = /* @__PURE__ */ new Set();
|
|
43
|
+
const videos = [];
|
|
44
|
+
const videoSources = /* @__PURE__ */ new Map();
|
|
45
|
+
let articleModifiedTime;
|
|
46
|
+
const alternatives = [];
|
|
47
|
+
walkSync(doc, (node) => {
|
|
48
|
+
if (node.type === ELEMENT_NODE) {
|
|
49
|
+
const element = node;
|
|
50
|
+
const attrs = element.attributes || {};
|
|
51
|
+
if (element.name === "main" && !mainElement) {
|
|
52
|
+
mainElement = element;
|
|
53
|
+
}
|
|
54
|
+
if (options?.lastmod && element.name === "meta") {
|
|
55
|
+
const property = sanitizeString(attrs.property);
|
|
56
|
+
const content = sanitizeString(attrs.content);
|
|
57
|
+
if (property === "article:modified_time" && content && isValidDate(content)) {
|
|
58
|
+
articleModifiedTime = content;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (options?.alternatives && element.name === "link") {
|
|
62
|
+
const rel = sanitizeString(attrs.rel);
|
|
63
|
+
const href = sanitizeString(attrs.href);
|
|
64
|
+
const hreflang = sanitizeString(attrs.hreflang);
|
|
65
|
+
if (rel === "alternate" && href && hreflang && isValidUrl(href)) {
|
|
66
|
+
const hreflangPattern = /^[a-z]{2}(?:-[A-Z]{2})?$|^x-default$/;
|
|
67
|
+
if (hreflangPattern.test(hreflang)) {
|
|
68
|
+
try {
|
|
69
|
+
const parsed = parseURL(href);
|
|
70
|
+
if (parsed.pathname) {
|
|
71
|
+
alternatives.push({
|
|
72
|
+
hreflang,
|
|
73
|
+
href: parsed.pathname
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
} catch {
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
});
|
|
83
|
+
const searchScope = mainElement || doc;
|
|
84
|
+
walkSync(searchScope, (node) => {
|
|
85
|
+
if (node.type === ELEMENT_NODE) {
|
|
86
|
+
const element = node;
|
|
87
|
+
const attrs = element.attributes || {};
|
|
88
|
+
if (options?.images && element.name === "img") {
|
|
89
|
+
const src = sanitizeString(attrs.src);
|
|
90
|
+
if (src && isValidUrl(src)) {
|
|
91
|
+
const resolvedUrl = resolveUrl(src);
|
|
92
|
+
if (isValidUrl(resolvedUrl)) {
|
|
93
|
+
images.add(resolvedUrl);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (options?.videos && element.name === "video") {
|
|
98
|
+
const content_loc = sanitizeString(attrs.src);
|
|
99
|
+
const thumbnail_loc = sanitizeString(attrs.poster);
|
|
100
|
+
const title = sanitizeString(attrs["data-title"]);
|
|
101
|
+
const description = sanitizeString(attrs["data-description"]);
|
|
102
|
+
if (!title || !description) {
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
const videoObj = {
|
|
106
|
+
content_loc,
|
|
107
|
+
thumbnail_loc,
|
|
108
|
+
title,
|
|
109
|
+
description
|
|
110
|
+
};
|
|
111
|
+
const player_loc = sanitizeString(attrs["data-player-loc"]);
|
|
112
|
+
if (player_loc && isValidUrl(player_loc)) {
|
|
113
|
+
videoObj.player_loc = player_loc;
|
|
114
|
+
}
|
|
115
|
+
const duration = sanitizeString(attrs["data-duration"]);
|
|
116
|
+
if (duration) {
|
|
117
|
+
const parsedDuration = Number.parseInt(duration, 10);
|
|
118
|
+
if (!Number.isNaN(parsedDuration) && parsedDuration > 0 && parsedDuration <= 28800) {
|
|
119
|
+
videoObj.duration = parsedDuration;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
const expiration_date = sanitizeString(attrs["data-expiration-date"]);
|
|
123
|
+
if (expiration_date && isValidDate(expiration_date)) {
|
|
124
|
+
videoObj.expiration_date = expiration_date;
|
|
125
|
+
}
|
|
126
|
+
const rating = sanitizeString(attrs["data-rating"]);
|
|
127
|
+
if (rating) {
|
|
128
|
+
const parsedRating = Number.parseFloat(rating);
|
|
129
|
+
if (!Number.isNaN(parsedRating) && parsedRating >= 0 && parsedRating <= 5) {
|
|
130
|
+
videoObj.rating = parsedRating;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
const view_count = sanitizeString(attrs["data-view-count"]);
|
|
134
|
+
if (view_count) {
|
|
135
|
+
const parsedViewCount = Number.parseInt(view_count, 10);
|
|
136
|
+
if (!Number.isNaN(parsedViewCount) && parsedViewCount >= 0) {
|
|
137
|
+
videoObj.view_count = parsedViewCount;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
const publication_date = sanitizeString(attrs["data-publication-date"]);
|
|
141
|
+
if (publication_date && isValidDate(publication_date)) {
|
|
142
|
+
videoObj.publication_date = publication_date;
|
|
143
|
+
}
|
|
144
|
+
const family_friendly = sanitizeString(attrs["data-family-friendly"]);
|
|
145
|
+
if (family_friendly && ["yes", "no"].includes(family_friendly.toLowerCase())) {
|
|
146
|
+
videoObj.family_friendly = family_friendly.toLowerCase();
|
|
147
|
+
}
|
|
148
|
+
const requires_subscription = sanitizeString(attrs["data-requires-subscription"]);
|
|
149
|
+
if (requires_subscription && ["yes", "no"].includes(requires_subscription.toLowerCase())) {
|
|
150
|
+
videoObj.requires_subscription = requires_subscription.toLowerCase();
|
|
151
|
+
}
|
|
152
|
+
const live = sanitizeString(attrs["data-live"]);
|
|
153
|
+
if (live && ["yes", "no"].includes(live.toLowerCase())) {
|
|
154
|
+
videoObj.live = live.toLowerCase();
|
|
155
|
+
}
|
|
156
|
+
const tag = sanitizeString(attrs["data-tag"]);
|
|
157
|
+
if (tag && tag.length <= 256) {
|
|
158
|
+
videoObj.tag = tag;
|
|
159
|
+
}
|
|
160
|
+
videos.push({ videoObj, element });
|
|
161
|
+
}
|
|
162
|
+
if (options?.videos && element.name === "source" && element.parent && element.parent.name === "video") {
|
|
163
|
+
const videoElement = element.parent;
|
|
164
|
+
const src = sanitizeString(attrs.src);
|
|
165
|
+
if (src && isValidUrl(src)) {
|
|
166
|
+
if (!videoSources.has(videoElement)) {
|
|
167
|
+
videoSources.set(videoElement, []);
|
|
168
|
+
}
|
|
169
|
+
videoSources.get(videoElement).push(src);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
});
|
|
174
|
+
if (options?.images && images.size > 0) {
|
|
175
|
+
payload.images = [...images].map((i) => ({ loc: i }));
|
|
176
|
+
}
|
|
177
|
+
if (options?.videos) {
|
|
178
|
+
const processedVideos = [];
|
|
179
|
+
for (const { videoObj, element } of videos) {
|
|
180
|
+
const sources = videoSources.get(element) || [];
|
|
181
|
+
if (sources.length > 0) {
|
|
182
|
+
for (const source of sources) {
|
|
183
|
+
const resolvedVideoObj = { ...videoObj };
|
|
184
|
+
if (resolvedVideoObj.thumbnail_loc) {
|
|
185
|
+
resolvedVideoObj.thumbnail_loc = resolveUrl(String(resolvedVideoObj.thumbnail_loc));
|
|
186
|
+
}
|
|
187
|
+
processedVideos.push({
|
|
188
|
+
...resolvedVideoObj,
|
|
189
|
+
content_loc: resolveUrl(source)
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
} else {
|
|
193
|
+
processedVideos.push(videoObj);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
const validVideos = processedVideos.filter((v) => {
|
|
197
|
+
return isValidString(v.title) && isValidString(v.description) && isValidString(v.content_loc) && isValidUrl(v.content_loc) && isValidString(v.thumbnail_loc) && isValidUrl(v.thumbnail_loc) && v.title.length <= 2048 && v.description.length <= 2048;
|
|
198
|
+
});
|
|
199
|
+
if (validVideos.length > 0) {
|
|
200
|
+
payload.videos = validVideos;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
if (options?.lastmod && articleModifiedTime) {
|
|
204
|
+
payload.lastmod = articleModifiedTime;
|
|
205
|
+
}
|
|
206
|
+
if (options?.alternatives && alternatives.length > 0 && (alternatives.length > 1 || alternatives[0].hreflang !== "x-default")) {
|
|
207
|
+
payload.alternatives = alternatives;
|
|
208
|
+
}
|
|
209
|
+
return payload;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
export { parseHtmlExtractSitemapMeta as p };
|
package/dist/utils.d.mts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { SitemapUrlInput, SitemapUrl } from '../dist/runtime/types.js';
|
|
2
|
+
export * from '../dist/runtime/types.js';
|
|
3
|
+
|
|
4
|
+
interface SitemapWarning {
|
|
5
|
+
type: 'validation';
|
|
6
|
+
message: string;
|
|
7
|
+
context?: {
|
|
8
|
+
url?: string;
|
|
9
|
+
field?: string;
|
|
10
|
+
value?: unknown;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
interface SitemapParseResult {
|
|
14
|
+
urls: SitemapUrlInput[];
|
|
15
|
+
warnings: SitemapWarning[];
|
|
16
|
+
}
|
|
17
|
+
declare function parseSitemapXml(xml: string): SitemapParseResult;
|
|
18
|
+
|
|
19
|
+
declare function parseHtmlExtractSitemapMeta(html: string, options?: {
|
|
20
|
+
images?: boolean;
|
|
21
|
+
videos?: boolean;
|
|
22
|
+
lastmod?: boolean;
|
|
23
|
+
alternatives?: boolean;
|
|
24
|
+
resolveUrl?: (s: string) => string;
|
|
25
|
+
}): Partial<SitemapUrl>;
|
|
26
|
+
|
|
27
|
+
export { parseHtmlExtractSitemapMeta, parseSitemapXml };
|
|
28
|
+
export type { SitemapParseResult, SitemapWarning };
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { SitemapUrlInput, SitemapUrl } from '../dist/runtime/types.js';
|
|
2
|
+
export * from '../dist/runtime/types.js';
|
|
3
|
+
|
|
4
|
+
interface SitemapWarning {
|
|
5
|
+
type: 'validation';
|
|
6
|
+
message: string;
|
|
7
|
+
context?: {
|
|
8
|
+
url?: string;
|
|
9
|
+
field?: string;
|
|
10
|
+
value?: unknown;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
interface SitemapParseResult {
|
|
14
|
+
urls: SitemapUrlInput[];
|
|
15
|
+
warnings: SitemapWarning[];
|
|
16
|
+
}
|
|
17
|
+
declare function parseSitemapXml(xml: string): SitemapParseResult;
|
|
18
|
+
|
|
19
|
+
declare function parseHtmlExtractSitemapMeta(html: string, options?: {
|
|
20
|
+
images?: boolean;
|
|
21
|
+
videos?: boolean;
|
|
22
|
+
lastmod?: boolean;
|
|
23
|
+
alternatives?: boolean;
|
|
24
|
+
resolveUrl?: (s: string) => string;
|
|
25
|
+
}): Partial<SitemapUrl>;
|
|
26
|
+
|
|
27
|
+
export { parseHtmlExtractSitemapMeta, parseSitemapXml };
|
|
28
|
+
export type { SitemapParseResult, SitemapWarning };
|