@nuxtjs/sitemap 7.3.1 → 7.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/200.html +7 -8
- package/dist/client/404.html +7 -8
- package/dist/client/_nuxt/ChEizYIG.js +172 -0
- package/dist/client/_nuxt/{CT3BV8Rj.js → SQMF8ibg.js} +1 -1
- package/dist/client/_nuxt/builds/latest.json +1 -1
- package/dist/client/_nuxt/builds/meta/096c9e6a-a9a8-483d-bd1d-fed5634b0d08.json +1 -0
- package/dist/client/_nuxt/{5vafBU9X.js → cqJZcoo0.js} +1 -1
- package/dist/client/_nuxt/error-404.DljSaiyF.css +1 -0
- package/dist/client/_nuxt/error-500.DbX9fggi.css +1 -0
- package/dist/client/index.html +7 -8
- package/dist/module.d.ts +1 -1
- package/dist/module.json +1 -1
- package/dist/module.mjs +32 -138
- package/dist/runtime/server/content-compat.d.ts +1 -0
- package/dist/runtime/server/content-compat.js +2 -0
- package/dist/runtime/server/routes/__sitemap__/nuxt-content-urls-v3.d.ts +1 -1
- package/dist/runtime/server/routes/__sitemap__/nuxt-content-urls-v3.js +4 -2
- package/dist/runtime/server/routes/sitemap.xsl.js +37 -13
- package/dist/runtime/server/routes/sitemap_index.xml.js +6 -2
- package/dist/runtime/server/sitemap/builder/sitemap-index.d.ts +11 -2
- package/dist/runtime/server/sitemap/builder/sitemap-index.js +23 -5
- package/dist/runtime/server/sitemap/builder/sitemap.d.ts +7 -1
- package/dist/runtime/server/sitemap/builder/sitemap.js +10 -5
- package/dist/runtime/server/sitemap/builder/xml.d.ts +4 -1
- package/dist/runtime/server/sitemap/builder/xml.js +13 -4
- package/dist/runtime/server/sitemap/nitro.js +7 -3
- package/dist/runtime/server/sitemap/urlset/normalise.js +4 -3
- package/dist/runtime/server/sitemap/urlset/sources.js +65 -19
- package/dist/runtime/server/utils.d.ts +1 -0
- package/dist/runtime/server/utils.js +3 -0
- package/dist/runtime/types.d.ts +2 -0
- package/dist/shared/sitemap.DR3_6qqU.mjs +212 -0
- package/dist/utils.d.mts +28 -0
- package/dist/utils.d.ts +28 -0
- package/dist/utils.mjs +368 -0
- package/package.json +24 -15
- package/dist/client/_nuxt/BIHI7g3E.js +0 -1
- package/dist/client/_nuxt/Bn78IMkz.js +0 -172
- package/dist/client/_nuxt/builds/meta/5ecca6e1-2b8a-4fc5-a128-35cbc27bf6d7.json +0 -1
- package/dist/client/_nuxt/error-404.D_zhMyJm.css +0 -1
- package/dist/client/_nuxt/error-500.rdOYVbxo.css +0 -1
- package/dist/content.cjs +0 -48
- package/dist/content.d.cts +0 -232
- package/dist/module.cjs +0 -1324
- package/dist/module.d.cts +0 -10
- package/dist/runtime/server/sitemap/utils/extractSitemapXML.d.ts +0 -2
- package/dist/runtime/server/sitemap/utils/extractSitemapXML.js +0 -75
|
package/dist/runtime/server/sitemap/builder/xml.d.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
1
|
import type { ModuleRuntimeConfig, NitroUrlResolvers, ResolvedSitemapUrl } from '../../../types.js';
|
|
2
2
|
export declare function escapeValueForXml(value: boolean | string | number): string;
|
|
3
|
-
export declare function urlsToXml(urls: ResolvedSitemapUrl[], resolvers: NitroUrlResolvers, { version, xsl, credits, minify }: Pick<ModuleRuntimeConfig, 'version' | 'xsl' | 'credits' | 'minify'>): string;
|
|
3
|
+
export declare function urlsToXml(urls: ResolvedSitemapUrl[], resolvers: NitroUrlResolvers, { version, xsl, credits, minify }: Pick<ModuleRuntimeConfig, 'version' | 'xsl' | 'credits' | 'minify'>, errorInfo?: {
|
|
4
|
+
messages: string[];
|
|
5
|
+
urls: string[];
|
|
6
|
+
}): string;
|
|
package/dist/runtime/server/sitemap/builder/xml.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import { withQuery } from "ufo";
|
|
2
|
+
import { xmlEscape } from "../../utils.js";
|
|
1
3
|
export function escapeValueForXml(value) {
|
|
2
4
|
if (value === true || value === false)
|
|
3
5
|
return value ? "yes" : "no";
|
|
4
|
-
return String(value)
|
|
6
|
+
return xmlEscape(String(value));
|
|
5
7
|
}
|
|
6
8
|
const URLSET_OPENING_TAG = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd http://www.google.com/schemas/sitemap-image/1.1 http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
|
|
7
9
|
function buildUrlXml(url) {
|
|
@@ -162,13 +164,20 @@ function buildUrlXml(url) {
|
|
|
162
164
|
parts[partIndex++] = " </url>";
|
|
163
165
|
return parts.slice(0, partIndex).join("\n");
|
|
164
166
|
}
|
|
165
|
-
export function urlsToXml(urls, resolvers, { version, xsl, credits, minify }) {
|
|
167
|
+
export function urlsToXml(urls, resolvers, { version, xsl, credits, minify }, errorInfo) {
|
|
166
168
|
const estimatedSize = urls.length + 5;
|
|
167
169
|
const xmlParts = Array.from({ length: estimatedSize });
|
|
168
170
|
let partIndex = 0;
|
|
169
|
-
|
|
171
|
+
let xslHref = xsl ? resolvers.relativeBaseUrlResolver(xsl) : false;
|
|
172
|
+
if (xslHref && errorInfo && errorInfo.messages.length > 0) {
|
|
173
|
+
xslHref = withQuery(xslHref, {
|
|
174
|
+
errors: "true",
|
|
175
|
+
error_messages: errorInfo.messages,
|
|
176
|
+
error_urls: errorInfo.urls
|
|
177
|
+
});
|
|
178
|
+
}
|
|
170
179
|
if (xslHref) {
|
|
171
|
-
xmlParts[partIndex++] = `<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="${xslHref}"?>`;
|
|
180
|
+
xmlParts[partIndex++] = `<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="${escapeValueForXml(xslHref)}"?>`;
|
|
172
181
|
} else {
|
|
173
182
|
xmlParts[partIndex++] = '<?xml version="1.0" encoding="UTF-8"?>';
|
|
174
183
|
}
|
|
package/dist/runtime/server/sitemap/nitro.js
CHANGED
|
@@ -41,7 +41,7 @@ async function buildSitemapXml(event, definition, resolvers, runtimeConfig) {
|
|
|
41
41
|
});
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
-
const sitemapUrls = await buildSitemapUrls(definition, resolvers, runtimeConfig, nitro);
|
|
44
|
+
const { urls: sitemapUrls, failedSources } = await buildSitemapUrls(definition, resolvers, runtimeConfig, nitro);
|
|
45
45
|
const routeRuleMatcher = createNitroRouteRuleMatcher();
|
|
46
46
|
const { autoI18n } = runtimeConfig;
|
|
47
47
|
let validCount = 0;
|
|
@@ -94,7 +94,11 @@ async function buildSitemapXml(event, definition, resolvers, runtimeConfig) {
|
|
|
94
94
|
}
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
|
-
const
|
|
97
|
+
const errorInfo = failedSources.length > 0 ? {
|
|
98
|
+
messages: failedSources.map((f) => f.error),
|
|
99
|
+
urls: failedSources.map((f) => f.url)
|
|
100
|
+
} : void 0;
|
|
101
|
+
const sitemap = urlsToXml(urls, resolvers, runtimeConfig, errorInfo);
|
|
98
102
|
const ctx = { sitemap, sitemapName, event };
|
|
99
103
|
await nitro.hooks.callHook("sitemap:output", ctx);
|
|
100
104
|
return ctx.sitemap;
|
|
@@ -120,7 +124,7 @@ const buildSitemapXmlCached = defineCachedFunction(
|
|
|
120
124
|
);
|
|
121
125
|
export async function createSitemap(event, definition, runtimeConfig) {
|
|
122
126
|
const resolvers = useNitroUrlResolvers(event);
|
|
123
|
-
const shouldCache = !import.meta.dev && runtimeConfig.cacheMaxAgeSeconds > 0;
|
|
127
|
+
const shouldCache = !import.meta.dev && typeof runtimeConfig.cacheMaxAgeSeconds === "number" && runtimeConfig.cacheMaxAgeSeconds > 0;
|
|
124
128
|
const xml = shouldCache ? await buildSitemapXmlCached(event, definition, resolvers, runtimeConfig) : await buildSitemapXml(event, definition, resolvers, runtimeConfig);
|
|
125
129
|
setHeader(event, "Content-Type", "text/xml; charset=UTF-8");
|
|
126
130
|
if (runtimeConfig.cacheMaxAgeSeconds) {
|
|
package/dist/runtime/server/sitemap/urlset/normalise.js
CHANGED
|
@@ -75,7 +75,7 @@ export function normaliseEntry(_e, defaults, resolvers) {
|
|
|
75
75
|
delete e.lastmod;
|
|
76
76
|
e.loc = resolve(e.loc, resolvers);
|
|
77
77
|
if (e.alternatives) {
|
|
78
|
-
const alternatives = e.alternatives;
|
|
78
|
+
const alternatives = e.alternatives.map((a) => ({ ...a }));
|
|
79
79
|
for (let i = 0; i < alternatives.length; i++) {
|
|
80
80
|
const alt = alternatives[i];
|
|
81
81
|
if (typeof alt.href === "string") {
|
|
@@ -87,19 +87,20 @@ export function normaliseEntry(_e, defaults, resolvers) {
|
|
|
87
87
|
e.alternatives = mergeOnKey(alternatives, "hreflang");
|
|
88
88
|
}
|
|
89
89
|
if (e.images) {
|
|
90
|
-
const images = e.images;
|
|
90
|
+
const images = e.images.map((i) => ({ ...i }));
|
|
91
91
|
for (let i = 0; i < images.length; i++) {
|
|
92
92
|
images[i].loc = resolve(images[i].loc, resolvers);
|
|
93
93
|
}
|
|
94
94
|
e.images = mergeOnKey(images, "loc");
|
|
95
95
|
}
|
|
96
96
|
if (e.videos) {
|
|
97
|
-
const videos = e.videos;
|
|
97
|
+
const videos = e.videos.map((v) => ({ ...v }));
|
|
98
98
|
for (let i = 0; i < videos.length; i++) {
|
|
99
99
|
if (videos[i].content_loc) {
|
|
100
100
|
videos[i].content_loc = resolve(videos[i].content_loc, resolvers);
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
+
e.videos = mergeOnKey(videos, "content_loc");
|
|
103
104
|
}
|
|
104
105
|
return e;
|
|
105
106
|
}
|
|
package/dist/runtime/server/sitemap/urlset/sources.js
CHANGED
|
@@ -1,36 +1,70 @@
|
|
|
1
1
|
import { getRequestHost } from "h3";
|
|
2
2
|
import { defu } from "defu";
|
|
3
3
|
import { parseURL } from "ufo";
|
|
4
|
-
import {
|
|
4
|
+
import { logger } from "../../../utils-pure.js";
|
|
5
|
+
async function tryFetchWithFallback(url, options, event) {
|
|
6
|
+
const isExternalUrl = !url.startsWith("/");
|
|
7
|
+
if (isExternalUrl) {
|
|
8
|
+
const strategies = [
|
|
9
|
+
// Strategy 1: Use globalThis.$fetch (original approach)
|
|
10
|
+
() => globalThis.$fetch(url, options),
|
|
11
|
+
// Strategy 2: If event is available, try using event context even for external URLs
|
|
12
|
+
event ? () => event.$fetch(url, options) : null,
|
|
13
|
+
// Strategy 3: Use native fetch as last resort
|
|
14
|
+
() => $fetch(url, options)
|
|
15
|
+
].filter(Boolean);
|
|
16
|
+
let lastError = null;
|
|
17
|
+
for (const strategy of strategies) {
|
|
18
|
+
try {
|
|
19
|
+
return await strategy();
|
|
20
|
+
} catch (error) {
|
|
21
|
+
lastError = error;
|
|
22
|
+
continue;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
throw lastError;
|
|
26
|
+
}
|
|
27
|
+
const fetchContainer = url.startsWith("/") && event ? event : globalThis;
|
|
28
|
+
return await fetchContainer.$fetch(url, options);
|
|
29
|
+
}
|
|
5
30
|
export async function fetchDataSource(input, event) {
|
|
6
31
|
const context = typeof input.context === "string" ? { name: input.context } : input.context || { name: "fetch" };
|
|
7
|
-
context.tips = context.tips || [];
|
|
8
32
|
const url = typeof input.fetch === "string" ? input.fetch : input.fetch[0];
|
|
9
33
|
const options = typeof input.fetch === "string" ? {} : input.fetch[1];
|
|
10
34
|
const start = Date.now();
|
|
11
|
-
const
|
|
35
|
+
const isExternalUrl = !url.startsWith("/");
|
|
36
|
+
const timeout = isExternalUrl ? 1e4 : options.timeout || 5e3;
|
|
12
37
|
const timeoutController = new AbortController();
|
|
13
38
|
const abortRequestTimeout = setTimeout(() => timeoutController.abort(), timeout);
|
|
14
|
-
let isMaybeErrorResponse = false;
|
|
15
|
-
const isXmlRequest = parseURL(url).pathname.endsWith(".xml");
|
|
16
|
-
const fetchContainer = url.startsWith("/") && event ? event : globalThis;
|
|
17
39
|
try {
|
|
18
|
-
|
|
40
|
+
let isMaybeErrorResponse = false;
|
|
41
|
+
const isXmlRequest = parseURL(url).pathname.endsWith(".xml");
|
|
42
|
+
const mergedHeaders = defu(
|
|
43
|
+
options?.headers,
|
|
44
|
+
{
|
|
45
|
+
Accept: isXmlRequest ? "text/xml" : "application/json"
|
|
46
|
+
},
|
|
47
|
+
event ? { host: getRequestHost(event, { xForwardedHost: true }) } : {}
|
|
48
|
+
);
|
|
49
|
+
const fetchOptions = {
|
|
19
50
|
...options,
|
|
20
51
|
responseType: isXmlRequest ? "text" : "json",
|
|
21
52
|
signal: timeoutController.signal,
|
|
22
|
-
headers:
|
|
23
|
-
|
|
24
|
-
|
|
53
|
+
headers: mergedHeaders,
|
|
54
|
+
// Use ofetch's built-in retry for external sources
|
|
55
|
+
...isExternalUrl && {
|
|
56
|
+
retry: 2,
|
|
57
|
+
retryDelay: 200
|
|
58
|
+
},
|
|
25
59
|
// @ts-expect-error untyped
|
|
26
60
|
onResponse({ response }) {
|
|
27
61
|
if (typeof response._data === "string" && response._data.startsWith("<!DOCTYPE html>"))
|
|
28
62
|
isMaybeErrorResponse = true;
|
|
29
63
|
}
|
|
30
|
-
}
|
|
64
|
+
};
|
|
65
|
+
const res = await tryFetchWithFallback(url, fetchOptions, event);
|
|
31
66
|
const timeTakenMs = Date.now() - start;
|
|
32
67
|
if (isMaybeErrorResponse) {
|
|
33
|
-
context.tips.push("This is usually because the URL isn't correct or is throwing an error. Please check the URL");
|
|
34
68
|
return {
|
|
35
69
|
...input,
|
|
36
70
|
context,
|
|
@@ -43,7 +77,9 @@ export async function fetchDataSource(input, event) {
|
|
|
43
77
|
if (typeof res === "object") {
|
|
44
78
|
urls = res.urls || res;
|
|
45
79
|
} else if (typeof res === "string" && parseURL(url).pathname.endsWith(".xml")) {
|
|
46
|
-
|
|
80
|
+
const { parseSitemapXml } = await import("@nuxtjs/sitemap/utils");
|
|
81
|
+
const result = parseSitemapXml(res);
|
|
82
|
+
urls = result.urls;
|
|
47
83
|
}
|
|
48
84
|
return {
|
|
49
85
|
...input,
|
|
@@ -53,16 +89,26 @@ export async function fetchDataSource(input, event) {
|
|
|
53
89
|
};
|
|
54
90
|
} catch (_err) {
|
|
55
91
|
const error = _err;
|
|
56
|
-
if (
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
92
|
+
if (isExternalUrl) {
|
|
93
|
+
const errorInfo = {
|
|
94
|
+
url,
|
|
95
|
+
timeout,
|
|
96
|
+
error: error.message,
|
|
97
|
+
statusCode: error.response?.status,
|
|
98
|
+
statusText: error.response?.statusText,
|
|
99
|
+
method: options?.method || "GET"
|
|
100
|
+
};
|
|
101
|
+
logger.error("Failed to fetch external source.", errorInfo);
|
|
102
|
+
} else {
|
|
103
|
+
logger.error("Failed to fetch source.", { url, error: error.message });
|
|
104
|
+
}
|
|
61
105
|
return {
|
|
62
106
|
...input,
|
|
63
107
|
context,
|
|
64
108
|
urls: [],
|
|
65
|
-
error: error.message
|
|
109
|
+
error: error.message,
|
|
110
|
+
_isFailure: true
|
|
111
|
+
// Mark as failure to prevent caching
|
|
66
112
|
};
|
|
67
113
|
} finally {
|
|
68
114
|
if (abortRequestTimeout) {
|
|
package/dist/runtime/server/utils.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { useRuntimeConfig } from "nitropack/runtime";
|
|
2
2
|
import { normalizeRuntimeFilters } from "../utils-pure.js";
|
|
3
3
|
export * from "../utils-pure.js";
|
|
4
|
+
export function xmlEscape(str) {
|
|
5
|
+
return str.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
|
|
6
|
+
}
|
|
4
7
|
export function useSitemapRuntimeConfig(e) {
|
|
5
8
|
const clone = JSON.parse(JSON.stringify(useRuntimeConfig(e).sitemap));
|
|
6
9
|
for (const k in clone.sitemaps) {
|
package/dist/runtime/types.d.ts
CHANGED
|
@@ -178,6 +178,7 @@ export interface SitemapSourceResolved extends Omit<SitemapSourceBase, 'urls'> {
|
|
|
178
178
|
urls: SitemapUrlInput[];
|
|
179
179
|
error?: any;
|
|
180
180
|
timeTakenMs?: number;
|
|
181
|
+
_isFailure?: boolean;
|
|
181
182
|
}
|
|
182
183
|
export type AppSourceContext = 'nuxt:pages' | 'nuxt:prerender' | 'nuxt:route-rules' | '@nuxtjs/i18n:pages' | '@nuxt/content:document-driven';
|
|
183
184
|
export type SitemapSourceInput = string | [string, FetchOptions] | SitemapSourceBase | SitemapSourceResolved;
|
|
@@ -371,6 +372,7 @@ export interface SitemapUrl {
|
|
|
371
372
|
_i18nTransform?: boolean;
|
|
372
373
|
_sitemap?: string;
|
|
373
374
|
}
|
|
375
|
+
export type SitemapStrict = Required<SitemapUrl>;
|
|
374
376
|
export interface AlternativeEntry {
|
|
375
377
|
hreflang: string;
|
|
376
378
|
href: string | URL;
|
|
package/dist/shared/sitemap.DR3_6qqU.mjs
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { parseURL } from 'ufo';
|
|
2
|
+
import { parse, walkSync, ELEMENT_NODE } from 'ultrahtml';
|
|
3
|
+
|
|
4
|
+
function isValidUrl(url) {
|
|
5
|
+
if (!url || typeof url !== "string") return false;
|
|
6
|
+
const trimmed = url.trim();
|
|
7
|
+
if (!trimmed) return false;
|
|
8
|
+
if (trimmed.startsWith("data:") || trimmed.startsWith("blob:") || trimmed.startsWith("file:")) {
|
|
9
|
+
return false;
|
|
10
|
+
}
|
|
11
|
+
try {
|
|
12
|
+
const parsed = parseURL(trimmed);
|
|
13
|
+
return !!(parsed.protocol && parsed.host) || !!parsed.pathname;
|
|
14
|
+
} catch {
|
|
15
|
+
return false;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
function isValidString(value) {
|
|
19
|
+
return typeof value === "string" && value.trim().length > 0;
|
|
20
|
+
}
|
|
21
|
+
function sanitizeString(value) {
|
|
22
|
+
if (!isValidString(value)) return "";
|
|
23
|
+
return String(value).trim().replace(/[\x00-\x1F\x7F-\x9F]/g, "");
|
|
24
|
+
}
|
|
25
|
+
function isValidDate(dateString) {
|
|
26
|
+
if (!dateString) return false;
|
|
27
|
+
const date = new Date(dateString);
|
|
28
|
+
return !Number.isNaN(date.getTime()) && date.getFullYear() > 1900 && date.getFullYear() < 3e3;
|
|
29
|
+
}
|
|
30
|
+
function parseHtmlExtractSitemapMeta(html, options) {
|
|
31
|
+
options = options || { images: true, videos: true, lastmod: true, alternatives: true };
|
|
32
|
+
const payload = {};
|
|
33
|
+
const resolveUrl = options?.resolveUrl || ((s) => s);
|
|
34
|
+
let doc;
|
|
35
|
+
try {
|
|
36
|
+
doc = parse(html);
|
|
37
|
+
} catch (error) {
|
|
38
|
+
console.warn("Failed to parse HTML:", error);
|
|
39
|
+
return payload;
|
|
40
|
+
}
|
|
41
|
+
let mainElement = null;
|
|
42
|
+
const images = /* @__PURE__ */ new Set();
|
|
43
|
+
const videos = [];
|
|
44
|
+
const videoSources = /* @__PURE__ */ new Map();
|
|
45
|
+
let articleModifiedTime;
|
|
46
|
+
const alternatives = [];
|
|
47
|
+
walkSync(doc, (node) => {
|
|
48
|
+
if (node.type === ELEMENT_NODE) {
|
|
49
|
+
const element = node;
|
|
50
|
+
const attrs = element.attributes || {};
|
|
51
|
+
if (element.name === "main" && !mainElement) {
|
|
52
|
+
mainElement = element;
|
|
53
|
+
}
|
|
54
|
+
if (options?.lastmod && element.name === "meta") {
|
|
55
|
+
const property = sanitizeString(attrs.property);
|
|
56
|
+
const content = sanitizeString(attrs.content);
|
|
57
|
+
if (property === "article:modified_time" && content && isValidDate(content)) {
|
|
58
|
+
articleModifiedTime = content;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (options?.alternatives && element.name === "link") {
|
|
62
|
+
const rel = sanitizeString(attrs.rel);
|
|
63
|
+
const href = sanitizeString(attrs.href);
|
|
64
|
+
const hreflang = sanitizeString(attrs.hreflang);
|
|
65
|
+
if (rel === "alternate" && href && hreflang && isValidUrl(href)) {
|
|
66
|
+
const hreflangPattern = /^[a-z]{2}(?:-[A-Z]{2})?$|^x-default$/;
|
|
67
|
+
if (hreflangPattern.test(hreflang)) {
|
|
68
|
+
try {
|
|
69
|
+
const parsed = parseURL(href);
|
|
70
|
+
if (parsed.pathname) {
|
|
71
|
+
alternatives.push({
|
|
72
|
+
hreflang,
|
|
73
|
+
href: parsed.pathname
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
} catch {
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
});
|
|
83
|
+
const searchScope = mainElement || doc;
|
|
84
|
+
walkSync(searchScope, (node) => {
|
|
85
|
+
if (node.type === ELEMENT_NODE) {
|
|
86
|
+
const element = node;
|
|
87
|
+
const attrs = element.attributes || {};
|
|
88
|
+
if (options?.images && element.name === "img") {
|
|
89
|
+
const src = sanitizeString(attrs.src);
|
|
90
|
+
if (src && isValidUrl(src)) {
|
|
91
|
+
const resolvedUrl = resolveUrl(src);
|
|
92
|
+
if (isValidUrl(resolvedUrl)) {
|
|
93
|
+
images.add(resolvedUrl);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
if (options?.videos && element.name === "video") {
|
|
98
|
+
const content_loc = sanitizeString(attrs.src);
|
|
99
|
+
const thumbnail_loc = sanitizeString(attrs.poster);
|
|
100
|
+
const title = sanitizeString(attrs["data-title"]);
|
|
101
|
+
const description = sanitizeString(attrs["data-description"]);
|
|
102
|
+
if (!title || !description) {
|
|
103
|
+
return;
|
|
104
|
+
}
|
|
105
|
+
const videoObj = {
|
|
106
|
+
content_loc,
|
|
107
|
+
thumbnail_loc,
|
|
108
|
+
title,
|
|
109
|
+
description
|
|
110
|
+
};
|
|
111
|
+
const player_loc = sanitizeString(attrs["data-player-loc"]);
|
|
112
|
+
if (player_loc && isValidUrl(player_loc)) {
|
|
113
|
+
videoObj.player_loc = player_loc;
|
|
114
|
+
}
|
|
115
|
+
const duration = sanitizeString(attrs["data-duration"]);
|
|
116
|
+
if (duration) {
|
|
117
|
+
const parsedDuration = Number.parseInt(duration, 10);
|
|
118
|
+
if (!Number.isNaN(parsedDuration) && parsedDuration > 0 && parsedDuration <= 28800) {
|
|
119
|
+
videoObj.duration = parsedDuration;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
const expiration_date = sanitizeString(attrs["data-expiration-date"]);
|
|
123
|
+
if (expiration_date && isValidDate(expiration_date)) {
|
|
124
|
+
videoObj.expiration_date = expiration_date;
|
|
125
|
+
}
|
|
126
|
+
const rating = sanitizeString(attrs["data-rating"]);
|
|
127
|
+
if (rating) {
|
|
128
|
+
const parsedRating = Number.parseFloat(rating);
|
|
129
|
+
if (!Number.isNaN(parsedRating) && parsedRating >= 0 && parsedRating <= 5) {
|
|
130
|
+
videoObj.rating = parsedRating;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
const view_count = sanitizeString(attrs["data-view-count"]);
|
|
134
|
+
if (view_count) {
|
|
135
|
+
const parsedViewCount = Number.parseInt(view_count, 10);
|
|
136
|
+
if (!Number.isNaN(parsedViewCount) && parsedViewCount >= 0) {
|
|
137
|
+
videoObj.view_count = parsedViewCount;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
const publication_date = sanitizeString(attrs["data-publication-date"]);
|
|
141
|
+
if (publication_date && isValidDate(publication_date)) {
|
|
142
|
+
videoObj.publication_date = publication_date;
|
|
143
|
+
}
|
|
144
|
+
const family_friendly = sanitizeString(attrs["data-family-friendly"]);
|
|
145
|
+
if (family_friendly && ["yes", "no"].includes(family_friendly.toLowerCase())) {
|
|
146
|
+
videoObj.family_friendly = family_friendly.toLowerCase();
|
|
147
|
+
}
|
|
148
|
+
const requires_subscription = sanitizeString(attrs["data-requires-subscription"]);
|
|
149
|
+
if (requires_subscription && ["yes", "no"].includes(requires_subscription.toLowerCase())) {
|
|
150
|
+
videoObj.requires_subscription = requires_subscription.toLowerCase();
|
|
151
|
+
}
|
|
152
|
+
const live = sanitizeString(attrs["data-live"]);
|
|
153
|
+
if (live && ["yes", "no"].includes(live.toLowerCase())) {
|
|
154
|
+
videoObj.live = live.toLowerCase();
|
|
155
|
+
}
|
|
156
|
+
const tag = sanitizeString(attrs["data-tag"]);
|
|
157
|
+
if (tag && tag.length <= 256) {
|
|
158
|
+
videoObj.tag = tag;
|
|
159
|
+
}
|
|
160
|
+
videos.push({ videoObj, element });
|
|
161
|
+
}
|
|
162
|
+
if (options?.videos && element.name === "source" && element.parent && element.parent.name === "video") {
|
|
163
|
+
const videoElement = element.parent;
|
|
164
|
+
const src = sanitizeString(attrs.src);
|
|
165
|
+
if (src && isValidUrl(src)) {
|
|
166
|
+
if (!videoSources.has(videoElement)) {
|
|
167
|
+
videoSources.set(videoElement, []);
|
|
168
|
+
}
|
|
169
|
+
videoSources.get(videoElement).push(src);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
});
|
|
174
|
+
if (options?.images && images.size > 0) {
|
|
175
|
+
payload.images = [...images].map((i) => ({ loc: i }));
|
|
176
|
+
}
|
|
177
|
+
if (options?.videos) {
|
|
178
|
+
const processedVideos = [];
|
|
179
|
+
for (const { videoObj, element } of videos) {
|
|
180
|
+
const sources = videoSources.get(element) || [];
|
|
181
|
+
if (sources.length > 0) {
|
|
182
|
+
for (const source of sources) {
|
|
183
|
+
const resolvedVideoObj = { ...videoObj };
|
|
184
|
+
if (resolvedVideoObj.thumbnail_loc) {
|
|
185
|
+
resolvedVideoObj.thumbnail_loc = resolveUrl(String(resolvedVideoObj.thumbnail_loc));
|
|
186
|
+
}
|
|
187
|
+
processedVideos.push({
|
|
188
|
+
...resolvedVideoObj,
|
|
189
|
+
content_loc: resolveUrl(source)
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
} else {
|
|
193
|
+
processedVideos.push(videoObj);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
const validVideos = processedVideos.filter((v) => {
|
|
197
|
+
return isValidString(v.title) && isValidString(v.description) && isValidString(v.content_loc) && isValidUrl(v.content_loc) && isValidString(v.thumbnail_loc) && isValidUrl(v.thumbnail_loc) && v.title.length <= 2048 && v.description.length <= 2048;
|
|
198
|
+
});
|
|
199
|
+
if (validVideos.length > 0) {
|
|
200
|
+
payload.videos = validVideos;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
if (options?.lastmod && articleModifiedTime) {
|
|
204
|
+
payload.lastmod = articleModifiedTime;
|
|
205
|
+
}
|
|
206
|
+
if (options?.alternatives && alternatives.length > 0 && (alternatives.length > 1 || alternatives[0].hreflang !== "x-default")) {
|
|
207
|
+
payload.alternatives = alternatives;
|
|
208
|
+
}
|
|
209
|
+
return payload;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
export { parseHtmlExtractSitemapMeta as p };
|
package/dist/utils.d.mts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { SitemapUrlInput, SitemapUrl } from '../dist/runtime/types.js';
|
|
2
|
+
export * from '../dist/runtime/types.js';
|
|
3
|
+
|
|
4
|
+
interface SitemapWarning {
|
|
5
|
+
type: 'validation';
|
|
6
|
+
message: string;
|
|
7
|
+
context?: {
|
|
8
|
+
url?: string;
|
|
9
|
+
field?: string;
|
|
10
|
+
value?: unknown;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
interface SitemapParseResult {
|
|
14
|
+
urls: SitemapUrlInput[];
|
|
15
|
+
warnings: SitemapWarning[];
|
|
16
|
+
}
|
|
17
|
+
declare function parseSitemapXml(xml: string): SitemapParseResult;
|
|
18
|
+
|
|
19
|
+
declare function parseHtmlExtractSitemapMeta(html: string, options?: {
|
|
20
|
+
images?: boolean;
|
|
21
|
+
videos?: boolean;
|
|
22
|
+
lastmod?: boolean;
|
|
23
|
+
alternatives?: boolean;
|
|
24
|
+
resolveUrl?: (s: string) => string;
|
|
25
|
+
}): Partial<SitemapUrl>;
|
|
26
|
+
|
|
27
|
+
export { parseHtmlExtractSitemapMeta, parseSitemapXml };
|
|
28
|
+
export type { SitemapParseResult, SitemapWarning };
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { SitemapUrlInput, SitemapUrl } from '../dist/runtime/types.js';
|
|
2
|
+
export * from '../dist/runtime/types.js';
|
|
3
|
+
|
|
4
|
+
interface SitemapWarning {
|
|
5
|
+
type: 'validation';
|
|
6
|
+
message: string;
|
|
7
|
+
context?: {
|
|
8
|
+
url?: string;
|
|
9
|
+
field?: string;
|
|
10
|
+
value?: unknown;
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
interface SitemapParseResult {
|
|
14
|
+
urls: SitemapUrlInput[];
|
|
15
|
+
warnings: SitemapWarning[];
|
|
16
|
+
}
|
|
17
|
+
declare function parseSitemapXml(xml: string): SitemapParseResult;
|
|
18
|
+
|
|
19
|
+
declare function parseHtmlExtractSitemapMeta(html: string, options?: {
|
|
20
|
+
images?: boolean;
|
|
21
|
+
videos?: boolean;
|
|
22
|
+
lastmod?: boolean;
|
|
23
|
+
alternatives?: boolean;
|
|
24
|
+
resolveUrl?: (s: string) => string;
|
|
25
|
+
}): Partial<SitemapUrl>;
|
|
26
|
+
|
|
27
|
+
export { parseHtmlExtractSitemapMeta, parseSitemapXml };
|
|
28
|
+
export type { SitemapParseResult, SitemapWarning };
|