@weborigami/origami 0.3.0 → 0.3.2

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@weborigami/origami",
-  "version": "0.3.0",
+  "version": "0.3.2",
   "description": "Web Origami language, CLI, framework, and server",
   "type": "module",
   "repository": {
@@ -17,13 +17,15 @@
     "typescript": "5.8.2"
   },
   "dependencies": {
-    "@weborigami/async-tree": "0.3.0",
-    "@weborigami/language": "0.3.0",
+    "@weborigami/async-tree": "0.3.2",
     "@weborigami/json-feed-to-rss": "1.0.0",
-    "@weborigami/types": "0.3.0",
+    "@weborigami/language": "0.3.2",
+    "@weborigami/types": "0.3.2",
+    "css-tree": "3.1.0",
     "exif-parser": "0.1.12",
     "graphviz-wasm": "3.0.2",
     "highlight.js": "11.11.1",
+    "jsdom": "26.1.0",
     "marked": "15.0.7",
     "marked-gfm-heading-id": "4.1.1",
     "marked-highlight": "2.2.1",
@@ -0,0 +1,85 @@
+import { pathFromKeys, symbols, Tree } from "@weborigami/async-tree";
+import getTreeArgument from "../../common/getTreeArgument.js";
+import crawlResources from "./crawlResources.js";
+import { getBaseUrl } from "./utilities.js";
+
+/**
+ * Crawl the indicated tree and return an audit of any broken links to internal
+ * pages or other resources.
+ *
+ * @typedef {import("@weborigami/types").AsyncTree} AsyncTree
+ * @typedef {import("@weborigami/async-tree").Treelike} Treelike
+ *
+ * @this {AsyncTree|null}
+ * @param {Treelike} treelike
+ * @param {string} [baseHref]
+ */
+export default async function audit(treelike, baseHref) {
+  const tree = await getTreeArgument(this, arguments, treelike, "site:audit");
+  const baseUrl = getBaseUrl(baseHref, treelike);
+
+  let errors = {};
+  let report;
+  const resourceReferences = {};
+  const resourcePromises = {};
+
+  // Iterate through all the resources to crawl the whole tree.
+  for await (const result of crawlResources(tree, baseUrl)) {
+    const { normalizedKeys, resourcePaths, value: resource } = result;
+    const normalizedPath = pathFromKeys(normalizedKeys);
+    if (normalizedPath === "crawl-errors.json") {
+      // Final error report; add missing pages to the errors
+      report = JSON.parse(resource);
+      for (const [path, pagePaths] of Object.entries(report)) {
+        if (!errors[path]) {
+          errors[path] = [];
+        }
+        errors[path].push(...pagePaths);
+      }
+    } else {
+      // Record which resources this path references
+      resourceReferences[normalizedPath] = resourcePaths;
+
+      // Add all resources to the set that should be verified
+      for (const resourcePath of resourcePaths) {
+        // Start request, don't wait for it to complete yet
+        resourcePromises[resourcePath] ??= Tree.traversePath(
+          tree,
+          resourcePath
+        ).then(
+          // Just return true or false to indicate if value is defined
+          (value) => value !== undefined
+        );
+      }
+    }
+  }
+
+  // Add any references to missing resources to the errors
+  for (const [refererPath, resourcePaths] of Object.entries(
+    resourceReferences
+  )) {
+    for (const resourcePath of resourcePaths) {
+      const found = await resourcePromises[resourcePath];
+      if (!found) {
+        if (!errors[refererPath]) {
+          errors[refererPath] = [];
+        }
+        errors[refererPath].push(resourcePath);
+      }
+    }
+  }
+
+  if (Object.keys(errors).length === 0) {
+    return undefined;
+  }
+
+  Object.defineProperty(errors, symbols.parent, {
+    enumerable: false,
+    value: this,
+  });
+  Object.defineProperty(errors, symbols.deep, {
+    enumerable: false,
+    value: true,
+  });
+  return errors;
+}
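
As a rough sketch of how this new `audit` builtin might be called from JavaScript (the `ObjectTree` wrapper is a real `@weborigami/async-tree` export, but the sample site contents and calling convention here are illustrative assumptions, not part of the diff):

```js
// Hypothetical usage sketch, not part of the diff.
import { ObjectTree } from "@weborigami/async-tree";
import audit from "./audit.js";

const site = new ObjectTree({
  "index.html": `<a href="about.html">About</a> <img src="missing.png">`,
  "about.html": "<p>About</p>",
});

// Returns undefined when nothing is broken; otherwise a map from each
// referring page to the paths it references that could not be found, e.g.
// { "index.html": ["missing.png"] }
const errors = await audit.call(null, site);
```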
@@ -2,13 +2,12 @@ import {
   DeepObjectTree,
   Tree,
   deepMerge,
-  isPlainObject,
   keysFromPath,
-  trailingSlash,
 } from "@weborigami/async-tree";
 import { InvokeFunctionsTransform } from "@weborigami/language";
 import getTreeArgument from "../../common/getTreeArgument.js";
 import crawlResources from "./crawlResources.js";
+import { addValueToObject, getBaseUrl } from "./utilities.js";
 
 /**
  * Crawl a tree, starting its root index.html page, and following links to
@@ -20,6 +19,7 @@ import crawlResources from "./crawlResources.js";
  *
  * @typedef {import("@weborigami/types").AsyncTree} AsyncTree
  * @typedef {import("@weborigami/async-tree").Treelike} Treelike
+ *
  * @this {AsyncTree|null}
  * @param {Treelike} treelike
  * @param {string} [baseHref]
@@ -27,39 +27,10 @@ import crawlResources from "./crawlResources.js";
  */
 export default async function crawlBuiltin(treelike, baseHref) {
   const tree = await getTreeArgument(this, arguments, treelike, "site:crawl");
-
-  if (baseHref === undefined) {
-    // Ask tree or original treelike if it has an `href` property we can use as
-    // the base href to determine whether a link is local within the tree or
-    // not. If not, use a fake `local:/` base href.
-    baseHref =
-      /** @type {any} */ (tree).href ??
-      /** @type {any} */ (treelike).href ??
-      "local:/";
-    if (!baseHref?.endsWith("/")) {
-      baseHref += "/";
-    }
-  } else {
-    // Is the href already valid?
-    let isHrefValid = false;
-    try {
-      new URL(baseHref);
-      isHrefValid = true;
-    } catch (e) {
-      // Ignore
-    }
-    if (!isHrefValid) {
-      // Use a fake base href.
-      baseHref = `local:/${baseHref}`;
-    }
-  }
-
-  // @ts-ignore
-  const baseUrl = new URL(baseHref);
+  const baseUrl = getBaseUrl(baseHref, treelike);
 
   const cache = {};
   const resources = {};
-  const errors = [];
 
   // We iterate until there are no more promises to wait for.
   for await (const result of crawlResources(tree, baseUrl)) {
@@ -81,14 +52,6 @@ export default async function crawlBuiltin(treelike, baseHref) {
     }
   }
 
-  if (errors.length) {
-    addValueToObject(
-      cache,
-      ["crawl-errors.json"],
-      JSON.stringify(errors, null, 2)
-    );
-  }
-
   // Merge the cache on top of the resources tree. If we have an actual value
   // for something already, that's better than a function that will get that
   // value.
@@ -98,29 +61,3 @@ export default async function crawlBuiltin(treelike, baseHref) {
   );
   return result;
 }
-
-function addValueToObject(object, keys, value) {
-  for (let i = 0, current = object; i < keys.length; i++) {
-    const key = trailingSlash.remove(keys[i]);
-    if (i === keys.length - 1) {
-      // Write out value
-      if (isPlainObject(current[key])) {
-        // Route with existing values; treat the new value as an index.html
-        current[key]["index.html"] = value;
-      } else {
-        current[key] = value;
-      }
-    } else {
-      // Traverse further
-      if (!current[key]) {
-        current[key] = {};
-      } else if (!isPlainObject(current[key])) {
-        // Already have a value at this point. The site has a page at a route
-        // like /foo, and the site also has resources within that at routes like
-        // /foo/bar.jpg. We move the current value to "index.html".
-        current[key] = { "index.html": current[key] };
-      }
-      current = current[key];
-    }
-  }
-}
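
The base-href logic deleted above now evidently lives in the shared `utilities.js` module (not shown in this diff) as `getBaseUrl`, imported by both `crawl` and the new `audit`. A plausible reconstruction from the deleted code, offered only as a sketch:

```js
// Sketch of a getBaseUrl helper reconstructed from the logic deleted above;
// the actual utilities.js implementation is not shown in this diff.
export function getBaseUrl(baseHref, treelike) {
  if (baseHref === undefined) {
    // Use an `href` property on the treelike if present, else a fake
    // `local:/` base href.
    baseHref = /** @type {any} */ (treelike).href ?? "local:/";
    if (!baseHref.endsWith("/")) {
      baseHref += "/";
    }
  } else {
    try {
      // Is the href already a valid URL?
      new URL(baseHref);
    } catch (e) {
      // No: prefix a fake scheme so it parses.
      baseHref = `local:/${baseHref}`;
    }
  }
  return new URL(baseHref);
}
```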
@@ -129,28 +129,54 @@ async function processPath(tree, path, baseUrl) {
   keys = keys.map(decodeURIComponent);
 
   // Traverse tree to get value.
-  let value = await Tree.traverse(tree, ...keys);
-  const normalizedKeys = keys.slice();
-  let normalizedPath = path;
-  if (Tree.isTreelike(value)) {
-    // Path is actually a directory. See if we can get the empty string or
-    // "index.html".
-    value =
-      (await Tree.traverse(value, "")) ??
-      (await Tree.traverse(value, "index.html"));
-    if (value !== undefined) {
-      if (path.length > 0) {
-        // Mark the path as ending in a slash
-        normalizedPath = trailingSlash.add(path);
-        const key = normalizedKeys.pop();
-        normalizedKeys.push(trailingSlash.add(key));
+  let value;
+  let normalizedKeys;
+  let normalizedPath;
+  try {
+    value = await Tree.traverse(tree, ...keys);
+    normalizedKeys = keys.slice();
+    normalizedPath = path;
+    if (Tree.isTreelike(value)) {
+      // Path is actually a directory. See if we can get the empty string or
+      // "index.html".
+      value =
+        (await Tree.traverse(value, "")) ??
+        (await Tree.traverse(value, "index.html"));
+      if (value !== undefined) {
+        if (path.length > 0) {
+          // Mark the path as ending in a slash
+          normalizedPath = trailingSlash.add(path);
+          const key = normalizedKeys.pop();
+          normalizedKeys.push(trailingSlash.add(key));
+        }
+
+        // Add index.html to keys if it's not already there
+        if (normalizedKeys.at(-1) !== "index.html") {
+          normalizedKeys.push("index.html");
+        }
       }
+    }
 
-      // Add index.html to keys if it's not already there
-      if (normalizedKeys.at(-1) !== "index.html") {
-        normalizedKeys.push("index.html");
+    if (value === undefined && path.length > 0) {
+      // The path may be a URL like `foo` or `foo/` that points to `foo.html`,
+      // so we'll try adding `.html` to the end. We don't want to check every
+      // path twice, so we only do this if the last key does *not* include an
+      // extension.
+      const lastKey = keys.at(-1);
+      if (lastKey !== "" && !lastKey?.includes(".")) {
+        const adjustedLastKey = `${trailingSlash.remove(lastKey)}.html`;
+        const adjustedKeys = [...keys.slice(0, -1), adjustedLastKey];
+        value = await Tree.traverse(tree, ...adjustedKeys);
+        if (value !== undefined) {
+          // Page exists at foo.html
+          normalizedPath = pathFromKeys(adjustedKeys);
+          normalizedKeys = adjustedKeys;
+        }
       }
     }
+  } catch (error) {
+    // Ignore errors, return empty paths below
+    value = undefined;
   }
 
   if (value === undefined) {
@@ -0,0 +1,90 @@
+import { extension, toString } from "@weborigami/async-tree";
+import pathsInCss from "./pathsInCss.js";
+import pathsInHtml from "./pathsInHtml.js";
+import pathsInImageMap from "./pathsInImageMap.js";
+import pathsInJs from "./pathsInJs.js";
+import pathsInRobotsTxt from "./pathsInRobotsTxt.js";
+import pathsInSitemap from "./pathsInSitemap.js";
+
+// Filter the paths to those that are local to the site.
+function filterPaths(paths, baseUrl, localPath) {
+  // Convert paths to absolute URLs.
+  const localUrl = new URL(localPath, baseUrl);
+  const basePathname = baseUrl.pathname;
+  // @ts-ignore
+  const absoluteUrls = paths.map((path) => new URL(path, localUrl));
+
+  // Convert the absolute URLs to paths relative to the baseHref. If the URL
+  // points outside the tree rooted at the baseHref, the relative path will be
+  // null. We ignore the protocol in this test, because in practice sites often
+  // fumble the use of http and https, treating them interchangeably.
+  const relativePaths = absoluteUrls.map((url) => {
+    if (url.host === baseUrl.host && url.pathname.startsWith(basePathname)) {
+      const path = url.pathname.slice(basePathname.length);
+      // The process of creating the URLs will have escaped characters. We
+      // remove them. This has the side-effect of removing them if they existed
+      // in the original path; it would be better if we avoided that.
+      return decodeURIComponent(path);
+    } else {
+      return null;
+    }
+  });
+
+  // Filter out the null paths.
+  /** @type {string[]} */
+  // @ts-ignore
+  const filteredPaths = relativePaths.filter((path) => path);
+  return filteredPaths;
+}
+
+/**
+ * Given a value retrieved from a site using a given key (name), determine what
+ * kind of file it is and, based on that, find the paths it references.
+ */
+export default function findPaths(value, key, baseUrl, localPath) {
+  const text = toString(value);
+
+  // We guess the value is HTML if its key has an .html extension or
+  // doesn't have an extension, or the value starts with `<`.
+  const ext = key ? extension.extname(key).toLowerCase() : "";
+  let foundPaths;
+  if (ext === ".html" || ext === ".htm" || ext === ".xhtml") {
+    foundPaths = pathsInHtml(text);
+  } else if (ext === ".css") {
+    foundPaths = pathsInCss(text);
+  } else if (ext === ".js") {
+    foundPaths = pathsInJs(text);
+  } else if (ext === ".map") {
+    foundPaths = pathsInImageMap(text);
+  } else if (key === "robots.txt") {
+    foundPaths = pathsInRobotsTxt(text);
+  } else if (key === "sitemap.xml") {
+    foundPaths = pathsInSitemap(text);
+  } else if (ext === "" && text?.trim().startsWith("<")) {
+    // Probably HTML
+    foundPaths = pathsInHtml(text);
+  } else {
+    // Doesn't have an extension we want to process
+    return {
+      crawlablePaths: [],
+      resourcePaths: [],
+    };
+  }
+
+  const crawlablePaths = filterPaths(
+    foundPaths.crawlablePaths,
+    baseUrl,
+    localPath
+  );
+
+  const resourcePaths = filterPaths(
+    foundPaths.resourcePaths,
+    baseUrl,
+    localPath
+  );
+
+  return {
+    crawlablePaths,
+    resourcePaths,
+  };
+}
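
To illustrate the filtering behavior with invented inputs: paths found in a page are resolved against the page's own URL, and anything that resolves outside the base URL's host and path prefix is dropped:

```js
// Illustration only; these inputs are invented for the example.
import findPaths from "./findPaths.js";

const html = `<a href="/docs/intro.html">Intro</a>
<a href="https://example.org/external">External</a>
<img src="logo.png">`;

const baseUrl = new URL("https://example.com/docs/");
const { crawlablePaths, resourcePaths } = findPaths(
  html,
  "index.html",
  baseUrl,
  "index.html"
);
// crawlablePaths ~ ["intro.html"] -- /docs/intro.html relative to the base.
// resourcePaths ~ ["logo.png"], assuming addHref classifies an image src
// as a resource.
// The example.org link resolves to a different host and is filtered out.
```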
@@ -0,0 +1,51 @@
+import { parse, walk } from "css-tree";
+import { addHref } from "./utilities.js";
+
+const imageFunctions = ["cross-fade", "image", "image-set"];
+
+export default function pathsInCss(css, context = "stylesheet") {
+  const paths = {
+    crawlablePaths: [],
+    resourcePaths: [],
+  };
+
+  let ast;
+  try {
+    ast = parse(css, { context });
+  } catch (e) {
+    // If the CSS is invalid, we can't parse it, so we can't extract paths. For
+    // now we just return no paths.
+    return paths;
+  }
+
+  if (!ast) {
+    // Unclear why parser sometimes returns an undefined AST
+    return paths;
+  }
+
+  walk(
+    ast,
+    /** @this {any} */
+    function (node) {
+      const { type, value } = node;
+      if (
+        this.atrule?.name === "import" &&
+        (type === "String" || type === "Url")
+      ) {
+        // A plain string or url() in an @import
+        addHref(paths, value, true);
+      } else if (
+        type === "String" &&
+        imageFunctions.includes(this.function?.name)
+      ) {
+        // A plain string in a cross-fade(), image(), or image-set()
+        addHref(paths, value, false);
+      } else if (type === "Url") {
+        // A url() anywhere else
+        addHref(paths, value, false);
+      }
+    }
+  );
+
+  return paths;
+}
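
A small invented example of the distinction the walker draws: `@import` targets are marked crawlable (they are more CSS to scan), while other `url()` references are plain resources. This assumes `addHref` files each href under the flag it's given:

```js
// Illustration only; the CSS is invented for the example.
import pathsInCss from "./pathsInCss.js";

const css = `
@import "theme.css";
body { background: url("bg.png"); }
`;

const paths = pathsInCss(css);
// crawlablePaths ~ ["theme.css"]  (an @import: more CSS to scan)
// resourcePaths  ~ ["bg.png"]     (a url() elsewhere)
```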
@@ -0,0 +1,161 @@
+import { JSDOM, VirtualConsole } from "jsdom";
+import pathsInCss from "./pathsInCss.js";
+import pathsInJs from "./pathsInJs.js";
+import { addHref } from "./utilities.js";
+
+export default function pathsInHtml(html) {
+  const paths = {
+    crawlablePaths: [],
+    resourcePaths: [],
+  };
+
+  // Create a virtual console to avoid logging errors to the console
+  const virtualConsole = new VirtualConsole();
+  const document = new JSDOM(html, { virtualConsole }).window.document;
+
+  // Find `href` attributes in anchor, area, link, SVG tags.
+  //
+  // NOTE: As of April 2024, jsdom querySelectorAll does not appear to find
+  // elements with mixed-case tag names.
+  const hrefTags = document.querySelectorAll(
+    "a[href], area[href], image[href], feImage[href], filter[href], linearGradient[href], link[href], mpath[href], pattern[href], radialGradient[href], textPath[href], use[href]"
+  );
+  for (const hrefTag of hrefTags) {
+    const crawlable = ["A", "AREA"].includes(hrefTag.tagName)
+      ? true
+      : undefined;
+    addHref(paths, hrefTag.getAttribute("href"), crawlable);
+  }
+
+  // Find `src` attributes in input, frame, media, and script tags.
+  const srcTags = document.querySelectorAll(
+    "audio[src], embed[src], frame[src], iframe[src], img[src], input[src], script[src], source[src], track[src], video[src]"
+  );
+  for (const srcTag of srcTags) {
+    const crawlable = ["FRAME", "IFRAME"].includes(srcTag.tagName)
+      ? true
+      : srcTag.tagName === "SCRIPT"
+      ? srcTag.type === "module" // Only crawl modules
+      : undefined;
+    addHref(paths, srcTag.getAttribute("src"), crawlable);
+  }
+
+  // Find `srcset` attributes in image and source tags.
+  const srcsetTags = document.querySelectorAll("img[srcset], source[srcset]");
+  for (const srcsetTag of srcsetTags) {
+    const srcset = srcsetTag.getAttribute("srcset");
+    const srcRegex = /(?<url>[^\s,]+)(?=\s+\d+(?:\.\d+)?[wxh])/g;
+    let match;
+    while ((match = srcRegex.exec(srcset))) {
+      if (match.groups?.url) {
+        addHref(paths, match.groups.url, false);
+      }
+    }
+  }
+
+  // Find `poster` attributes in <video> tags.
+  const posterTags = document.querySelectorAll("video[poster]");
+  for (const posterTag of posterTags) {
+    addHref(paths, posterTag.getAttribute("poster"), false);
+  }
+
+  // Find `data` attributes in <object> tags.
+  const objectTags = document.querySelectorAll("object[data]");
+  for (const objectTag of objectTags) {
+    addHref(paths, objectTag.getAttribute("data"), false);
+  }
+
+  // Find deprecated `background` attribute on body and table tags.
+  const backgroundTags = document.querySelectorAll(
+    "body[background], table[background], td[background], th[background]"
+  );
+  for (const backgroundTag of backgroundTags) {
+    addHref(paths, backgroundTag.getAttribute("background"), false);
+  }
+
+  // Find deprecated `longdesc` attributes on <img> tags.
+  const longdescTags = document.querySelectorAll("img[longdesc]");
+  for (const longdescTag of longdescTags) {
+    addHref(paths, longdescTag.getAttribute("longdesc"), false);
+  }
+
+  // Find paths in <meta> image tags.
+  const imageMetaTags = document.querySelectorAll('meta[property$=":image"]');
+  for (const imageMetaTag of imageMetaTags) {
+    const content = imageMetaTag.getAttribute("content");
+    if (content) {
+      addHref(paths, content, false);
+    }
+  }
+
+  // Find paths in CSS in <style> tags.
+  const styleTags = document.querySelectorAll("style");
+  for (const styleAttribute of styleTags) {
+    const cssPaths = pathsInCss(styleAttribute.textContent);
+    paths.crawlablePaths.push(...cssPaths.crawlablePaths);
+    paths.resourcePaths.push(...cssPaths.resourcePaths);
+  }
+
+  // Find URLs in CSS in `style` attributes.
+  const styleAttributeTags = document.querySelectorAll("[style]");
+  for (const tag of styleAttributeTags) {
+    const style = tag.getAttribute("style");
+    const stylePaths = pathsInCss(style, "declarationList");
+    stylePaths.resourcePaths.forEach((href) => {
+      addHref(paths, href, false);
+    });
+  }
+
+  // Find URLs in SVG attributes.
+  const svgAttributeNames = [
+    "clip-path",
+    "fill",
+    "filter",
+    "marker-end",
+    "marker-start",
+    "mask",
+    "stroke",
+  ];
+  const svgTags = document.querySelectorAll(
+    svgAttributeNames.map((name) => `[${name}]`).join(", ")
+  );
+  for (const svgTag of svgTags) {
+    for (const name of svgAttributeNames) {
+      const attributeValue = svgTag.getAttribute(name);
+      if (!attributeValue) {
+        continue;
+      }
+      const urlRegex = /url\((['"]?)(?<href>.*?)\1\)/g;
+      const attributeValueMatch = urlRegex.exec(attributeValue);
+      if (attributeValueMatch) {
+        const href = attributeValueMatch.groups?.href;
+        if (href) {
+          addHref(paths, href, false);
+        }
+      }
+    }
+  }
+
+  // Also look for JS `import` statements that might be in <script type="module"> tags.
+  const scriptTags = document.querySelectorAll("script[type='module']");
+  for (const scriptTag of scriptTags) {
+    const jsPaths = pathsInJs(scriptTag.textContent);
+    paths.crawlablePaths.push(...jsPaths.crawlablePaths);
+  }
+
+  // Special handling for <noframes> in framesets. We need to use a regex for
+  // this because the jsdom parser supports frames, so it will treat a
+  // <noframes> tag as a text node.
+  const noframesRegex = /<noframes>(?<html>[\s\S]*?)<\/noframes>/g;
+  let match;
+  while ((match = noframesRegex.exec(html))) {
+    const noframesHtml = match.groups?.html;
+    if (noframesHtml) {
+      const noframesPaths = pathsInHtml(noframesHtml);
+      paths.crawlablePaths.push(...noframesPaths.crawlablePaths);
+      paths.resourcePaths.push(...noframesPaths.resourcePaths);
+    }
+  }
+
+  return paths;
+}
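
An invented example of the crawlable/resource split this file implements: anchors, frames, and module scripts are followed by the crawler, while most other references are only checked for existence. The exact classification of hrefs passed with an `undefined` flag depends on `addHref` in `utilities.js`, which this diff doesn't show:

```js
// Illustration only; the HTML is invented for the example.
import pathsInHtml from "./pathsInHtml.js";

const html = `
<a href="about/">About</a>
<img src="photo.jpg" srcset="photo-2x.jpg 2x">
<video poster="cover.png" src="clip.mp4"></video>
`;

const paths = pathsInHtml(html);
// The anchor is passed to addHref with crawlable = true, the poster and
// srcset entries with crawlable = false, and the img/video src values with
// crawlable = undefined (left for addHref to classify). A plausible result:
//   crawlablePaths ~ ["about/"]
//   resourcePaths  ~ ["photo.jpg", "photo-2x.jpg", "cover.png", "clip.mp4"]
```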
@@ -0,0 +1,25 @@
+import { normalizeHref } from "./utilities.js";
+
+// These are ancient server-side image maps. They're so old that it's hard to
+// find documentation on them, but they're used on the reference Space Jam
+// website we use for testing the crawler.
+//
+// Example: https://www.spacejam.com/1996/bin/bball.map
+export default function pathsInImageMap(imageMap) {
+  const resourcePaths = [];
+  let match;
+
+  // Find hrefs as the second column in each line.
+  const hrefRegex = /^\w+ (?<href>\S+)(\s*$| [\d, ]+$)/gm;
+  while ((match = hrefRegex.exec(imageMap))) {
+    const href = normalizeHref(match.groups?.href);
+    if (href) {
+      resourcePaths.push(href);
+    }
+  }
+
+  return {
+    crawlablePaths: [],
+    resourcePaths,
+  };
+}
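
For reference, an invented image map in this old server-side format: each line is a shape name, an href, and optional coordinates, and the regex above picks out the second column:

```js
// Illustration only; a fabricated map in the format described above.
import pathsInImageMap from "./pathsInImageMap.js";

const imageMap = `default index.html
rect players.html 0,0 100,50
circle schedule.html 150,75 25`;

const { resourcePaths } = pathsInImageMap(imageMap);
// Assuming normalizeHref passes these hrefs through unchanged:
//   resourcePaths ~ ["index.html", "players.html", "schedule.html"]
```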