@weborigami/origami 0.3.1 → 0.3.3-jse.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/internal.js CHANGED
@@ -15,6 +15,10 @@ export { default as oriHandler } from "./handlers/ori.handler.js";
15
15
 
16
16
  export { default as oridocumentHandler } from "./handlers/oridocument.handler.js";
17
17
 
18
+ export { default as jseHandler } from "./handlers/jse.handler.js";
19
+
20
+ export { default as jsedocumentHandler } from "./handlers/jsedocument.handler.js";
21
+
18
22
  export { default as processUnpackedContent } from "./common/processUnpackedContent.js";
19
23
 
20
24
  export { default as wasmHandler } from "./handlers/wasm.handler.js";
package/src/js.js CHANGED
@@ -1,38 +1,79 @@
1
- async function fetchWrapper(resource, options) {
2
- const response = await fetch(resource, options);
3
- return response.ok ? await response.arrayBuffer() : undefined;
4
- }
5
-
1
+ /**
2
+ * The complete set of supported JavaScript globals and global-like values.
3
+ *
4
+ * See
5
+ * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects.
6
+ * That page lists some things like `TypedArrays` which are not globals so are
7
+ * omitted here.
8
+ */
6
9
  export default {
10
+ AggregateError,
7
11
  Array,
12
+ ArrayBuffer,
13
+ Atomics,
8
14
  BigInt,
15
+ BigInt64Array,
16
+ BigUint64Array,
9
17
  Boolean,
18
+ DataView,
10
19
  Date,
11
20
  Error,
21
+ EvalError,
22
+ FinalizationRegistry,
23
+ Float32Array,
24
+ Float64Array,
25
+ Function,
12
26
  Infinity,
27
+ Int16Array,
28
+ Int32Array,
29
+ Int8Array,
13
30
  Intl,
31
+ // @ts-ignore Iterator does exist despite what TypeScript thinks
32
+ Iterator,
14
33
  JSON,
15
34
  Map,
16
35
  Math,
17
36
  NaN,
18
37
  Number,
19
38
  Object,
39
+ Promise,
40
+ Proxy,
41
+ RangeError,
42
+ ReferenceError,
43
+ Reflect,
20
44
  RegExp,
21
- Response,
22
45
  Set,
46
+ SharedArrayBuffer,
23
47
  String,
24
48
  Symbol,
49
+ SyntaxError,
50
+ TypeError,
51
+ URIError,
52
+ Uint16Array,
53
+ Uint32Array,
54
+ Uint8Array,
55
+ Uint8ClampedArray,
56
+ WeakMap,
57
+ WeakRef,
58
+ WeakSet,
25
59
  decodeURI,
26
60
  decodeURIComponent,
27
61
  encodeURI,
28
62
  encodeURIComponent,
29
- false: false,
30
- fetch: fetchWrapper,
63
+ eval,
64
+ false: false, // treat like a global
65
+ fetch: fetchWrapper, // special case
66
+ globalThis,
31
67
  isFinite,
32
68
  isNaN,
33
- null: null,
69
+ null: null, // treat like a global
34
70
  parseFloat,
35
71
  parseInt,
36
- true: true,
37
- undefined: undefined,
72
+ true: true, // treat like a global
73
+ undefined,
38
74
  };
75
+
76
+ async function fetchWrapper(resource, options) {
77
+ const response = await fetch(resource, options);
78
+ return response.ok ? await response.arrayBuffer() : undefined;
79
+ }
package/src/site/site.js CHANGED
@@ -1,5 +1,3 @@
1
- export { default as audit } from "./audit.js";
2
- export { default as crawl } from "./crawler/crawl.js";
3
1
  export { default as index } from "./index.js";
4
2
  export { default as jsonKeys } from "./jsonKeys.js";
5
3
  export { default as redirect } from "./redirect.js";
@@ -0,0 +1,6 @@
1
+ import { JSDOM } from "jsdom";
2
+
3
+ export default function htmlDom(html) {
4
+ const dom = JSDOM.fragment(html);
5
+ return dom;
6
+ }
package/src/text/text.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export { taggedTemplateIndent as indent } from "@weborigami/language";
2
2
  export { default as document } from "./document.js";
3
+ export { default as htmlDom } from "./htmlDom.js";
3
4
  export { default as inline } from "./inline.js";
4
5
  export { default as mdHtml } from "./mdHtml.js";
package/src/calc/calc.js DELETED
@@ -1,81 +0,0 @@
1
- import { Tree } from "@weborigami/async-tree";
2
- import assertTreeIsDefined from "../common/assertTreeIsDefined.js";
3
-
4
- export function add(...args) {
5
- console.warn(`Warning: "add" is deprecated. Use the "+" operator instead.`);
6
- const numbers = args.map((arg) => Number(arg));
7
- return numbers.reduce((acc, val) => acc + val, 0);
8
- }
9
-
10
- export function and(...args) {
11
- console.warn(`Warning: "and" is deprecated. Use the "&&" operator instead.`);
12
- return args.every((arg) => arg);
13
- }
14
-
15
- export function divide(a, b) {
16
- console.warn(
17
- `Warning: "divide" is deprecated. Use the "/" operator instead.`
18
- );
19
- return Number(a) / Number(b);
20
- }
21
-
22
- export function equals(a, b) {
23
- console.warn(
24
- `Warning: "equals" is deprecated. Use the "===" operator instead.`
25
- );
26
- return a === b;
27
- }
28
-
29
- /**
30
- * @typedef {import("@weborigami/types").AsyncTree} AsyncTree
31
- *
32
- * @this {AsyncTree|null}
33
- * @param {any} value
34
- * @param {any} trueResult
35
- * @param {any} [falseResult]
36
- */
37
- export async function ifBuiltin(value, trueResult, falseResult) {
38
- console.warn(
39
- `Warning: "if" is deprecated. Use the conditional "a ? b : c" operator instead.`
40
- );
41
-
42
- assertTreeIsDefined(this, "calc:if");
43
- let condition = await value;
44
- if (Tree.isAsyncTree(condition)) {
45
- const keys = Array.from(await condition.keys());
46
- condition = keys.length > 0;
47
- }
48
-
49
- // 0 is true, null/undefined/false is false
50
- let result = condition || condition === 0 ? trueResult : falseResult;
51
- if (typeof result === "function") {
52
- result = await result.call(this);
53
- }
54
- return result;
55
- }
56
- ifBuiltin.key = "if";
57
-
58
- export function multiply(...args) {
59
- console.warn(
60
- `Warning: "multiply" is deprecated. Use the "*" operator instead.`
61
- );
62
- const numbers = args.map((arg) => Number(arg));
63
- return numbers.reduce((acc, val) => acc * val, 1);
64
- }
65
-
66
- export function not(value) {
67
- console.warn(`Warning: "not" is deprecated. Use the "!" operator instead.`);
68
- return !value;
69
- }
70
-
71
- export function or(...args) {
72
- console.warn(`Warning: "or" is deprecated. Use the "||" operator instead.`);
73
- return args.find((arg) => arg);
74
- }
75
-
76
- export function subtract(a, b) {
77
- console.warn(
78
- `Warning: "subtract" is deprecated. Use the "-" operator instead.`
79
- );
80
- return Number(a) - Number(b);
81
- }
package/src/site/audit.js DELETED
@@ -1,19 +0,0 @@
1
- import { Tree } from "@weborigami/async-tree";
2
- import getTreeArgument from "../common/getTreeArgument.js";
3
- import crawl from "./crawler/crawl.js";
4
-
5
- /**
6
- * @this {import("@weborigami/types").AsyncTree|null}
7
- * @param {import("@weborigami/async-tree").Treelike} treelike
8
- */
9
- export default async function audit(treelike) {
10
- const tree = await getTreeArgument(this, arguments, treelike, "site:audit");
11
- const crawled = await crawl.call(this, tree);
12
- let crawlErrorsJson = await crawled.get("crawl-errors.json");
13
- if (!crawlErrorsJson) {
14
- return undefined;
15
- }
16
- const errors = Tree.from(JSON.parse(crawlErrorsJson), { deep: true });
17
- errors.parent = this;
18
- return errors;
19
- }
@@ -1,266 +0,0 @@
1
- import { extension, toString } from "@weborigami/async-tree";
2
- import { isCrawlableHref, normalizeHref } from "./utilities.js";
3
-
4
- // Filter the paths to those that are local to the site.
5
- function filterPaths(paths, baseUrl, localPath) {
6
- // Convert paths to absolute URLs.
7
- const localUrl = new URL(localPath, baseUrl);
8
- const basePathname = baseUrl.pathname;
9
- // @ts-ignore
10
- const absoluteUrls = paths.map((path) => new URL(path, localUrl));
11
-
12
- // Convert the absolute URLs to paths relative to the baseHref. If the URL
13
- // points outside the tree rooted at the baseHref, the relative path will be
14
- // null. We ignore the protocol in this test, because in practice sites often
15
- // fumble the use of http and https, treating them interchangeably.
16
- const relativePaths = absoluteUrls.map((url) => {
17
- if (url.host === baseUrl.host && url.pathname.startsWith(basePathname)) {
18
- const path = url.pathname.slice(basePathname.length);
19
- // The process of creating the URLs will have escaped characters. We
20
- // remove them. This has the side-effect of removing them if they existed
21
- // in the original path; it would be better if we avoided that.
22
- return decodeURIComponent(path);
23
- } else {
24
- return null;
25
- }
26
- });
27
-
28
- // Filter out the null paths.
29
- /** @type {string[]} */
30
- // @ts-ignore
31
- const filteredPaths = relativePaths.filter((path) => path);
32
- return filteredPaths;
33
- }
34
-
35
- /**
36
- * Given a value retrieved from a site using a given key (name), determine what
37
- * kind of file it is and, based on that, find the paths it references.
38
- */
39
- export default function findPaths(value, key, baseUrl, localPath) {
40
- const text = toString(value);
41
-
42
- // We guess the value is HTML if its key has an .html extension or
43
- // doesn't have an extension, or the value starts with `<`.
44
- const ext = key ? extension.extname(key).toLowerCase() : "";
45
- let foundPaths;
46
- if (ext === ".html" || ext === ".htm" || ext === ".xhtml") {
47
- foundPaths = findPathsInHtml(text);
48
- } else if (ext === ".css") {
49
- foundPaths = findPathsInCss(text);
50
- } else if (ext === ".js") {
51
- foundPaths = findPathsInJs(text);
52
- } else if (ext === ".map") {
53
- foundPaths = findPathsInImageMap(text);
54
- } else if (key === "robots.txt") {
55
- foundPaths = findPathsInRobotsTxt(text);
56
- } else if (key === "sitemap.xml") {
57
- foundPaths = findPathsInSitemapXml(text);
58
- } else if (ext === "" && text?.trim().startsWith("<")) {
59
- // Probably HTML
60
- foundPaths = findPathsInHtml(text);
61
- } else {
62
- // Doesn't have an extension we want to process
63
- return {
64
- crawlablePaths: [],
65
- resourcePaths: [],
66
- };
67
- }
68
-
69
- const crawlablePaths = filterPaths(
70
- foundPaths.crawlablePaths,
71
- baseUrl,
72
- localPath
73
- );
74
-
75
- const resourcePaths = filterPaths(
76
- foundPaths.resourcePaths,
77
- baseUrl,
78
- localPath
79
- );
80
-
81
- return {
82
- crawlablePaths,
83
- resourcePaths,
84
- };
85
- }
86
-
87
- function findPathsInCss(css) {
88
- const resourcePaths = [];
89
- let match;
90
-
91
- // Find `url()` functions.
92
- const urlRegex = /url\(["']?(?<href>[^"')]*?)["']?\)/g;
93
- while ((match = urlRegex.exec(css))) {
94
- const href = normalizeHref(match.groups?.href);
95
- if (href) {
96
- resourcePaths.push(href);
97
- }
98
- }
99
-
100
- return {
101
- crawlablePaths: [],
102
- resourcePaths,
103
- };
104
- }
105
-
106
- // These are ancient server-side image maps. They're so old that it's hard to
107
- // find documentation on them, but they're used on the reference Space Jam
108
- // website we use for testing the crawler. Example:
109
- // https://www.spacejam.com/1996/bin/bball.map
110
- function findPathsInImageMap(imageMap) {
111
- const resourcePaths = [];
112
- let match;
113
-
114
- // Find hrefs as the second column in each line.
115
- const hrefRegex = /^\w+ (?<href>\S+)(\s*$| [\d, ]+$)/gm;
116
- while ((match = hrefRegex.exec(imageMap))) {
117
- const href = normalizeHref(match.groups?.href);
118
- if (href) {
119
- resourcePaths.push(href);
120
- }
121
- }
122
-
123
- return {
124
- crawlablePaths: [],
125
- resourcePaths,
126
- };
127
- }
128
-
129
- function findPathsInJs(js) {
130
- const crawlablePaths = [];
131
- let match;
132
-
133
- // Find `import` statements.
134
- const importRegex = /import [\s\S]+?from\s+["'](?<import>[^"']*)["'];/g;
135
- while ((match = importRegex.exec(js))) {
136
- const href = normalizeHref(match.groups?.import);
137
- if (href) {
138
- crawlablePaths.push(href);
139
- }
140
- }
141
-
142
- return {
143
- crawlablePaths,
144
- resourcePaths: [],
145
- };
146
- }
147
-
148
- function findPathsInHtml(html) {
149
- const crawlablePaths = [];
150
- const resourcePaths = [];
151
- let match;
152
-
153
- // Find `href` attributes in anchor and link tags.
154
- const linkRegex =
155
- /<(?:a|A|link|LINK)[\s][^>]*?(?:href|HREF)=["'](?<link>[^>]*?)["'][^>]*>/g;
156
- while ((match = linkRegex.exec(html))) {
157
- // Links can point to other crawlable paths or to resource paths.
158
- // We guess the type based on the extension.
159
- const href = normalizeHref(match.groups?.link);
160
- if (href) {
161
- if (isCrawlableHref(href)) {
162
- crawlablePaths.push(href);
163
- } else {
164
- resourcePaths.push(href);
165
- }
166
- }
167
- }
168
-
169
- // Find `src` attributes in img and script tags.
170
- const srcRegex =
171
- /<(?<tag>img|IMG|script|SCRIPT)[\s][^>]*?(?:src|SRC)=["'](?<src>[^>]*?)["'][^>]*>/g;
172
- while ((match = srcRegex.exec(html))) {
173
- const tag = match.groups?.tag;
174
- const src = normalizeHref(match.groups?.src);
175
- if (src) {
176
- if (tag === "script" || tag === "SCRIPT") {
177
- crawlablePaths.push(src);
178
- } else {
179
- resourcePaths.push(src);
180
- }
181
- }
182
- }
183
-
184
- // Find `url()` functions in CSS.
185
- const urlRegex = /url\(["']?(?<href>[^"')]*?)["']?\)/g;
186
- while ((match = urlRegex.exec(html))) {
187
- const href = normalizeHref(match.groups?.href);
188
- if (href) {
189
- resourcePaths.push(href);
190
- }
191
- }
192
-
193
- // Find `src` attribute on frame tags.
194
- const frameRegex =
195
- /<(?:frame|FRAME)[\s][^>]*?(?:src|SRC)=["'](?<href>[^>]*?)["'][^>]*>/g;
196
- while ((match = frameRegex.exec(html))) {
197
- const href = normalizeHref(match.groups?.href);
198
- if (href) {
199
- crawlablePaths.push(href);
200
- }
201
- }
202
-
203
- // Find ancient `background` attribute on body tag.
204
- const backgroundRegex =
205
- /<(?:body|BODY)[\s][^>]*?(?:background|BACKGROUND)=["'](?<href>[^>]*?)["'][^>]*>/g;
206
- while ((match = backgroundRegex.exec(html))) {
207
- const href = normalizeHref(match.groups?.href);
208
- if (href) {
209
- resourcePaths.push(href);
210
- }
211
- }
212
-
213
- // Find `href` attribute on area tags.
214
- const areaRegex =
215
- /<(?:area|AREA)[\s][^>]*?(?:href|HREF)=["'](?<href>[^>]*?)["'][^>]*>/g;
216
- while ((match = areaRegex.exec(html))) {
217
- const href = normalizeHref(match.groups?.href);
218
- if (href) {
219
- crawlablePaths.push(href);
220
- }
221
- }
222
-
223
- // Also look for JS `import` statements that might be in <script type="module"> tags.
224
- const jsResults = findPathsInJs(html);
225
- crawlablePaths.push(...jsResults.crawlablePaths);
226
-
227
- return { crawlablePaths, resourcePaths };
228
- }
229
-
230
- function findPathsInRobotsTxt(txt) {
231
- const crawlablePaths = [];
232
- let match;
233
-
234
- // Find `Sitemap` directives.
235
- const sitemapRegex = /Sitemap:\s*(?<href>[^\s]*)/g;
236
- while ((match = sitemapRegex.exec(txt))) {
237
- const href = normalizeHref(match.groups?.href);
238
- if (href) {
239
- crawlablePaths.push(href);
240
- }
241
- }
242
-
243
- return {
244
- crawlablePaths,
245
- resourcePaths: [],
246
- };
247
- }
248
-
249
- function findPathsInSitemapXml(xml) {
250
- const crawlablePaths = [];
251
- let match;
252
-
253
- // Find `loc` elements.
254
- const locRegex = /<loc>(?<href>[^<]*)<\/loc>/g;
255
- while ((match = locRegex.exec(xml))) {
256
- const href = normalizeHref(match.groups?.href);
257
- if (href) {
258
- crawlablePaths.push(href);
259
- }
260
- }
261
-
262
- return {
263
- crawlablePaths,
264
- resourcePaths: [],
265
- };
266
- }
@@ -1,37 +0,0 @@
1
- import { extension, trailingSlash } from "@weborigami/async-tree";
2
-
3
- // A fake base URL used to handle cases where an href is relative and must be
4
- // treated relative to some base URL.
5
- const fakeBaseUrl = new URL("https://fake");
6
-
7
- export function isCrawlableHref(href) {
8
- // Use a fake base URL to cover the case where the href is relative.
9
- const url = new URL(href, fakeBaseUrl);
10
- const pathname = url.pathname;
11
- const lastKey = pathname.split("/").pop() ?? "";
12
- if (lastKey === "robots.txt" || lastKey === "sitemap.xml") {
13
- return true;
14
- }
15
- const ext = extension.extname(lastKey);
16
- // We assume an empty extension is HTML.
17
- const crawlableExtensions = [".html", ".css", ".js", ".map", ".xhtml", ""];
18
- return crawlableExtensions.includes(ext);
19
- }
20
-
21
- // Remove any search parameters or hash from the href. Preserve absolute or
22
- // relative nature of URL. If the URL only has a search or hash, return null.
23
- export function normalizeHref(href) {
24
- // Remove everything after a `#` or `?` character.
25
- const normalized = href.split(/[?#]/)[0];
26
- return normalized === "" ? null : normalized;
27
- }
28
-
29
- // For indexing and storage purposes, treat a path that ends in a trailing slash
30
- // as if it ends in index.html.
31
- export function normalizeKeys(keys) {
32
- const normalized = keys.slice();
33
- if (normalized.length === 0 || trailingSlash.has(normalized.at(-1))) {
34
- normalized.push("index.html");
35
- }
36
- return normalized;
37
- }