@weborigami/origami 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,140 @@
1
/**
 * Collect the module specifiers referenced by a piece of JavaScript source.
 *
 * The following statement forms are recognized:
 *
 * * `import … from "x"`
 * * `import "x"`
 * * `export … from "x"`
 * * `export { … } from "x"`
 *
 * The source is scanned lexically rather than parsed, so specifiers that only
 * appear inside comments or ordinary string literals are not reported.
 *
 * @param {string} js - JavaScript source text
 * @returns {{ crawlablePaths: string[], resourcePaths: string[] }} the module
 * specifiers as crawlable paths; JavaScript never contributes resource paths
 */
export default function pathsInJs(js) {
  const crawlablePaths = modulePaths(js);
  return { crawlablePaths, resourcePaths: [] };
}
22
+
23
+ function modulePaths(src) {
24
+ const tokens = Array.from(tokenize(src));
25
+ const paths = new Set();
26
+
27
+ for (let i = 0; i < tokens.length; i++) {
28
+ const t = tokens[i];
29
+
30
+ // static import
31
+ if (t.type === "Identifier" && t.value === "import") {
32
+ // look ahead for either:
33
+ // import "mod"
34
+ // import … from "mod"
35
+ let j = i + 1;
36
+ // skip any punctuation or identifiers until we hit 'from' or a StringLiteral
37
+ while (
38
+ j < tokens.length &&
39
+ tokens[j].type !== "StringLiteral" &&
40
+ !(tokens[j].type === "Identifier" && tokens[j].value === "from")
41
+ ) {
42
+ j++;
43
+ }
44
+ // import "mod"
45
+ if (tokens[j]?.type === "StringLiteral") {
46
+ paths.add(tokens[j].value);
47
+ } else if (
48
+ // import … from "mod"
49
+ tokens[j]?.value === "from" &&
50
+ tokens[j + 1]?.type === "StringLiteral"
51
+ ) {
52
+ paths.add(tokens[j + 1].value);
53
+ }
54
+ } else if (t.type === "Identifier" && t.value === "export") {
55
+ // re-export or export‐from
56
+
57
+ // find a 'from' token on the same statement
58
+ let j = i + 1;
59
+ while (
60
+ j < tokens.length &&
61
+ !(tokens[j].type === "Identifier" && tokens[j].value === "from")
62
+ ) {
63
+ // stop at semicolon so we don't run past the statement
64
+ if (tokens[j].type === "Punctuator" && tokens[j].value === ";") {
65
+ break;
66
+ }
67
+ j++;
68
+ }
69
+
70
+ if (
71
+ tokens[j]?.value === "from" &&
72
+ tokens[j + 1]?.type === "StringLiteral"
73
+ ) {
74
+ paths.add(tokens[j + 1].value);
75
+ }
76
+ }
77
+ }
78
+
79
+ return [...paths];
80
+ }
81
+
82
+ // Lexer emits Identifiers, StringLiterals, and Punctuators
83
+ function* tokenize(src) {
84
+ let i = 0;
85
+ while (i < src.length) {
86
+ const c = src[i];
87
+
88
+ // Skip single‐line comments
89
+ if (c === "/" && src[i + 1] === "/") {
90
+ i += 2;
91
+ while (i < src.length && src[i] !== "\n") {
92
+ i++;
93
+ }
94
+ } else if (c === "/" && src[i + 1] === "*") {
95
+ // Skip multi‐line comments
96
+ i += 2;
97
+ while (i < src.length && !(src[i] === "*" && src[i + 1] === "/")) {
98
+ i++;
99
+ }
100
+ i += 2;
101
+ continue;
102
+ } else if (c === '"' || c === "'" || c === "`") {
103
+ // Skip string literals (but capture them)
104
+ const quote = c;
105
+ let start = i + 1;
106
+ i++;
107
+ while (i < src.length) {
108
+ if (src[i] === "\\") {
109
+ i += 2;
110
+ continue;
111
+ }
112
+ if (src[i] === quote) {
113
+ break;
114
+ }
115
+ i++;
116
+ }
117
+ const str = src.slice(start, i);
118
+ i++;
119
+ yield { type: "StringLiteral", value: str };
120
+ continue;
121
+ } else if (/[A-Za-z_$]/.test(c)) {
122
+ // Identifier
123
+ let start = i;
124
+ i++;
125
+ while (i < src.length && /[\w$]/.test(src[i])) {
126
+ i++;
127
+ }
128
+ yield { type: "Identifier", value: src.slice(start, i) };
129
+ continue;
130
+ } else if (/[{}();,]/.test(c)) {
131
+ // Punctuator (we still keep braces/semis for possible future use)
132
+ yield { type: "Punctuator", value: c };
133
+ i++;
134
+ continue;
135
+ } else {
136
+ // Skip everything else (whitespace, operators, etc.)
137
+ i++;
138
+ }
139
+ }
140
+ }
@@ -0,0 +1,20 @@
1
+ import { normalizeHref } from "./utilities.js";
2
+
3
/**
 * Find the sitemaps referenced by a robots.txt file.
 *
 * A `Sitemap` directive is recognized when it starts a line. The field name is
 * matched case-insensitively (`sitemap:` and `SITEMAP:` are both accepted),
 * and the value must be on the same line as the field name so that an empty
 * directive can't swallow the start of the following line. Directives inside
 * `#` comments are ignored.
 *
 * @param {string} txt - contents of robots.txt
 * @returns {{ crawlablePaths: string[], resourcePaths: string[] }} sitemap
 * locations as crawlable paths; robots.txt never contributes resource paths
 */
export default function pathsInRobotsTxt(txt) {
  // `^\s*` (with the `m` flag) anchors to a line start while tolerating
  // leading whitespace or a byte-order mark; the separators around the colon
  // are restricted to spaces/tabs so the match stays on one line.
  const sitemapRegex = /^\s*sitemap[ \t]*:[ \t]*(?<href>\S+)/gim;

  const crawlablePaths = [];
  for (const match of txt.matchAll(sitemapRegex)) {
    const href = normalizeHref(match.groups.href);
    if (href) {
      crawlablePaths.push(href);
    }
  }

  return {
    crawlablePaths,
    resourcePaths: [],
  };
}
@@ -0,0 +1,20 @@
1
+ import { normalizeHref } from "./utilities.js";
2
+
3
// The five entities predefined by XML. Sitemap URLs must be entity-escaped,
// so these have to be decoded to recover the real URL.
const xmlEntities = {
  amp: "&",
  apos: "'",
  gt: ">",
  lt: "<",
  quot: '"',
};

// Decode the predefined XML entities in a single pass so that text like
// `&amp;lt;` becomes `&lt;` rather than being decoded twice.
function decodeXmlEntities(text) {
  return text.replace(/&(amp|apos|gt|lt|quot);/g, (_, name) => xmlEntities[name]);
}

/**
 * Find the locations listed in a sitemap (or sitemap index) document.
 *
 * The text of each `<loc>` element is trimmed, since pretty-printed sitemaps
 * may put the URL on its own line, and has its XML entities decoded before
 * being normalized.
 *
 * @param {string} xml - contents of the sitemap
 * @returns {{ crawlablePaths: string[], resourcePaths: string[] }} locations
 * as crawlable paths; a sitemap never contributes resource paths
 */
export default function pathsInSitemap(xml) {
  const locRegex = /<loc>(?<text>[^<]*)<\/loc>/g;

  const crawlablePaths = [];
  for (const match of xml.matchAll(locRegex)) {
    const text = decodeXmlEntities(match.groups.text.trim());
    const href = normalizeHref(text);
    if (href) {
      crawlablePaths.push(href);
    }
  }

  return {
    crawlablePaths,
    resourcePaths: [],
  };
}
@@ -0,0 +1,125 @@
1
+ import {
2
+ extension,
3
+ isPlainObject,
4
+ trailingSlash,
5
+ } from "@weborigami/async-tree";
6
+
7
+ // A fake base URL used to handle cases where an href is relative and must be
8
+ // treated relative to some base URL.
9
+ const fakeBaseUrl = new URL("fake:/");
10
+
11
/**
 * Record an href in the given paths object, mutating it in place.
 *
 * The href is normalized first; if nothing is left after normalization (the
 * href was only a search or hash), nothing is recorded. The normalized href
 * is appended to `paths.crawlablePaths` or `paths.resourcePaths`.
 *
 * @param {{ crawlablePaths: string[], resourcePaths: string[] }} paths
 * @param {string} href
 * @param {boolean} [isCrawlable] - if omitted (or null), decided from the
 * normalized href via `isCrawlableHref`
 */
export function addHref(paths, href, isCrawlable) {
  const normalized = normalizeHref(href);
  if (normalized === null) {
    return;
  }

  const crawlable = isCrawlable ?? isCrawlableHref(normalized);
  const bucket = crawlable ? paths.crawlablePaths : paths.resourcePaths;
  bucket.push(normalized);
}
27
+
28
/**
 * Store a value in a nested object at the location described by the keys,
 * creating intermediate objects as needed. Trailing slashes are removed from
 * each key before it is used.
 *
 * When a route is both a page and a container of other routes, the page's
 * value is kept under the container's "index.html" key.
 *
 * @param {any} object
 * @param {string[]} keys
 * @param {any} value
 */
export function addValueToObject(object, keys, value) {
  const lastIndex = keys.length - 1;
  let node = object;

  for (const [index, rawKey] of keys.entries()) {
    const key = trailingSlash.remove(rawKey);

    if (index !== lastIndex) {
      // Intermediate key: make sure there is a container to descend into.
      if (!node[key]) {
        node[key] = {};
      } else if (!isPlainObject(node[key])) {
        // A value already lives here (e.g. a page at /foo) and we now need to
        // store things beneath it (e.g. /foo/bar.jpg), so the existing value
        // becomes the container's "index.html".
        node[key] = { "index.html": node[key] };
      }
      node = node[key];
      continue;
    }

    // Final key: write the value.
    if (isPlainObject(node[key])) {
      // The route already holds other values; this one is its index page.
      node[key]["index.html"] = value;
    } else {
      node[key] = value;
    }
  }
}
60
+
61
/**
 * Determine a URL we can use to decide whether a link is local within the
 * tree or not.
 *
 * - If a `baseHref` is supplied, it is converted to a URL; if it can't be
 *   parsed on its own (probably a relative path), it is resolved against a
 *   fake base URL.
 * - Otherwise, if `object` defines a non-empty `href` property, that is used.
 *   The href may be a string or a `URL`; a trailing slash is added if missing.
 * - Otherwise (including when `object` is null or undefined), the fake base
 *   URL itself is returned.
 *
 * @param {string|undefined} baseHref
 * @param {any} object
 * @returns {URL}
 */
export function getBaseUrl(baseHref, object) {
  if (baseHref !== undefined) {
    try {
      return new URL(baseHref);
    } catch {
      // Not a valid absolute URL, probably a path; use the fake protocol
      return new URL(baseHref, fakeBaseUrl);
    }
  }

  // Tolerate a missing object instead of throwing on the property access.
  const objectHref = object?.href;
  if (!objectHref) {
    return fakeBaseUrl;
  }

  // Coerce to a string so that a URL instance works as well as a string.
  const href = String(objectHref);
  return new URL(href.endsWith("/") ? href : `${href}/`);
}
94
+
95
/**
 * Return true if the href points at something that may itself contain
 * references: robots.txt, sitemap.xml, or a file whose extension is one of
 * the crawlable extensions. A path without an extension is assumed to be HTML.
 *
 * @param {string} href
 * @returns {boolean}
 */
export function isCrawlableHref(href) {
  // Resolve against the fake base URL so relative hrefs can be parsed too.
  const { pathname } = new URL(href, fakeBaseUrl);
  const segments = pathname.split("/");
  const lastKey = segments[segments.length - 1] ?? "";

  switch (lastKey) {
    case "robots.txt":
    case "sitemap.xml":
      return true;
  }

  // The empty string covers paths with no extension.
  const crawlableExtensions = new Set([
    ".html",
    ".css",
    ".js",
    ".map",
    ".xhtml",
    "",
  ]);
  return crawlableExtensions.has(extension.extname(lastKey));
}
108
+
109
/**
 * Strip the search parameters and hash from an href, leaving whatever comes
 * before the first `?` or `#`. The href is otherwise untouched, so it stays
 * absolute or relative as given.
 *
 * @param {string} href
 * @returns {string|null} the stripped href, or null if nothing remains
 */
export function normalizeHref(href) {
  const cut = href.search(/[?#]/);
  const stripped = cut === -1 ? href : href.slice(0, cut);
  if (stripped === "") {
    return null;
  }
  return stripped;
}
116
+
117
/**
 * Return a copy of the keys suitable for indexing and storage: an empty path,
 * or a path whose last key has a trailing slash, gets "index.html" appended.
 * The input array is not modified.
 *
 * @param {string[]} keys
 * @returns {string[]}
 */
export function normalizeKeys(keys) {
  const needsIndex = keys.length === 0 || trailingSlash.has(keys.at(-1));
  return needsIndex ? keys.concat("index.html") : keys.slice();
}
package/src/dev/dev.js CHANGED
@@ -1,6 +1,8 @@
1
1
  export { default as breakpoint } from "./breakpoint.js";
2
2
  export { default as changes } from "./changes.js";
3
3
  export { default as code } from "./code.js";
4
+ export { default as audit } from "./crawler/audit.js";
5
+ export { default as crawl } from "./crawler/crawl.js";
4
6
  export { default as debug } from "./debug.js";
5
7
  export { default as explore } from "./explore.js";
6
8
  export { default as log } from "./log.js";
@@ -14,6 +14,7 @@ import jpgHandler from "./jpg.handler.js";
14
14
  import jsonHandler from "./json.handler.js";
15
15
  import mdHandler from "./md.handler.js";
16
16
  import mjsHandler from "./mjs.handler.js";
17
+ import tsHandler from "./ts.handler.js";
17
18
  import txtHandler from "./txt.handler.js";
18
19
  import xhtmlHandler from "./xhtml.handler.js";
19
20
  import ymlHandler from "./yml.handler.js";
@@ -31,6 +32,7 @@ export default {
31
32
  "mjs.handler": mjsHandler,
32
33
  "ori.handler": oriHandler,
33
34
  "oridocument.handler": oridocumentHandler,
35
+ "ts.handler": tsHandler,
34
36
  "txt.handler": txtHandler,
35
37
  "wasm.handler": wasmHandler,
36
38
  "xhtml.handler": xhtmlHandler,
@@ -0,0 +1 @@
1
+ export { default as default } from "./js.handler.js";
@@ -1,12 +1,18 @@
1
1
  dev:
2
2
  description: Develop and debug Origami projects
3
3
  commands:
4
+ audit:
5
+ args: (tree)
6
+ description: Identify broken internal links and references
4
7
  breakpoint:
5
8
  args: (a)
6
9
  description: Break into the JavaScript debugger, then return a
7
10
  changes:
8
11
  args: (old, new)
9
12
  description: Return a tree of changes
13
+ crawl:
14
+ args: (tree, base)
15
+ description: A tree of a site's discoverable resources
10
16
  debug:
11
17
  args: (tree)
12
18
  description: Add debug features to the tree
@@ -213,12 +219,6 @@ scope:
213
219
  site:
214
220
  description: Add common website features
215
221
  commands:
216
- audit:
217
- args: (tree)
218
- description: Identify broken internal links and references
219
- crawl:
220
- args: (tree, base)
221
- description: A tree of a site's discoverable resources
222
222
  index:
223
223
  args: (tree)
224
224
  description: A default index.html page for the tree
@@ -12,11 +12,16 @@ import assertTreeIsDefined from "../common/assertTreeIsDefined.js";
12
12
  */
13
13
  export default async function csv(object) {
14
14
  assertTreeIsDefined(this, "origami:csv");
15
+ object = object ?? this;
16
+ if (object === undefined) {
17
+ return undefined;
18
+ }
15
19
  if (isUnpackable(object)) {
16
20
  object = await object.unpack();
17
21
  }
18
22
  const value = await toPlainValue(object);
19
- const text = formatCsv(value);
23
+ const array = Array.isArray(value) ? value : Object.values(value);
24
+ const text = formatCsv(array);
20
25
  return text;
21
26
  }
22
27
 
package/src/site/site.js CHANGED
@@ -1,5 +1,3 @@
1
- export { default as audit } from "./audit.js";
2
- export { default as crawl } from "./crawler/crawl.js";
3
1
  export { default as index } from "./index.js";
4
2
  export { default as jsonKeys } from "./jsonKeys.js";
5
3
  export { default as redirect } from "./redirect.js";
@@ -0,0 +1,6 @@
1
+ import { JSDOM } from "jsdom";
2
+
3
/**
 * Parse an HTML string with jsdom's `JSDOM.fragment` and return the result.
 *
 * @param {string} html
 */
export default function htmlDom(html) {
  return JSDOM.fragment(html);
}
package/src/text/text.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export { taggedTemplateIndent as indent } from "@weborigami/language";
2
2
  export { default as document } from "./document.js";
3
+ export { default as htmlDom } from "./htmlDom.js";
3
4
  export { default as inline } from "./inline.js";
4
5
  export { default as mdHtml } from "./mdHtml.js";
package/src/tree/map.js CHANGED
@@ -1,13 +1,10 @@
1
1
  import {
2
- cachedKeyFunctions,
3
- extensionKeyFunctions,
4
2
  isPlainObject,
5
3
  isUnpackable,
6
4
  map as mapTransform,
7
5
  } from "@weborigami/async-tree";
8
6
  import getTreeArgument from "../common/getTreeArgument.js";
9
7
  import { toFunction } from "../common/utilities.js";
10
- import parseExtensions from "./parseExtensions.js";
11
8
 
12
9
  /**
13
10
  * Map a hierarchical tree of keys and values to a new tree of keys and values.
@@ -81,12 +78,6 @@ function extendedOptions(context, operation) {
81
78
  let keyFn = options.key;
82
79
  let inverseKeyFn = options.inverseKey;
83
80
 
84
- if (extension && (keyFn || inverseKeyFn)) {
85
- throw new TypeError(
86
- `map: You can't specify extensions and also a key or inverseKey function`
87
- );
88
- }
89
-
90
81
  if (valueFn) {
91
82
  // @ts-ignore
92
83
  valueFn = toFunction(valueFn);
@@ -94,36 +85,23 @@ function extendedOptions(context, operation) {
94
85
  // Origami builtins can be used as value functions.
95
86
  // @ts-ignore
96
87
  const bound = valueFn.bind(context);
88
+ // Transfer sidecar functions
97
89
  // @ts-ignore
98
90
  Object.assign(bound, valueFn);
99
91
  valueFn = bound;
100
92
  }
101
93
 
102
- if (extension) {
103
- // Generate key/inverseKey functions from the extension
104
- let { resultExtension, sourceExtension } = parseExtensions(extension);
105
- const keyFns = extensionKeyFunctions(sourceExtension, resultExtension);
106
- keyFn = keyFns.key;
107
- inverseKeyFn = keyFns.inverseKey;
108
- } else if (keyFn) {
109
- // Extend the key function to include a value parameter
110
- keyFn = extendKeyFn(keyFn);
111
- } else {
112
- // Use sidecar key/inverseKey functions if the valueFn defines them
113
- keyFn = /** @type {any} */ (valueFn)?.key;
114
- inverseKeyFn = /** @type {any} */ (valueFn)?.inverseKey;
115
- }
116
-
117
- if (keyFn && !inverseKeyFn) {
118
- // Only keyFn was provided, so we need to generate the inverseKeyFn
119
- const keyFns = cachedKeyFunctions(keyFn, deep);
120
- keyFn = keyFns.key;
121
- inverseKeyFn = keyFns.inverseKey;
94
+ if (!extension) {
95
+ if (keyFn) {
96
+ // Extend the key function to include a value parameter
97
+ keyFn = extendKeyFn(keyFn);
98
+ }
122
99
  }
123
100
 
124
101
  return {
125
102
  deep,
126
103
  description,
104
+ extension,
127
105
  inverseKey: inverseKeyFn,
128
106
  key: keyFn,
129
107
  needsSourceValue,
package/src/site/audit.js DELETED
@@ -1,19 +0,0 @@
1
- import { Tree } from "@weborigami/async-tree";
2
- import getTreeArgument from "../common/getTreeArgument.js";
3
- import crawl from "./crawler/crawl.js";
4
-
5
- /**
6
- * @this {import("@weborigami/types").AsyncTree|null}
7
- * @param {import("@weborigami/async-tree").Treelike} treelike
8
- */
9
- export default async function audit(treelike) {
10
- const tree = await getTreeArgument(this, arguments, treelike, "site:audit");
11
- const crawled = await crawl.call(this, tree);
12
- let crawlErrorsJson = await crawled.get("crawl-errors.json");
13
- if (!crawlErrorsJson) {
14
- return undefined;
15
- }
16
- const errors = Tree.from(JSON.parse(crawlErrorsJson), { deep: true });
17
- errors.parent = this;
18
- return errors;
19
- }