@pagepocket/lib 0.7.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. package/README.md +7 -6
  2. package/dist/build-snapshot-from-bundle.d.ts +23 -0
  3. package/dist/build-snapshot-from-bundle.js +68 -0
  4. package/dist/builtin-blacklist.js +3 -6
  5. package/dist/bundle/from-network-store.d.ts +10 -0
  6. package/dist/bundle/from-network-store.js +26 -0
  7. package/dist/bundle/types.d.ts +32 -0
  8. package/dist/bundle/types.js +2 -0
  9. package/dist/capture/index.d.ts +14 -0
  10. package/dist/capture/index.js +86 -0
  11. package/dist/capture/memory-content-store.d.ts +4 -0
  12. package/dist/capture/memory-content-store.js +42 -0
  13. package/dist/capture/types.d.ts +61 -0
  14. package/dist/capture/types.js +2 -0
  15. package/dist/content-store.js +3 -8
  16. package/dist/content-type.d.ts +1 -1
  17. package/dist/content-type.js +2 -28
  18. package/dist/core/_impl/completion.d.ts +4 -0
  19. package/dist/core/_impl/completion.js +29 -0
  20. package/dist/core/_impl/content-store.d.ts +21 -0
  21. package/dist/core/_impl/content-store.js +91 -0
  22. package/dist/core/_impl/debug.d.ts +1 -0
  23. package/dist/core/_impl/debug.js +16 -0
  24. package/dist/core/_impl/inflight-tracker.d.ts +19 -0
  25. package/dist/core/_impl/inflight-tracker.js +48 -0
  26. package/dist/core/_impl/pagepocket.d.ts +27 -0
  27. package/dist/core/_impl/pagepocket.js +155 -0
  28. package/dist/core/capture/_impl/memory-content-store.d.ts +4 -0
  29. package/dist/core/capture/_impl/memory-content-store.js +42 -0
  30. package/dist/core/capture/_impl/types.d.ts +61 -0
  31. package/dist/core/capture/_impl/types.js +2 -0
  32. package/dist/core/capture/internal/memory-content-store.d.ts +4 -0
  33. package/dist/core/capture/internal/memory-content-store.js +42 -0
  34. package/dist/core/capture/internal/types.d.ts +61 -0
  35. package/dist/core/capture/internal/types.js +2 -0
  36. package/dist/core/capture/memory-content-store.d.ts +4 -0
  37. package/dist/core/capture/memory-content-store.js +38 -0
  38. package/dist/core/capture/types.d.ts +61 -0
  39. package/dist/core/capture/types.js +1 -0
  40. package/dist/core/completion.d.ts +4 -0
  41. package/dist/core/completion.js +23 -0
  42. package/dist/core/content-store.d.ts +21 -0
  43. package/dist/core/content-store.js +54 -0
  44. package/dist/core/debug.d.ts +1 -0
  45. package/dist/core/debug.js +12 -0
  46. package/dist/core/file-tree-merge.d.ts +2 -0
  47. package/dist/core/file-tree-merge.js +27 -0
  48. package/dist/core/file-tree.d.ts +36 -0
  49. package/dist/core/file-tree.js +1 -0
  50. package/dist/core/inflight-tracker.d.ts +19 -0
  51. package/dist/core/inflight-tracker.js +44 -0
  52. package/dist/core/internal/completion.d.ts +4 -0
  53. package/dist/core/internal/completion.js +29 -0
  54. package/dist/core/internal/content-store.d.ts +21 -0
  55. package/dist/core/internal/content-store.js +91 -0
  56. package/dist/core/internal/debug.d.ts +1 -0
  57. package/dist/core/internal/debug.js +16 -0
  58. package/dist/core/internal/inflight-tracker.d.ts +19 -0
  59. package/dist/core/internal/inflight-tracker.js +48 -0
  60. package/dist/core/internal/pagepocket.d.ts +27 -0
  61. package/dist/core/internal/pagepocket.js +155 -0
  62. package/dist/core/pagepocket.d.ts +38 -0
  63. package/dist/core/pagepocket.js +57 -0
  64. package/dist/core/plugin/_impl/context.d.ts +47 -0
  65. package/dist/core/plugin/_impl/context.js +142 -0
  66. package/dist/core/plugin/_impl/runner.d.ts +12 -0
  67. package/dist/core/plugin/_impl/runner.js +232 -0
  68. package/dist/core/plugin/_impl/types.d.ts +108 -0
  69. package/dist/core/plugin/_impl/types.js +2 -0
  70. package/dist/core/plugin/context.d.ts +47 -0
  71. package/dist/core/plugin/context.js +205 -0
  72. package/dist/core/plugin/internal/context.d.ts +47 -0
  73. package/dist/core/plugin/internal/context.js +142 -0
  74. package/dist/core/plugin/internal/runner.d.ts +12 -0
  75. package/dist/core/plugin/internal/runner.js +232 -0
  76. package/dist/core/plugin/internal/types.d.ts +108 -0
  77. package/dist/core/plugin/internal/types.js +2 -0
  78. package/dist/core/plugin/runner-utils.d.ts +9 -0
  79. package/dist/core/plugin/runner-utils.js +29 -0
  80. package/dist/core/plugin/runner.d.ts +12 -0
  81. package/dist/core/plugin/runner.js +118 -0
  82. package/dist/core/plugin/types.d.ts +117 -0
  83. package/dist/core/plugin/types.js +1 -0
  84. package/dist/core/runtime/types.d.ts +14 -0
  85. package/dist/core/runtime/types.js +2 -0
  86. package/dist/css-rewrite.js +1 -5
  87. package/dist/debug.d.ts +0 -1
  88. package/dist/debug.js +3 -5
  89. package/dist/files/types.d.ts +41 -0
  90. package/dist/files/types.js +2 -0
  91. package/dist/hack-html.js +20 -13
  92. package/dist/hackers/index.d.ts +1 -1
  93. package/dist/hackers/index.js +24 -27
  94. package/dist/hackers/preload-fetch.d.ts +1 -1
  95. package/dist/hackers/preload-fetch.js +1 -4
  96. package/dist/hackers/preload-xhr.d.ts +1 -1
  97. package/dist/hackers/preload-xhr.js +1 -4
  98. package/dist/hackers/replay-beacon.d.ts +1 -1
  99. package/dist/hackers/replay-beacon.js +1 -4
  100. package/dist/hackers/replay-block-text-fragment.d.ts +1 -1
  101. package/dist/hackers/replay-block-text-fragment.js +1 -4
  102. package/dist/hackers/replay-css-proxy.d.ts +1 -1
  103. package/dist/hackers/replay-css-proxy.js +9 -12
  104. package/dist/hackers/replay-dom-rewrite.d.ts +1 -1
  105. package/dist/hackers/replay-dom-rewrite.js +165 -154
  106. package/dist/hackers/replay-eventsource.d.ts +1 -1
  107. package/dist/hackers/replay-eventsource.js +1 -4
  108. package/dist/hackers/replay-fetch.d.ts +1 -1
  109. package/dist/hackers/replay-fetch.js +1 -4
  110. package/dist/hackers/replay-history-path.d.ts +1 -1
  111. package/dist/hackers/replay-history-path.js +1 -4
  112. package/dist/hackers/replay-svg-image.d.ts +1 -1
  113. package/dist/hackers/replay-svg-image.js +1 -4
  114. package/dist/hackers/replay-websocket.d.ts +1 -1
  115. package/dist/hackers/replay-websocket.js +1 -4
  116. package/dist/hackers/replay-xhr.d.ts +1 -1
  117. package/dist/hackers/replay-xhr.js +1 -4
  118. package/dist/hackers/types.js +1 -2
  119. package/dist/index.d.ts +29 -13
  120. package/dist/index.js +23 -44
  121. package/dist/kind-map.d.ts +68 -0
  122. package/dist/kind-map.js +58 -0
  123. package/dist/network-store.js +12 -1
  124. package/dist/pagepocket.d.ts +19 -4
  125. package/dist/pagepocket.js +36 -102
  126. package/dist/path-resolver.d.ts +1 -2
  127. package/dist/path-resolver.js +9 -16
  128. package/dist/plugin/builtins/build-snapshot-plugin.d.ts +5 -0
  129. package/dist/plugin/builtins/build-snapshot-plugin.js +84 -0
  130. package/dist/plugin/builtins/replace-elements-plugin.d.ts +8 -0
  131. package/dist/plugin/builtins/replace-elements-plugin.js +13 -0
  132. package/dist/plugin/builtins/to-directory-plugin.d.ts +7 -0
  133. package/dist/plugin/builtins/to-directory-plugin.js +20 -0
  134. package/dist/plugin/builtins/to-zip-plugin.d.ts +5 -0
  135. package/dist/plugin/builtins/to-zip-plugin.js +19 -0
  136. package/dist/plugin/context.d.ts +47 -0
  137. package/dist/plugin/context.js +142 -0
  138. package/dist/plugin/runner.d.ts +12 -0
  139. package/dist/plugin/runner.js +232 -0
  140. package/dist/plugin/types.d.ts +108 -0
  141. package/dist/plugin/types.js +2 -0
  142. package/dist/plugins/build-files-from-capture.d.ts +5 -0
  143. package/dist/plugins/build-files-from-capture.js +85 -0
  144. package/dist/plugins/build-warc.d.ts +5 -0
  145. package/dist/plugins/build-warc.js +225 -0
  146. package/dist/plugins/builtins/manifest.d.ts +2 -0
  147. package/dist/plugins/builtins/manifest.js +42 -0
  148. package/dist/plugins/builtins/snapshot-directory.d.ts +2 -0
  149. package/dist/plugins/builtins/snapshot-directory.js +24 -0
  150. package/dist/plugins/builtins/snapshot-zip.d.ts +2 -0
  151. package/dist/plugins/builtins/snapshot-zip.js +25 -0
  152. package/dist/plugins/capture-http-lighterceptor.d.ts +5 -0
  153. package/dist/plugins/capture-http-lighterceptor.js +85 -0
  154. package/dist/plugins/capture-http-puppeteer.d.ts +5 -0
  155. package/dist/plugins/capture-http-puppeteer.js +85 -0
  156. package/dist/plugins/host.d.ts +37 -0
  157. package/dist/plugins/host.js +105 -0
  158. package/dist/plugins/index.d.ts +6 -0
  159. package/dist/plugins/index.js +11 -0
  160. package/dist/plugins/ordering.d.ts +2 -0
  161. package/dist/plugins/ordering.js +19 -0
  162. package/dist/plugins/types.d.ts +51 -0
  163. package/dist/plugins/types.js +2 -0
  164. package/dist/preload.js +3 -7
  165. package/dist/replace-elements/actions.d.ts +5 -0
  166. package/dist/replace-elements/actions.js +86 -0
  167. package/dist/replace-elements/match.d.ts +5 -0
  168. package/dist/replace-elements/match.js +46 -0
  169. package/dist/replace-elements/normalize.d.ts +21 -0
  170. package/dist/replace-elements/normalize.js +50 -0
  171. package/dist/replace-elements.d.ts +1 -1
  172. package/dist/replace-elements.js +5 -185
  173. package/dist/replay/match-api.d.ts +10 -0
  174. package/dist/replay/match-api.js +162 -0
  175. package/dist/replay/templates/match-api-source.d.ts +1 -0
  176. package/dist/replay/templates/match-api-source.js +137 -0
  177. package/dist/replay/templates/replay-script-template.d.ts +5 -0
  178. package/dist/replay/templates/replay-script-template.js +337 -0
  179. package/dist/replay/templates/resource-proxy-script.d.ts +1 -0
  180. package/dist/replay/templates/resource-proxy-script.js +274 -0
  181. package/dist/replay-script.d.ts +3 -10
  182. package/dist/replay-script.js +11 -625
  183. package/dist/resource-filter.d.ts +1 -1
  184. package/dist/resource-filter.js +1 -5
  185. package/dist/resource-proxy/escape-percent.d.ts +1 -0
  186. package/dist/resource-proxy/escape-percent.js +12 -0
  187. package/dist/resource-proxy/multimap.d.ts +3 -0
  188. package/dist/resource-proxy/multimap.js +18 -0
  189. package/dist/resource-proxy/pathname-variants.d.ts +3 -0
  190. package/dist/resource-proxy/pathname-variants.js +54 -0
  191. package/dist/resource-proxy.d.ts +4 -2
  192. package/dist/resource-proxy.js +48 -117
  193. package/dist/resources.js +4 -42
  194. package/dist/rewrite-links/js-imports.d.ts +3 -0
  195. package/dist/rewrite-links/js-imports.js +56 -0
  196. package/dist/rewrite-links/link-rel.d.ts +2 -0
  197. package/dist/rewrite-links/link-rel.js +10 -0
  198. package/dist/rewrite-links/meta-refresh.d.ts +3 -0
  199. package/dist/rewrite-links/meta-refresh.js +22 -0
  200. package/dist/rewrite-links/skip.d.ts +1 -0
  201. package/dist/rewrite-links/skip.js +10 -0
  202. package/dist/rewrite-links/srcset.d.ts +3 -0
  203. package/dist/rewrite-links/srcset.js +63 -0
  204. package/dist/rewrite-links/url-resolve.d.ts +3 -0
  205. package/dist/rewrite-links/url-resolve.js +13 -0
  206. package/dist/rewrite-links.d.ts +3 -3
  207. package/dist/rewrite-links.js +31 -240
  208. package/dist/snapshot-builder/api.d.ts +3 -0
  209. package/dist/snapshot-builder/api.js +6 -0
  210. package/dist/snapshot-builder/build-snapshot.d.ts +3 -0
  211. package/dist/snapshot-builder/build-snapshot.js +138 -0
  212. package/dist/snapshot-builder/capture-index/index-capture.d.ts +13 -0
  213. package/dist/snapshot-builder/capture-index/index-capture.js +168 -0
  214. package/dist/snapshot-builder/capture-index/index.d.ts +2 -0
  215. package/dist/snapshot-builder/capture-index/index.js +1 -0
  216. package/dist/snapshot-builder/capture-index/types.d.ts +12 -0
  217. package/dist/snapshot-builder/capture-index/types.js +1 -0
  218. package/dist/snapshot-builder/capture-index.d.ts +12 -0
  219. package/dist/snapshot-builder/capture-index.js +173 -0
  220. package/dist/snapshot-builder/emit-document.d.ts +24 -0
  221. package/dist/snapshot-builder/emit-document.js +50 -0
  222. package/dist/snapshot-builder/grouping.d.ts +8 -0
  223. package/dist/snapshot-builder/grouping.js +87 -0
  224. package/dist/snapshot-builder/http.d.ts +6 -0
  225. package/dist/snapshot-builder/http.js +28 -0
  226. package/dist/snapshot-builder/index.d.ts +4 -0
  227. package/dist/snapshot-builder/index.js +2 -0
  228. package/dist/snapshot-builder/path-map.d.ts +3 -0
  229. package/dist/snapshot-builder/path-map.js +35 -0
  230. package/dist/snapshot-builder/resources-path.d.ts +23 -0
  231. package/dist/snapshot-builder/resources-path.js +47 -0
  232. package/dist/snapshot-builder/rewrite-resource.d.ts +18 -0
  233. package/dist/snapshot-builder/rewrite-resource.js +52 -0
  234. package/dist/snapshot-builder/types.d.ts +37 -0
  235. package/dist/snapshot-builder/types.js +2 -0
  236. package/dist/snapshot-builder.d.ts +12 -8
  237. package/dist/snapshot-builder.js +252 -27
  238. package/dist/types.d.ts +122 -78
  239. package/dist/types.js +4 -2
  240. package/dist/units/contracts-bridge.d.ts +76 -0
  241. package/dist/units/contracts-bridge.js +6 -0
  242. package/dist/units/index.d.ts +4 -0
  243. package/dist/units/index.js +2 -0
  244. package/dist/units/runner.d.ts +11 -0
  245. package/dist/units/runner.js +270 -0
  246. package/dist/units/types.d.ts +39 -0
  247. package/dist/units/types.js +1 -0
  248. package/dist/utils/streams.d.ts +2 -0
  249. package/dist/utils/streams.js +29 -0
  250. package/dist/utils.d.ts +35 -1
  251. package/dist/utils.js +107 -29
  252. package/dist/v3/contracts-bridge.d.ts +69 -0
  253. package/dist/v3/contracts-bridge.js +5 -0
  254. package/dist/v3/index.d.ts +4 -0
  255. package/dist/v3/index.js +2 -0
  256. package/dist/v3/runner.d.ts +20 -0
  257. package/dist/v3/runner.js +245 -0
  258. package/dist/v3/types.d.ts +39 -0
  259. package/dist/v3/types.js +1 -0
  260. package/dist/writers.js +3 -1
  261. package/package.json +11 -3
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.sortPluginsDeterministically = void 0;
4
+ const priorityOf = (plugin) => plugin.order?.priority ?? 0;
5
+ const sortPluginsDeterministically = (plugins) => {
6
+ return plugins.slice().sort((left, right) => {
7
+ const pLeft = priorityOf(left);
8
+ const pRight = priorityOf(right);
9
+ if (pLeft !== pRight) {
10
+ return pRight - pLeft;
11
+ }
12
+ if (left.name < right.name)
13
+ return -1;
14
+ if (left.name > right.name)
15
+ return 1;
16
+ return 0;
17
+ });
18
+ };
19
+ exports.sortPluginsDeterministically = sortPluginsDeterministically;
@@ -0,0 +1,51 @@
1
+ import type { ReplaceElementsConfig, PageSnapshot } from "../types";
2
+ import type { CaptureBundle } from "../bundle/types";
3
+ export type LogLevel = "debug" | "info" | "warn" | "error";
4
+ export interface PluginContext {
5
+ entryUrl: string;
6
+ now(): number;
7
+ log(level: LogLevel, msg: string, meta?: unknown): void;
8
+ config: Record<string, unknown>;
9
+ }
10
+ export interface RunContext extends PluginContext {
11
+ bundle: CaptureBundle;
12
+ snapshot?: PageSnapshot;
13
+ sink: ArtifactSink;
14
+ outputDir?: string;
15
+ }
16
+ export type OutputRequirement = "bundle" | "snapshot";
17
+ export interface OutputWriter {
18
+ id: string;
19
+ requires: OutputRequirement;
20
+ write(ctx: RunContext): Promise<void>;
21
+ }
22
+ export interface PluginOrder {
23
+ priority?: number;
24
+ before?: string[];
25
+ after?: string[];
26
+ }
27
+ export interface PagePocketPlugin {
28
+ name: string;
29
+ version?: string;
30
+ order?: PluginOrder;
31
+ /** Contribute Cheerio rewrite rules (applied only if snapshot is built). */
32
+ replaceElements?: (ctx: PluginContext) => ReplaceElementsConfig | Promise<ReplaceElementsConfig>;
33
+ outputs?: OutputWriter[];
34
+ hooks?: {
35
+ init?(ctx: PluginContext): void | Promise<void>;
36
+ shutdown?(ctx: PluginContext): void | Promise<void>;
37
+ };
38
+ }
39
+ export interface ArtifactSink {
40
+ writeFile(relativePath: string, data: Uint8Array, contentType?: string): Promise<void>;
41
+ writeJson(relativePath: string, value: unknown): Promise<void>;
42
+ createWriteStream?(relativePath: string, options?: {
43
+ contentType?: string;
44
+ overwrite?: boolean;
45
+ }): NodeJS.WritableStream | undefined;
46
+ record?(meta: {
47
+ relativePath: string;
48
+ contentType?: string;
49
+ role?: string;
50
+ }): void;
51
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/dist/preload.js CHANGED
@@ -1,10 +1,7 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.buildPreloadScript = void 0;
4
- const hackers_1 = require("./hackers");
5
- const buildPreloadScript = () => {
1
+ import { preloadHackers } from "./hackers/index.js";
2
+ export const buildPreloadScript = () => {
6
3
  const context = { stage: "preload" };
7
- const hackerScripts = hackers_1.preloadHackers
4
+ const hackerScripts = preloadHackers
8
5
  .map((hacker) => ` // hacker:${hacker.id}\n${hacker.build(context)}`)
9
6
  .join("\n");
10
7
  return `
@@ -57,4 +54,3 @@ ${hackerScripts}
57
54
  })();
58
55
  `;
59
56
  };
60
- exports.buildPreloadScript = buildPreloadScript;
@@ -0,0 +1,5 @@
1
+ import type { Cheerio, CheerioAPI } from "cheerio";
2
+ import type { ReplaceAction } from "../types.js";
3
+ export declare const isHtmlElement: ($: CheerioAPI, $el: Cheerio<any>) => boolean;
4
+ export declare const applyReplaceAction: ($: CheerioAPI, $el: Cheerio<any>, action: ReplaceAction) => void;
5
+ export declare const tagNameMatches: (el: unknown, expected?: string) => boolean;
@@ -0,0 +1,86 @@
1
+ const getNodeName = (el) => {
2
+ if (!el || typeof el !== "object")
3
+ return null;
4
+ const name = el.name;
5
+ return typeof name === "string" ? name : null;
6
+ };
7
+ const getNodeType = (el) => {
8
+ if (!el || typeof el !== "object")
9
+ return null;
10
+ const type = el.type;
11
+ return typeof type === "string" ? type : null;
12
+ };
13
+ export const isHtmlElement = ($, $el) => {
14
+ if (!$el || !$el.length)
15
+ return false;
16
+ if (!$el.closest("html").length)
17
+ return false;
18
+ const el = $el.get(0);
19
+ if (!el)
20
+ return false;
21
+ const type = getNodeType(el);
22
+ if (type && type !== "tag")
23
+ return false;
24
+ return true;
25
+ };
26
+ export const applyReplaceAction = ($, $el, action) => {
27
+ switch (action.type) {
28
+ case "replaceWithHtml": {
29
+ $el.replaceWith(action.html);
30
+ return;
31
+ }
32
+ case "replaceWithElement": {
33
+ const next = $(`<${action.tagName}></${action.tagName}>`);
34
+ if (action.attrs) {
35
+ for (const [key, value] of Object.entries(action.attrs)) {
36
+ if (value === null) {
37
+ next.removeAttr(key);
38
+ continue;
39
+ }
40
+ next.attr(key, value);
41
+ }
42
+ }
43
+ if (action.html !== undefined) {
44
+ next.html(action.html);
45
+ }
46
+ else if (action.textContent !== undefined) {
47
+ next.text(action.textContent);
48
+ }
49
+ $el.replaceWith(next);
50
+ return;
51
+ }
52
+ case "renameTag": {
53
+ const to = action.to;
54
+ const keepAttributes = action.keepAttributes !== false;
55
+ const keepChildren = action.keepChildren !== false;
56
+ const next = $(`<${to}></${to}>`);
57
+ if (keepAttributes) {
58
+ const attrs = ($el.attr() ?? {});
59
+ for (const [key, value] of Object.entries(attrs)) {
60
+ next.attr(key, value);
61
+ }
62
+ }
63
+ if (keepChildren) {
64
+ next.append($el.contents());
65
+ }
66
+ $el.replaceWith(next);
67
+ return;
68
+ }
69
+ case "remove": {
70
+ $el.remove();
71
+ return;
72
+ }
73
+ default: {
74
+ const exhaustive = action;
75
+ throw new Error(`Unknown replace action: ${String(exhaustive?.type ?? "")}`);
76
+ }
77
+ }
78
+ };
79
+ export const tagNameMatches = (el, expected) => {
80
+ if (!expected)
81
+ return true;
82
+ const name = getNodeName(el);
83
+ if (!name)
84
+ return false;
85
+ return name.toLowerCase() === expected.toLowerCase();
86
+ };
@@ -0,0 +1,5 @@
1
+ import type { Cheerio, CheerioAPI } from "cheerio";
2
+ import type { MatchQuery } from "../types.js";
3
+ import { isHtmlElement } from "./actions.js";
4
+ export declare const elementMatchesFilter: ($: CheerioAPI, $el: Cheerio<any>, filter: MatchQuery | null) => boolean;
5
+ export { isHtmlElement };
@@ -0,0 +1,46 @@
1
+ import { isHtmlElement, tagNameMatches } from "./actions.js";
2
+ const idMatches = (_$, $el, expected) => {
3
+ if (!expected)
4
+ return true;
5
+ return ($el.attr("id") || "") === expected;
6
+ };
7
+ const attrsMatch = ($el, attrs) => {
8
+ if (!attrs)
9
+ return true;
10
+ for (const [name, expected] of Object.entries(attrs)) {
11
+ const actual = $el.attr(name);
12
+ if (expected === true) {
13
+ if (actual === undefined)
14
+ return false;
15
+ continue;
16
+ }
17
+ if (typeof expected === "string") {
18
+ if (actual !== expected)
19
+ return false;
20
+ continue;
21
+ }
22
+ if (expected instanceof RegExp) {
23
+ if (actual === undefined)
24
+ return false;
25
+ if (!expected.test(actual))
26
+ return false;
27
+ continue;
28
+ }
29
+ }
30
+ return true;
31
+ };
32
+ export const elementMatchesFilter = ($, $el, filter) => {
33
+ if (!filter || typeof filter === "string")
34
+ return true;
35
+ const el = $el.get(0);
36
+ if (!el)
37
+ return false;
38
+ if (!tagNameMatches(el, filter.tagName))
39
+ return false;
40
+ if (!idMatches($, $el, filter.id))
41
+ return false;
42
+ if (!attrsMatch($el, filter.attrs))
43
+ return false;
44
+ return true;
45
+ };
46
+ export { isHtmlElement };
@@ -0,0 +1,21 @@
1
+ import type { ApplyOptions, MatchQuery, ReplaceElementFn, ReplaceElementRule, ReplaceElementsConfig } from "../types.js";
2
+ declare const defaultApply: Required<Pick<ApplyOptions, "scope" | "limit" | "onReplaced">>;
3
+ type NormalizedItem = {
4
+ kind: "rule";
5
+ ruleIndex: number;
6
+ rule: ReplaceElementRule;
7
+ apply: typeof defaultApply;
8
+ } | {
9
+ kind: "fn";
10
+ ruleIndex: number;
11
+ query: string;
12
+ run: ReplaceElementFn;
13
+ apply: typeof defaultApply;
14
+ };
15
+ export declare const normalizeMatchToSelector: (match: MatchQuery) => {
16
+ selector: string;
17
+ filter: MatchQuery | null;
18
+ };
19
+ export declare const normalizeReplaceElementsConfig: (replaceElements: ReplaceElementsConfig) => NormalizedItem[];
20
+ export type { NormalizedItem };
21
+ export { defaultApply };
@@ -0,0 +1,50 @@
1
+ const defaultApply = {
2
+ scope: "document",
3
+ limit: "all",
4
+ onReplaced: "stop"
5
+ };
6
+ const isPlainObject = (value) => typeof value === "object" && value !== null;
7
+ const normalizeApply = (apply) => ({
8
+ scope: apply?.scope ?? defaultApply.scope,
9
+ limit: apply?.limit ?? defaultApply.limit,
10
+ onReplaced: apply?.onReplaced ?? defaultApply.onReplaced
11
+ });
12
+ export const normalizeMatchToSelector = (match) => {
13
+ if (typeof match === "string") {
14
+ return { selector: match, filter: null };
15
+ }
16
+ return { selector: match.selector ?? "*", filter: match };
17
+ };
18
+ export const normalizeReplaceElementsConfig = (replaceElements) => {
19
+ const items = [];
20
+ replaceElements.forEach((item, ruleIndex) => {
21
+ if (typeof item === "function") {
22
+ items.push({
23
+ kind: "fn",
24
+ ruleIndex,
25
+ query: "*",
26
+ run: item,
27
+ apply: normalizeApply(undefined)
28
+ });
29
+ return;
30
+ }
31
+ if (!isPlainObject(item)) {
32
+ throw new Error("replaceElements item must be a rule object or function");
33
+ }
34
+ if ("run" in item && typeof item.run === "function") {
35
+ const fnItem = item;
36
+ items.push({
37
+ kind: "fn",
38
+ ruleIndex,
39
+ query: fnItem.query,
40
+ run: fnItem.run,
41
+ apply: normalizeApply(fnItem.apply)
42
+ });
43
+ return;
44
+ }
45
+ const rule = item;
46
+ items.push({ kind: "rule", ruleIndex, rule, apply: normalizeApply(rule.apply) });
47
+ });
48
+ return items;
49
+ };
50
+ export { defaultApply };
@@ -1,5 +1,5 @@
1
1
  import type { CheerioAPI } from "cheerio";
2
- import type { ReplaceElementsConfig } from "./types";
2
+ import type { ReplaceElementsConfig } from "./types.js";
3
3
  export declare const applyReplaceElements: (input: {
4
4
  $: CheerioAPI;
5
5
  entryUrl: string;
@@ -1,186 +1,7 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.applyReplaceElements = void 0;
4
- const defaultApply = {
5
- scope: "document",
6
- limit: "all",
7
- onReplaced: "stop"
8
- };
9
- const isPlainObject = (value) => typeof value === "object" && value !== null;
10
- const normalizeApply = (apply) => ({
11
- scope: apply?.scope ?? defaultApply.scope,
12
- limit: apply?.limit ?? defaultApply.limit,
13
- onReplaced: apply?.onReplaced ?? defaultApply.onReplaced
14
- });
15
- const normalizeMatchToSelector = (match) => {
16
- if (typeof match === "string") {
17
- return { selector: match, filter: null };
18
- }
19
- return { selector: match.selector ?? "*", filter: match };
20
- };
21
- const getNodeName = (el) => {
22
- if (!el || typeof el !== "object")
23
- return null;
24
- const name = el.name;
25
- return typeof name === "string" ? name : null;
26
- };
27
- const getNodeType = (el) => {
28
- if (!el || typeof el !== "object")
29
- return null;
30
- const type = el.type;
31
- return typeof type === "string" ? type : null;
32
- };
33
- const tagNameMatches = (el, expected) => {
34
- if (!expected)
35
- return true;
36
- const name = getNodeName(el);
37
- if (!name)
38
- return false;
39
- return name.toLowerCase() === expected.toLowerCase();
40
- };
41
- const idMatches = (_$, $el, expected) => {
42
- if (!expected)
43
- return true;
44
- return ($el.attr("id") || "") === expected;
45
- };
46
- const attrsMatch = ($el, attrs) => {
47
- if (!attrs)
48
- return true;
49
- for (const [name, expected] of Object.entries(attrs)) {
50
- const actual = $el.attr(name);
51
- if (expected === true) {
52
- if (actual === undefined)
53
- return false;
54
- continue;
55
- }
56
- if (typeof expected === "string") {
57
- if (actual !== expected)
58
- return false;
59
- continue;
60
- }
61
- if (expected instanceof RegExp) {
62
- if (actual === undefined)
63
- return false;
64
- if (!expected.test(actual))
65
- return false;
66
- continue;
67
- }
68
- }
69
- return true;
70
- };
71
- const elementMatchesFilter = ($, $el, filter) => {
72
- if (!filter || typeof filter === "string")
73
- return true;
74
- const el = $el.get(0);
75
- if (!el)
76
- return false;
77
- if (!tagNameMatches(el, filter.tagName))
78
- return false;
79
- if (!idMatches($, $el, filter.id))
80
- return false;
81
- if (!attrsMatch($el, filter.attrs))
82
- return false;
83
- return true;
84
- };
85
- const isHtmlElement = ($, $el) => {
86
- if (!$el || !$el.length)
87
- return false;
88
- if (!$el.closest("html").length)
89
- return false;
90
- const el = $el.get(0);
91
- if (!el)
92
- return false;
93
- // Cheerio nodes for tags have a "type" of "tag".
94
- const type = getNodeType(el);
95
- if (type && type !== "tag")
96
- return false;
97
- return true;
98
- };
99
- const applyReplaceAction = ($, $el, action) => {
100
- switch (action.type) {
101
- case "replaceWithHtml": {
102
- $el.replaceWith(action.html);
103
- return;
104
- }
105
- case "replaceWithElement": {
106
- const next = $(`<${action.tagName}></${action.tagName}>`);
107
- if (action.attrs) {
108
- for (const [key, value] of Object.entries(action.attrs)) {
109
- if (value === null) {
110
- next.removeAttr(key);
111
- continue;
112
- }
113
- next.attr(key, value);
114
- }
115
- }
116
- if (action.html !== undefined) {
117
- next.html(action.html);
118
- }
119
- else if (action.textContent !== undefined) {
120
- next.text(action.textContent);
121
- }
122
- $el.replaceWith(next);
123
- return;
124
- }
125
- case "renameTag": {
126
- const to = action.to;
127
- const keepAttributes = action.keepAttributes !== false;
128
- const keepChildren = action.keepChildren !== false;
129
- const next = $(`<${to}></${to}>`);
130
- if (keepAttributes) {
131
- const attrs = ($el.attr() ?? {});
132
- for (const [key, value] of Object.entries(attrs)) {
133
- next.attr(key, value);
134
- }
135
- }
136
- if (keepChildren) {
137
- next.append($el.contents());
138
- }
139
- $el.replaceWith(next);
140
- return;
141
- }
142
- case "remove": {
143
- $el.remove();
144
- return;
145
- }
146
- default: {
147
- const exhaustive = action;
148
- throw new Error(`Unknown replace action: ${String(exhaustive?.type ?? "")}`);
149
- }
150
- }
151
- };
152
- const normalizeReplaceElementsConfig = (replaceElements) => {
153
- const items = [];
154
- replaceElements.forEach((item, ruleIndex) => {
155
- if (typeof item === "function") {
156
- items.push({
157
- kind: "fn",
158
- ruleIndex,
159
- query: "*",
160
- run: item,
161
- apply: normalizeApply(undefined)
162
- });
163
- return;
164
- }
165
- if (!isPlainObject(item)) {
166
- throw new Error("replaceElements item must be a rule object or function");
167
- }
168
- if ("run" in item && typeof item.run === "function") {
169
- const fnItem = item;
170
- items.push({
171
- kind: "fn",
172
- ruleIndex,
173
- query: fnItem.query,
174
- run: fnItem.run,
175
- apply: normalizeApply(fnItem.apply)
176
- });
177
- return;
178
- }
179
- const rule = item;
180
- items.push({ kind: "rule", ruleIndex, rule, apply: normalizeApply(rule.apply) });
181
- });
182
- return items;
183
- };
1
+ import { applyReplaceAction } from "./replace-elements/actions.js";
2
+ import { elementMatchesFilter, isHtmlElement } from "./replace-elements/match.js";
3
+ import { normalizeMatchToSelector, normalizeReplaceElementsConfig } from "./replace-elements/normalize.js";
4
+ // helper logic moved to ./replace-elements/*
184
5
  const runFnRuleOnSelection = async (input) => {
185
6
  const { $, item } = input;
186
7
  const selection = $(item.query).toArray();
@@ -210,7 +31,7 @@ const runFnRuleOnSelection = async (input) => {
210
31
  }
211
32
  }
212
33
  };
213
- const applyReplaceElements = async (input) => {
34
+ export const applyReplaceElements = async (input) => {
214
35
  if (!input.replaceElements || input.replaceElements.length === 0) {
215
36
  return;
216
37
  }
@@ -255,4 +76,3 @@ const applyReplaceElements = async (input) => {
255
76
  }
256
77
  }
257
78
  };
258
- exports.applyReplaceElements = applyReplaceElements;
@@ -0,0 +1,10 @@
1
+ import type { ApiRecord } from "../types.js";
2
+ export type MatchApiOptions = {
3
+ records: ApiRecord[];
4
+ byKey?: Map<string, ApiRecord>;
5
+ baseUrl: string;
6
+ method: string;
7
+ url: string;
8
+ body?: unknown;
9
+ };
10
+ export declare function matchAPI(options: MatchApiOptions): ApiRecord | undefined;