@depup/cheerio 1.2.0-depup.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +36 -0
  3. package/Readme.md +229 -0
  4. package/changes.json +30 -0
  5. package/dist/browser/api/attributes.d.ts +385 -0
  6. package/dist/browser/api/attributes.d.ts.map +1 -0
  7. package/dist/browser/api/attributes.js +636 -0
  8. package/dist/browser/api/attributes.js.map +1 -0
  9. package/dist/browser/api/css.d.ts +42 -0
  10. package/dist/browser/api/css.d.ts.map +1 -0
  11. package/dist/browser/api/css.js +116 -0
  12. package/dist/browser/api/css.js.map +1 -0
  13. package/dist/browser/api/extract.d.ts +27 -0
  14. package/dist/browser/api/extract.d.ts.map +1 -0
  15. package/dist/browser/api/extract.js +42 -0
  16. package/dist/browser/api/extract.js.map +1 -0
  17. package/dist/browser/api/forms.d.ts +36 -0
  18. package/dist/browser/api/forms.d.ts.map +1 -0
  19. package/dist/browser/api/forms.js +81 -0
  20. package/dist/browser/api/forms.js.map +1 -0
  21. package/dist/browser/api/manipulation.d.ts +528 -0
  22. package/dist/browser/api/manipulation.d.ts.map +1 -0
  23. package/dist/browser/api/manipulation.js +831 -0
  24. package/dist/browser/api/manipulation.js.map +1 -0
  25. package/dist/browser/api/traversing.d.ts +657 -0
  26. package/dist/browser/api/traversing.d.ts.map +1 -0
  27. package/dist/browser/api/traversing.js +857 -0
  28. package/dist/browser/api/traversing.js.map +1 -0
  29. package/dist/browser/cheerio.d.ts +85 -0
  30. package/dist/browser/cheerio.d.ts.map +1 -0
  31. package/dist/browser/cheerio.js +58 -0
  32. package/dist/browser/cheerio.js.map +1 -0
  33. package/dist/browser/index-browser.d.mts.map +1 -0
  34. package/dist/browser/index-browser.mjs.map +1 -0
  35. package/dist/browser/index.d.ts +5 -0
  36. package/dist/browser/index.js +3 -0
  37. package/dist/browser/load-parse.d.ts +20 -0
  38. package/dist/browser/load-parse.d.ts.map +1 -0
  39. package/dist/browser/load-parse.js +28 -0
  40. package/dist/browser/load-parse.js.map +1 -0
  41. package/dist/browser/load.d.ts +91 -0
  42. package/dist/browser/load.d.ts.map +1 -0
  43. package/dist/browser/load.js +129 -0
  44. package/dist/browser/load.js.map +1 -0
  45. package/dist/browser/options.d.ts +98 -0
  46. package/dist/browser/options.d.ts.map +1 -0
  47. package/dist/browser/options.js +34 -0
  48. package/dist/browser/options.js.map +1 -0
  49. package/dist/browser/package.json +3 -0
  50. package/dist/browser/parse.d.ts +18 -0
  51. package/dist/browser/parse.d.ts.map +1 -0
  52. package/dist/browser/parse.js +73 -0
  53. package/dist/browser/parse.js.map +1 -0
  54. package/dist/browser/parsers/parse5-adapter.d.ts +20 -0
  55. package/dist/browser/parsers/parse5-adapter.d.ts.map +1 -0
  56. package/dist/browser/parsers/parse5-adapter.js +50 -0
  57. package/dist/browser/parsers/parse5-adapter.js.map +1 -0
  58. package/dist/browser/slim.d.ts +25 -0
  59. package/dist/browser/slim.d.ts.map +1 -0
  60. package/dist/browser/slim.js +22 -0
  61. package/dist/browser/slim.js.map +1 -0
  62. package/dist/browser/static.d.ts +112 -0
  63. package/dist/browser/static.d.ts.map +1 -0
  64. package/dist/browser/static.js +204 -0
  65. package/dist/browser/static.js.map +1 -0
  66. package/dist/browser/types.d.ts +21 -0
  67. package/dist/browser/types.d.ts.map +1 -0
  68. package/dist/browser/types.js +3 -0
  69. package/dist/browser/types.js.map +1 -0
  70. package/dist/browser/utils.d.ts +55 -0
  71. package/dist/browser/utils.d.ts.map +1 -0
  72. package/dist/browser/utils.js +84 -0
  73. package/dist/browser/utils.js.map +1 -0
  74. package/dist/commonjs/api/attributes.d.ts +385 -0
  75. package/dist/commonjs/api/attributes.d.ts.map +1 -0
  76. package/dist/commonjs/api/attributes.js +647 -0
  77. package/dist/commonjs/api/attributes.js.map +1 -0
  78. package/dist/commonjs/api/css.d.ts +42 -0
  79. package/dist/commonjs/api/css.d.ts.map +1 -0
  80. package/dist/commonjs/api/css.js +119 -0
  81. package/dist/commonjs/api/css.js.map +1 -0
  82. package/dist/commonjs/api/extract.d.ts +27 -0
  83. package/dist/commonjs/api/extract.d.ts.map +1 -0
  84. package/dist/commonjs/api/extract.js +45 -0
  85. package/dist/commonjs/api/extract.js.map +1 -0
  86. package/dist/commonjs/api/forms.d.ts +36 -0
  87. package/dist/commonjs/api/forms.d.ts.map +1 -0
  88. package/dist/commonjs/api/forms.js +85 -0
  89. package/dist/commonjs/api/forms.js.map +1 -0
  90. package/dist/commonjs/api/manipulation.d.ts +528 -0
  91. package/dist/commonjs/api/manipulation.d.ts.map +1 -0
  92. package/dist/commonjs/api/manipulation.js +850 -0
  93. package/dist/commonjs/api/manipulation.js.map +1 -0
  94. package/dist/commonjs/api/traversing.d.ts +657 -0
  95. package/dist/commonjs/api/traversing.d.ts.map +1 -0
  96. package/dist/commonjs/api/traversing.js +914 -0
  97. package/dist/commonjs/api/traversing.js.map +1 -0
  98. package/dist/commonjs/cheerio.d.ts +85 -0
  99. package/dist/commonjs/cheerio.d.ts.map +1 -0
  100. package/dist/commonjs/cheerio.js +95 -0
  101. package/dist/commonjs/cheerio.js.map +1 -0
  102. package/dist/commonjs/index.d.ts +104 -0
  103. package/dist/commonjs/index.d.ts.map +1 -0
  104. package/dist/commonjs/index.js +250 -0
  105. package/dist/commonjs/index.js.map +1 -0
  106. package/dist/commonjs/load-parse.d.ts +20 -0
  107. package/dist/commonjs/load-parse.d.ts.map +1 -0
  108. package/dist/commonjs/load-parse.js +34 -0
  109. package/dist/commonjs/load-parse.js.map +1 -0
  110. package/dist/commonjs/load.d.ts +91 -0
  111. package/dist/commonjs/load.d.ts.map +1 -0
  112. package/dist/commonjs/load.js +165 -0
  113. package/dist/commonjs/load.js.map +1 -0
  114. package/dist/commonjs/options.d.ts +98 -0
  115. package/dist/commonjs/options.d.ts.map +1 -0
  116. package/dist/commonjs/options.js +37 -0
  117. package/dist/commonjs/options.js.map +1 -0
  118. package/dist/commonjs/package.json +3 -0
  119. package/dist/commonjs/parse.d.ts +18 -0
  120. package/dist/commonjs/parse.d.ts.map +1 -0
  121. package/dist/commonjs/parse.js +77 -0
  122. package/dist/commonjs/parse.js.map +1 -0
  123. package/dist/commonjs/parsers/parse5-adapter.d.ts +20 -0
  124. package/dist/commonjs/parsers/parse5-adapter.d.ts.map +1 -0
  125. package/dist/commonjs/parsers/parse5-adapter.js +54 -0
  126. package/dist/commonjs/parsers/parse5-adapter.js.map +1 -0
  127. package/dist/commonjs/slim.d.ts +25 -0
  128. package/dist/commonjs/slim.d.ts.map +1 -0
  129. package/dist/commonjs/slim.js +30 -0
  130. package/dist/commonjs/slim.js.map +1 -0
  131. package/dist/commonjs/static.d.ts +112 -0
  132. package/dist/commonjs/static.d.ts.map +1 -0
  133. package/dist/commonjs/static.js +214 -0
  134. package/dist/commonjs/static.js.map +1 -0
  135. package/dist/commonjs/types.d.ts +21 -0
  136. package/dist/commonjs/types.d.ts.map +1 -0
  137. package/dist/commonjs/types.js +4 -0
  138. package/dist/commonjs/types.js.map +1 -0
  139. package/dist/commonjs/utils.d.ts +55 -0
  140. package/dist/commonjs/utils.d.ts.map +1 -0
  141. package/dist/commonjs/utils.js +91 -0
  142. package/dist/commonjs/utils.js.map +1 -0
  143. package/dist/esm/api/attributes.d.ts +385 -0
  144. package/dist/esm/api/attributes.d.ts.map +1 -0
  145. package/dist/esm/api/attributes.js +636 -0
  146. package/dist/esm/api/attributes.js.map +1 -0
  147. package/dist/esm/api/css.d.ts +42 -0
  148. package/dist/esm/api/css.d.ts.map +1 -0
  149. package/dist/esm/api/css.js +116 -0
  150. package/dist/esm/api/css.js.map +1 -0
  151. package/dist/esm/api/extract.d.ts +27 -0
  152. package/dist/esm/api/extract.d.ts.map +1 -0
  153. package/dist/esm/api/extract.js +42 -0
  154. package/dist/esm/api/extract.js.map +1 -0
  155. package/dist/esm/api/forms.d.ts +36 -0
  156. package/dist/esm/api/forms.d.ts.map +1 -0
  157. package/dist/esm/api/forms.js +81 -0
  158. package/dist/esm/api/forms.js.map +1 -0
  159. package/dist/esm/api/manipulation.d.ts +528 -0
  160. package/dist/esm/api/manipulation.d.ts.map +1 -0
  161. package/dist/esm/api/manipulation.js +831 -0
  162. package/dist/esm/api/manipulation.js.map +1 -0
  163. package/dist/esm/api/traversing.d.ts +657 -0
  164. package/dist/esm/api/traversing.d.ts.map +1 -0
  165. package/dist/esm/api/traversing.js +857 -0
  166. package/dist/esm/api/traversing.js.map +1 -0
  167. package/dist/esm/cheerio.d.ts +85 -0
  168. package/dist/esm/cheerio.d.ts.map +1 -0
  169. package/dist/esm/cheerio.js +58 -0
  170. package/dist/esm/cheerio.js.map +1 -0
  171. package/dist/esm/index.d.ts +104 -0
  172. package/dist/esm/index.d.ts.map +1 -0
  173. package/dist/esm/index.js +202 -0
  174. package/dist/esm/index.js.map +1 -0
  175. package/dist/esm/load-parse.d.ts +20 -0
  176. package/dist/esm/load-parse.d.ts.map +1 -0
  177. package/dist/esm/load-parse.js +28 -0
  178. package/dist/esm/load-parse.js.map +1 -0
  179. package/dist/esm/load.d.ts +91 -0
  180. package/dist/esm/load.d.ts.map +1 -0
  181. package/dist/esm/load.js +129 -0
  182. package/dist/esm/load.js.map +1 -0
  183. package/dist/esm/options.d.ts +98 -0
  184. package/dist/esm/options.d.ts.map +1 -0
  185. package/dist/esm/options.js +34 -0
  186. package/dist/esm/options.js.map +1 -0
  187. package/dist/esm/package.json +3 -0
  188. package/dist/esm/parse.d.ts +18 -0
  189. package/dist/esm/parse.d.ts.map +1 -0
  190. package/dist/esm/parse.js +73 -0
  191. package/dist/esm/parse.js.map +1 -0
  192. package/dist/esm/parsers/parse5-adapter.d.ts +20 -0
  193. package/dist/esm/parsers/parse5-adapter.d.ts.map +1 -0
  194. package/dist/esm/parsers/parse5-adapter.js +50 -0
  195. package/dist/esm/parsers/parse5-adapter.js.map +1 -0
  196. package/dist/esm/slim.d.ts +25 -0
  197. package/dist/esm/slim.d.ts.map +1 -0
  198. package/dist/esm/slim.js +22 -0
  199. package/dist/esm/slim.js.map +1 -0
  200. package/dist/esm/static.d.ts +112 -0
  201. package/dist/esm/static.d.ts.map +1 -0
  202. package/dist/esm/static.js +204 -0
  203. package/dist/esm/static.js.map +1 -0
  204. package/dist/esm/types.d.ts +21 -0
  205. package/dist/esm/types.d.ts.map +1 -0
  206. package/dist/esm/types.js +3 -0
  207. package/dist/esm/types.js.map +1 -0
  208. package/dist/esm/utils.d.ts +55 -0
  209. package/dist/esm/utils.d.ts.map +1 -0
  210. package/dist/esm/utils.js +84 -0
  211. package/dist/esm/utils.js.map +1 -0
  212. package/package.json +219 -0
  213. package/src/api/attributes.ts +1145 -0
  214. package/src/api/css.ts +224 -0
  215. package/src/api/extract.ts +92 -0
  216. package/src/api/forms.ts +103 -0
  217. package/src/api/manipulation.ts +1115 -0
  218. package/src/api/traversing.ts +1175 -0
  219. package/src/cheerio.ts +143 -0
  220. package/src/index-browser.mts +10 -0
  221. package/src/index.ts +294 -0
  222. package/src/load-parse.ts +39 -0
  223. package/src/load.ts +282 -0
  224. package/src/options.ts +136 -0
  225. package/src/parse.ts +105 -0
  226. package/src/parsers/parse5-adapter.ts +66 -0
  227. package/src/slim.ts +33 -0
  228. package/src/static.ts +312 -0
  229. package/src/types.ts +58 -0
  230. package/src/utils.ts +99 -0
package/src/load.ts ADDED
@@ -0,0 +1,282 @@
1
+ import {
2
+ type CheerioOptions,
3
+ type InternalOptions,
4
+ flattenOptions,
5
+ } from './options.js';
6
+ import * as staticMethods from './static.js';
7
+ import { Cheerio } from './cheerio.js';
8
+ import { isHtml, isCheerio } from './utils.js';
9
+ import type { AnyNode, Document, Element, ParentNode } from 'domhandler';
10
+ import type { SelectorType, BasicAcceptedElems } from './types.js';
11
+ import { ElementType } from 'htmlparser2';
12
+
13
+ type StaticType = typeof staticMethods;
14
+
15
+ /**
16
+ * A querying function, bound to a document created from the provided markup.
17
+ *
18
+ * Also provides several helper methods for dealing with the document as a
19
+ * whole.
20
+ */
21
+ export interface CheerioAPI extends StaticType {
22
+ /**
23
+ * This selector method is the starting point for traversing and manipulating
24
+ * the document. Like jQuery, it's the primary method for selecting elements
25
+ * in the document.
26
+ *
27
+ * `selector` searches within the `context` scope, which searches within the
28
+ * `root` scope.
29
+ *
30
+ * @example
31
+ *
32
+ * ```js
33
+ * $('ul .pear').attr('class');
34
+ * //=> pear
35
+ *
36
+ * $('li[class=orange]').html();
37
+ * //=> Orange
38
+ *
39
+ * $('.apple', '#fruits').text();
40
+ * //=> Apple
41
+ * ```
42
+ *
43
+ * Optionally, you can also load HTML by passing the string as the selector:
44
+ *
45
+ * ```js
46
+ * $('<ul id="fruits">...</ul>');
47
+ * ```
48
+ *
49
+ * Or the context:
50
+ *
51
+ * ```js
52
+ * $('ul', '<ul id="fruits">...</ul>');
53
+ * ```
54
+ *
55
+ * Or as the root:
56
+ *
57
+ * ```js
58
+ * $('li', 'ul', '<ul id="fruits">...</ul>');
59
+ * ```
60
+ *
61
+ * @param selector - Either a selector to look for within the document, or the
62
+ * contents of a new Cheerio instance.
63
+ * @param context - Either a selector to look for within the root, or the
64
+ * contents of the document to query.
65
+ * @param root - Optional HTML document string.
66
+ */
67
+ <T extends AnyNode, S extends string>(
68
+ selector?: S | BasicAcceptedElems<T>,
69
+ context?: BasicAcceptedElems<AnyNode> | null,
70
+ root?: BasicAcceptedElems<Document>,
71
+ options?: CheerioOptions,
72
+ ): Cheerio<S extends SelectorType ? Element : T>;
73
+
74
+ /**
75
+ * The root the document was originally loaded with.
76
+ *
77
+ * @private
78
+ */
79
+ _root: Document;
80
+
81
+ /**
82
+ * The options the document was originally loaded with.
83
+ *
84
+ * @private
85
+ */
86
+ _options: InternalOptions;
87
+
88
+ /** Mimic jQuery's prototype alias for plugin authors. */
89
+ fn: typeof Cheerio.prototype;
90
+
91
+ /**
92
+ * The `.load` static method defined on the "loaded" Cheerio factory function
93
+ * is deprecated. Users are encouraged to instead use the `load` function
94
+ * exported by the Cheerio module.
95
+ *
96
+ * @deprecated Use the `load` function exported by the Cheerio module.
97
+ * @category Deprecated
98
+ * @example
99
+ *
100
+ * ```js
101
+ * const $ = cheerio.load('<h1>Hello, <span>world</span>.</h1>');
102
+ * ```
103
+ */
104
+ load: ReturnType<typeof getLoad>;
105
+ }
106
+
107
+ export function getLoad(
108
+ parse: Cheerio<AnyNode>['_parse'],
109
+ render: (
110
+ dom: AnyNode | ArrayLike<AnyNode>,
111
+ options: InternalOptions,
112
+ ) => string,
113
+ ) {
114
+ /**
115
+ * Create a querying function, bound to a document created from the provided
116
+ * markup.
117
+ *
118
+ * Note that similar to web browser contexts, this operation may introduce
119
+ * `<html>`, `<head>`, and `<body>` elements; set `isDocument` to `false` to
120
+ * switch to fragment mode and disable this.
121
+ *
122
+ * @param content - Markup to be loaded.
123
+ * @param options - Options for the created instance.
124
+ * @param isDocument - Allows parser to be switched to fragment mode.
125
+ * @returns The loaded document.
126
+ * @see {@link https://cheerio.js.org/docs/basics/loading#load} for additional usage information.
127
+ */
128
+ return function load(
129
+ content: string | AnyNode | AnyNode[] | Buffer,
130
+ options?: CheerioOptions | null,
131
+ isDocument = true,
132
+ ): CheerioAPI {
133
+ if ((content as string | null) == null) {
134
+ throw new Error('cheerio.load() expects a string');
135
+ }
136
+
137
+ const internalOpts = flattenOptions(options);
138
+ const initialRoot = parse(content, internalOpts, isDocument, null);
139
+
140
+ /**
141
+ * Create an extended class here, so that extensions only live on one
142
+ * instance.
143
+ */
144
+ class LoadedCheerio<T> extends Cheerio<T> {
145
+ _make<T>(
146
+ selector?: ArrayLike<T> | T | string,
147
+ context?: BasicAcceptedElems<AnyNode> | null,
148
+ ): Cheerio<T> {
149
+ const cheerio = initialize(selector, context);
150
+ cheerio.prevObject = this;
151
+
152
+ return cheerio;
153
+ }
154
+
155
+ _parse(
156
+ content: string | Document | AnyNode | AnyNode[] | Buffer,
157
+ options: InternalOptions,
158
+ isDocument: boolean,
159
+ context: ParentNode | null,
160
+ ) {
161
+ return parse(content, options, isDocument, context);
162
+ }
163
+
164
+ _render(dom: AnyNode | ArrayLike<AnyNode>): string {
165
+ return render(dom, this.options);
166
+ }
167
+ }
168
+
169
+ function initialize<T = AnyNode, S extends string = string>(
170
+ selector?: ArrayLike<T> | T | S,
171
+ context?: BasicAcceptedElems<AnyNode> | null,
172
+ root: BasicAcceptedElems<Document> = initialRoot,
173
+ opts?: CheerioOptions,
174
+ ): Cheerio<S extends SelectorType ? Element : T> {
175
+ type Result = S extends SelectorType ? Element : T;
176
+
177
+ // $($)
178
+ if (selector && isCheerio<Result>(selector)) return selector;
179
+
180
+ const options = flattenOptions(opts, internalOpts);
181
+ const r =
182
+ typeof root === 'string'
183
+ ? [parse(root, options, false, null)]
184
+ : 'length' in root
185
+ ? root
186
+ : [root];
187
+ const rootInstance = isCheerio<Document>(r)
188
+ ? r
189
+ : new LoadedCheerio<Document>(r, null, options);
190
+ // Add a cyclic reference, so that calling methods on `_root` never fails.
191
+ rootInstance._root = rootInstance;
192
+
193
+ // $(), $(null), $(undefined), $(false)
194
+ if (!selector) {
195
+ return new LoadedCheerio<Result>(undefined, rootInstance, options);
196
+ }
197
+
198
+ const elements: AnyNode[] | undefined =
199
+ typeof selector === 'string' && isHtml(selector)
200
+ ? // $(<html>)
201
+ parse(selector, options, false, null).children
202
+ : isNode(selector)
203
+ ? // $(dom)
204
+ [selector]
205
+ : Array.isArray(selector)
206
+ ? // $([dom])
207
+ selector
208
+ : undefined;
209
+
210
+ const instance = new LoadedCheerio(elements, rootInstance, options);
211
+
212
+ if (elements) {
213
+ return instance as Cheerio<Result>;
214
+ }
215
+
216
+ if (typeof selector !== 'string') {
217
+ throw new TypeError('Unexpected type of selector');
218
+ }
219
+
220
+ // We know that our selector is a string now.
221
+ let search = selector;
222
+
223
+ const searchContext: Cheerio<AnyNode> | undefined = context
224
+ ? // If we don't have a context, maybe we have a root, from loading
225
+ typeof context === 'string'
226
+ ? isHtml(context)
227
+ ? // $('li', '<ul>...</ul>')
228
+ new LoadedCheerio<Document>(
229
+ [parse(context, options, false, null)],
230
+ rootInstance,
231
+ options,
232
+ )
233
+ : // $('li', 'ul')
234
+ ((search = `${context} ${search}` as S), rootInstance)
235
+ : isCheerio<AnyNode>(context)
236
+ ? // $('li', $)
237
+ context
238
+ : // $('li', node), $('li', [nodes])
239
+ new LoadedCheerio<AnyNode>(
240
+ Array.isArray(context) ? context : [context],
241
+ rootInstance,
242
+ options,
243
+ )
244
+ : rootInstance;
245
+
246
+ // If we still don't have a context, return
247
+ if (!searchContext) return instance as Cheerio<Result>;
248
+
249
+ /*
250
+ * #id, .class, tag
251
+ */
252
+ return searchContext.find(search) as Cheerio<Result>;
253
+ }
254
+
255
+ // Add in static methods & properties
256
+ Object.assign(initialize, staticMethods, {
257
+ load,
258
+ // `_root` and `_options` are used in static methods.
259
+ _root: initialRoot,
260
+ _options: internalOpts,
261
+ // Add `fn` for plugins
262
+ fn: LoadedCheerio.prototype,
263
+ // Add the prototype here to maintain `instanceof` behavior.
264
+ prototype: LoadedCheerio.prototype,
265
+ });
266
+
267
+ return initialize as CheerioAPI;
268
+ };
269
+ }
270
+
271
+ function isNode(obj: unknown): obj is AnyNode {
272
+ return (
273
+ // @ts-expect-error: TS doesn't know about the `name` property.
274
+ !!obj.name ||
275
+ // @ts-expect-error: TS doesn't know about the `type` property.
276
+ obj.type === ElementType.Root ||
277
+ // @ts-expect-error: TS doesn't know about the `type` property.
278
+ obj.type === ElementType.Text ||
279
+ // @ts-expect-error: TS doesn't know about the `type` property.
280
+ obj.type === ElementType.Comment
281
+ );
282
+ }
package/src/options.ts ADDED
@@ -0,0 +1,136 @@
1
+ import type { DomHandlerOptions } from 'domhandler';
2
+ import type { ParserOptions as HTMLParser2ParserOptions } from 'htmlparser2';
3
+ import type { ParserOptions as Parse5ParserOptions } from 'parse5';
4
+ import type { Htmlparser2TreeAdapterMap } from 'parse5-htmlparser2-tree-adapter';
5
+ import type { Options as SelectOptions } from 'cheerio-select';
6
+ import type { DomSerializerOptions } from 'dom-serializer';
7
+
8
+ /**
9
+ * Options accepted by htmlparser2, the default parser for XML.
10
+ *
11
+ * @see https://github.com/fb55/htmlparser2/wiki/Parser-options
12
+ */
13
+ export interface HTMLParser2Options
14
+ extends DomHandlerOptions, DomSerializerOptions, HTMLParser2ParserOptions {
15
+ /** Treat the input as an XML document. */
16
+ xmlMode?: boolean;
17
+ }
18
+
19
+ /**
20
+ * Options accepted by Cheerio.
21
+ *
22
+ * Please note that parser-specific options are _only recognized_ if the
23
+ * relevant parser is used.
24
+ */
25
+ export interface CheerioOptions extends Parse5ParserOptions<Htmlparser2TreeAdapterMap> {
26
+ /**
27
+ * Recommended way of configuring htmlparser2 when wanting to parse XML.
28
+ *
29
+ * This will switch Cheerio to use htmlparser2.
30
+ *
31
+ * @default false
32
+ */
33
+ xml?: HTMLParser2Options | boolean;
34
+
35
+ /**
36
+ * Enable xml mode, which will switch Cheerio to use htmlparser2.
37
+ *
38
+ * @deprecated Please use the `xml` option instead.
39
+ * @default false
40
+ */
41
+ xmlMode?: boolean;
42
+
43
+ /** The base URI for the document. Used to resolve the `href` and `src` props. */
44
+ baseURI?: string | URL;
45
+
46
+ /**
47
+ * Is the document in quirks mode?
48
+ *
49
+ * This will lead to `.className` and `#id` being case-insensitive.
50
+ *
51
+ * @default false
52
+ */
53
+ quirksMode?: SelectOptions['quirksMode'];
54
+ /**
55
+ * Extension point for pseudo-classes.
56
+ *
57
+ * Maps from names to either strings or functions.
58
+ *
59
+ * - A string value is a selector that the element must match to be selected.
60
+ * - A function is called with the element as its first argument, and optional
61
+ * parameters second. If it returns true, the element is selected.
62
+ *
63
+ * @example
64
+ *
65
+ * ```js
66
+ * const $ = cheerio.load(
67
+ * '<div class="foo"></div><div data-bar="boo"></div>',
68
+ * {
69
+ * pseudos: {
70
+ * // `:foo` is an alias for `div.foo`
71
+ * foo: 'div.foo',
72
+ * // `:bar(val)` is equivalent to `[data-bar=val s]`
73
+ * bar: (el, val) => el.attribs['data-bar'] === val,
74
+ * },
75
+ * },
76
+ * );
77
+ *
78
+ * $(':foo').length; // 1
79
+ * $('div:bar(boo)').length; // 1
80
+ * $('div:bar(baz)').length; // 0
81
+ * ```
82
+ */
83
+ pseudos?: SelectOptions['pseudos'];
84
+ }
85
+
86
+ /** Internal options for Cheerio. */
87
+ export interface InternalOptions
88
+ extends HTMLParser2Options, Omit<CheerioOptions, 'xml'> {
89
+ /**
90
+ * Whether to use htmlparser2.
91
+ *
92
+ * This is set to true if `xml` is set to true.
93
+ */
94
+ _useHtmlParser2?: boolean;
95
+ }
96
+
97
+ const defaultOpts: InternalOptions = {
98
+ _useHtmlParser2: false,
99
+ };
100
+
101
+ /**
102
+ * Flatten the options for Cheerio.
103
+ *
104
+ * This will set `_useHtmlParser2` to true if `xml` is truthy or `xmlMode` is enabled.
105
+ *
106
+ * @param options - The options to flatten.
107
+ * @param baseOptions - The base options to use.
108
+ * @returns The flattened options.
109
+ */
110
+ export function flattenOptions(
111
+ options?: CheerioOptions | null,
112
+ baseOptions?: InternalOptions,
113
+ ): InternalOptions {
114
+ if (!options) {
115
+ return baseOptions ?? defaultOpts;
116
+ }
117
+
118
+ const opts: InternalOptions = {
119
+ _useHtmlParser2: !!options.xmlMode,
120
+ ...baseOptions,
121
+ ...options,
122
+ };
123
+
124
+ if (options.xml) {
125
+ opts._useHtmlParser2 = true;
126
+ opts.xmlMode = true;
127
+
128
+ if (options.xml !== true) {
129
+ Object.assign(opts, options.xml);
130
+ }
131
+ } else if (options.xmlMode) {
132
+ opts._useHtmlParser2 = true;
133
+ }
134
+
135
+ return opts;
136
+ }
package/src/parse.ts ADDED
@@ -0,0 +1,105 @@
1
+ import { removeElement } from 'domutils';
2
+ import {
3
+ type AnyNode,
4
+ Document,
5
+ type ParentNode,
6
+ isDocument as checkIsDocument,
7
+ } from 'domhandler';
8
+ import type { InternalOptions } from './options.js';
9
+
10
+ /**
11
+ * Get the parse function with options.
12
+ *
13
+ * @param parser - The parser function.
14
+ * @returns The parse function with options.
15
+ */
16
+ export function getParse(
17
+ parser: (
18
+ content: string,
19
+ options: InternalOptions,
20
+ isDocument: boolean,
21
+ context: ParentNode | null,
22
+ ) => Document,
23
+ ) {
24
+ /**
25
+ * Parse a HTML string or a node.
26
+ *
27
+ * @param content - The HTML string or node.
28
+ * @param options - The parser options.
29
+ * @param isDocument - If `content` is a document.
30
+ * @param context - The context node in the DOM tree.
31
+ * @returns The parsed document node.
32
+ */
33
+ return function parse(
34
+ content: string | Document | AnyNode | AnyNode[] | Buffer,
35
+ options: InternalOptions,
36
+ isDocument: boolean,
37
+ context: ParentNode | null,
38
+ ): Document {
39
+ if (typeof Buffer !== 'undefined' && Buffer.isBuffer(content)) {
40
+ content = content.toString();
41
+ }
42
+
43
+ if (typeof content === 'string') {
44
+ return parser(content, options, isDocument, context);
45
+ }
46
+
47
+ const doc = content as AnyNode | AnyNode[] | Document;
48
+
49
+ if (!Array.isArray(doc) && checkIsDocument(doc)) {
50
+ // If `doc` is already a root, just return it
51
+ return doc;
52
+ }
53
+
54
+ // Add content to new root element
55
+ const root = new Document([]);
56
+
57
+ // Update the DOM using the root
58
+ update(doc, root);
59
+
60
+ return root;
61
+ };
62
+ }
63
+
64
+ /**
65
+ * Update the dom structure, for one changed layer.
66
+ *
67
+ * @param newChilds - The new children.
68
+ * @param parent - The new parent.
69
+ * @returns The parent node.
70
+ */
71
+ export function update(
72
+ newChilds: AnyNode[] | AnyNode,
73
+ parent: ParentNode | null,
74
+ ): ParentNode | null {
75
+ // Normalize
76
+ const arr = Array.isArray(newChilds) ? newChilds : [newChilds];
77
+
78
+ // Update parent
79
+ if (parent) {
80
+ parent.children = arr;
81
+ } else {
82
+ parent = null;
83
+ }
84
+
85
+ // Update neighbors
86
+ for (let i = 0; i < arr.length; i++) {
87
+ const node = arr[i];
88
+
89
+ // Cleanly remove existing nodes from their previous structures.
90
+ if (node.parent && node.parent.children !== arr) {
91
+ removeElement(node);
92
+ }
93
+
94
+ if (parent) {
95
+ node.prev = arr[i - 1] || null;
96
+ node.next = arr[i + 1] || null;
97
+ } else {
98
+ node.prev = node.next = null;
99
+ }
100
+
101
+ node.parent = parent;
102
+ }
103
+
104
+ return parent;
105
+ }
package/src/parsers/parse5-adapter.ts ADDED
@@ -0,0 +1,66 @@
1
+ import {
2
+ type AnyNode,
3
+ type Document,
4
+ type ParentNode,
5
+ isDocument,
6
+ } from 'domhandler';
7
+ import { parse as parseDocument, parseFragment, serializeOuter } from 'parse5';
8
+ import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
9
+ import type { InternalOptions } from '../options.js';
10
+
11
+ /**
12
+ * Parse the content with `parse5` in the context of the given `ParentNode`.
13
+ *
14
+ * @param content - The content to parse.
15
+ * @param options - A set of options to use to parse.
16
+ * @param isDocument - Whether to parse the content as a full HTML document.
17
+ * @param context - The context in which to parse the content.
18
+ * @returns The parsed content.
19
+ */
20
+ export function parseWithParse5(
21
+ content: string,
22
+ options: InternalOptions,
23
+ isDocument: boolean,
24
+ context: ParentNode | null,
25
+ ): Document {
26
+ options.treeAdapter ??= htmlparser2Adapter;
27
+
28
+ if (options.scriptingEnabled !== false) {
29
+ options.scriptingEnabled = true;
30
+ }
31
+
32
+ return isDocument
33
+ ? parseDocument(content, options)
34
+ : parseFragment(context, content, options);
35
+ }
36
+
37
+ const renderOpts = { treeAdapter: htmlparser2Adapter };
38
+
39
+ /**
40
+ * Renders the given DOM tree with `parse5` and returns the result as a string.
41
+ *
42
+ * @param dom - The DOM tree to render.
43
+ * @returns The rendered document.
44
+ */
45
+ export function renderWithParse5(dom: AnyNode | ArrayLike<AnyNode>): string {
46
+ /*
47
+ * `dom-serializer` passes over the special "root" node and renders the
48
+ * node's children in its place. To mimic this behavior with `parse5`, an
49
+ * equivalent operation must be applied to the input array.
50
+ */
51
+ const nodes = 'length' in dom ? dom : [dom];
52
+ for (let index = 0; index < nodes.length; index += 1) {
53
+ const node = nodes[index];
54
+ if (isDocument(node)) {
55
+ Array.prototype.splice.call(nodes, index, 1, ...node.children);
56
+ }
57
+ }
58
+
59
+ let result = '';
60
+ for (let index = 0; index < nodes.length; index += 1) {
61
+ const node = nodes[index];
62
+ result += serializeOuter(node, renderOpts);
63
+ }
64
+
65
+ return result;
66
+ }
package/src/slim.ts ADDED
@@ -0,0 +1,33 @@
1
+ /**
2
+ * @file Alternative entry point for Cheerio that always uses htmlparser2. This
3
+ * way, parse5 won't be loaded, saving some memory.
4
+ */
5
+ import { type CheerioAPI, getLoad } from './load.js';
6
+ import { type CheerioOptions } from './options.js';
7
+ import { getParse } from './parse.js';
8
+ import type { AnyNode } from 'domhandler';
9
+ import render from 'dom-serializer';
10
+ import { parseDocument } from 'htmlparser2';
11
+
12
+ export { contains, merge } from './static.js';
13
+ export type * from './types.js';
14
+ export type { Cheerio } from './cheerio.js';
15
+ export type { CheerioOptions, HTMLParser2Options } from './options.js';
16
+ export type { CheerioAPI } from './load.js';
17
+
18
+ /**
19
+ * Create a querying function, bound to a document created from the provided
20
+ * markup.
21
+ *
22
+ * @param content - Markup to be loaded.
23
+ * @param options - Options for the created instance.
24
+ * @param isDocument - Forwarded to the parser, but has no effect here:
25
+ * htmlparser2's `parseDocument` only takes the markup and the options.
26
+ * @returns The loaded document.
27
+ * @see {@link https://cheerio.js.org#loading} for additional usage information.
28
+ */
29
+ export const load: (
30
+ content: string | AnyNode | AnyNode[] | Buffer,
31
+ options?: CheerioOptions | null,
32
+ isDocument?: boolean,
33
+ ) => CheerioAPI = getLoad(getParse(parseDocument), render);