@depup/cheerio 1.2.0-depup.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +36 -0
- package/Readme.md +229 -0
- package/changes.json +30 -0
- package/dist/browser/api/attributes.d.ts +385 -0
- package/dist/browser/api/attributes.d.ts.map +1 -0
- package/dist/browser/api/attributes.js +636 -0
- package/dist/browser/api/attributes.js.map +1 -0
- package/dist/browser/api/css.d.ts +42 -0
- package/dist/browser/api/css.d.ts.map +1 -0
- package/dist/browser/api/css.js +116 -0
- package/dist/browser/api/css.js.map +1 -0
- package/dist/browser/api/extract.d.ts +27 -0
- package/dist/browser/api/extract.d.ts.map +1 -0
- package/dist/browser/api/extract.js +42 -0
- package/dist/browser/api/extract.js.map +1 -0
- package/dist/browser/api/forms.d.ts +36 -0
- package/dist/browser/api/forms.d.ts.map +1 -0
- package/dist/browser/api/forms.js +81 -0
- package/dist/browser/api/forms.js.map +1 -0
- package/dist/browser/api/manipulation.d.ts +528 -0
- package/dist/browser/api/manipulation.d.ts.map +1 -0
- package/dist/browser/api/manipulation.js +831 -0
- package/dist/browser/api/manipulation.js.map +1 -0
- package/dist/browser/api/traversing.d.ts +657 -0
- package/dist/browser/api/traversing.d.ts.map +1 -0
- package/dist/browser/api/traversing.js +857 -0
- package/dist/browser/api/traversing.js.map +1 -0
- package/dist/browser/cheerio.d.ts +85 -0
- package/dist/browser/cheerio.d.ts.map +1 -0
- package/dist/browser/cheerio.js +58 -0
- package/dist/browser/cheerio.js.map +1 -0
- package/dist/browser/index-browser.d.mts.map +1 -0
- package/dist/browser/index-browser.mjs.map +1 -0
- package/dist/browser/index.d.ts +5 -0
- package/dist/browser/index.js +3 -0
- package/dist/browser/load-parse.d.ts +20 -0
- package/dist/browser/load-parse.d.ts.map +1 -0
- package/dist/browser/load-parse.js +28 -0
- package/dist/browser/load-parse.js.map +1 -0
- package/dist/browser/load.d.ts +91 -0
- package/dist/browser/load.d.ts.map +1 -0
- package/dist/browser/load.js +129 -0
- package/dist/browser/load.js.map +1 -0
- package/dist/browser/options.d.ts +98 -0
- package/dist/browser/options.d.ts.map +1 -0
- package/dist/browser/options.js +34 -0
- package/dist/browser/options.js.map +1 -0
- package/dist/browser/package.json +3 -0
- package/dist/browser/parse.d.ts +18 -0
- package/dist/browser/parse.d.ts.map +1 -0
- package/dist/browser/parse.js +73 -0
- package/dist/browser/parse.js.map +1 -0
- package/dist/browser/parsers/parse5-adapter.d.ts +20 -0
- package/dist/browser/parsers/parse5-adapter.d.ts.map +1 -0
- package/dist/browser/parsers/parse5-adapter.js +50 -0
- package/dist/browser/parsers/parse5-adapter.js.map +1 -0
- package/dist/browser/slim.d.ts +25 -0
- package/dist/browser/slim.d.ts.map +1 -0
- package/dist/browser/slim.js +22 -0
- package/dist/browser/slim.js.map +1 -0
- package/dist/browser/static.d.ts +112 -0
- package/dist/browser/static.d.ts.map +1 -0
- package/dist/browser/static.js +204 -0
- package/dist/browser/static.js.map +1 -0
- package/dist/browser/types.d.ts +21 -0
- package/dist/browser/types.d.ts.map +1 -0
- package/dist/browser/types.js +3 -0
- package/dist/browser/types.js.map +1 -0
- package/dist/browser/utils.d.ts +55 -0
- package/dist/browser/utils.d.ts.map +1 -0
- package/dist/browser/utils.js +84 -0
- package/dist/browser/utils.js.map +1 -0
- package/dist/commonjs/api/attributes.d.ts +385 -0
- package/dist/commonjs/api/attributes.d.ts.map +1 -0
- package/dist/commonjs/api/attributes.js +647 -0
- package/dist/commonjs/api/attributes.js.map +1 -0
- package/dist/commonjs/api/css.d.ts +42 -0
- package/dist/commonjs/api/css.d.ts.map +1 -0
- package/dist/commonjs/api/css.js +119 -0
- package/dist/commonjs/api/css.js.map +1 -0
- package/dist/commonjs/api/extract.d.ts +27 -0
- package/dist/commonjs/api/extract.d.ts.map +1 -0
- package/dist/commonjs/api/extract.js +45 -0
- package/dist/commonjs/api/extract.js.map +1 -0
- package/dist/commonjs/api/forms.d.ts +36 -0
- package/dist/commonjs/api/forms.d.ts.map +1 -0
- package/dist/commonjs/api/forms.js +85 -0
- package/dist/commonjs/api/forms.js.map +1 -0
- package/dist/commonjs/api/manipulation.d.ts +528 -0
- package/dist/commonjs/api/manipulation.d.ts.map +1 -0
- package/dist/commonjs/api/manipulation.js +850 -0
- package/dist/commonjs/api/manipulation.js.map +1 -0
- package/dist/commonjs/api/traversing.d.ts +657 -0
- package/dist/commonjs/api/traversing.d.ts.map +1 -0
- package/dist/commonjs/api/traversing.js +914 -0
- package/dist/commonjs/api/traversing.js.map +1 -0
- package/dist/commonjs/cheerio.d.ts +85 -0
- package/dist/commonjs/cheerio.d.ts.map +1 -0
- package/dist/commonjs/cheerio.js +95 -0
- package/dist/commonjs/cheerio.js.map +1 -0
- package/dist/commonjs/index.d.ts +104 -0
- package/dist/commonjs/index.d.ts.map +1 -0
- package/dist/commonjs/index.js +250 -0
- package/dist/commonjs/index.js.map +1 -0
- package/dist/commonjs/load-parse.d.ts +20 -0
- package/dist/commonjs/load-parse.d.ts.map +1 -0
- package/dist/commonjs/load-parse.js +34 -0
- package/dist/commonjs/load-parse.js.map +1 -0
- package/dist/commonjs/load.d.ts +91 -0
- package/dist/commonjs/load.d.ts.map +1 -0
- package/dist/commonjs/load.js +165 -0
- package/dist/commonjs/load.js.map +1 -0
- package/dist/commonjs/options.d.ts +98 -0
- package/dist/commonjs/options.d.ts.map +1 -0
- package/dist/commonjs/options.js +37 -0
- package/dist/commonjs/options.js.map +1 -0
- package/dist/commonjs/package.json +3 -0
- package/dist/commonjs/parse.d.ts +18 -0
- package/dist/commonjs/parse.d.ts.map +1 -0
- package/dist/commonjs/parse.js +77 -0
- package/dist/commonjs/parse.js.map +1 -0
- package/dist/commonjs/parsers/parse5-adapter.d.ts +20 -0
- package/dist/commonjs/parsers/parse5-adapter.d.ts.map +1 -0
- package/dist/commonjs/parsers/parse5-adapter.js +54 -0
- package/dist/commonjs/parsers/parse5-adapter.js.map +1 -0
- package/dist/commonjs/slim.d.ts +25 -0
- package/dist/commonjs/slim.d.ts.map +1 -0
- package/dist/commonjs/slim.js +30 -0
- package/dist/commonjs/slim.js.map +1 -0
- package/dist/commonjs/static.d.ts +112 -0
- package/dist/commonjs/static.d.ts.map +1 -0
- package/dist/commonjs/static.js +214 -0
- package/dist/commonjs/static.js.map +1 -0
- package/dist/commonjs/types.d.ts +21 -0
- package/dist/commonjs/types.d.ts.map +1 -0
- package/dist/commonjs/types.js +4 -0
- package/dist/commonjs/types.js.map +1 -0
- package/dist/commonjs/utils.d.ts +55 -0
- package/dist/commonjs/utils.d.ts.map +1 -0
- package/dist/commonjs/utils.js +91 -0
- package/dist/commonjs/utils.js.map +1 -0
- package/dist/esm/api/attributes.d.ts +385 -0
- package/dist/esm/api/attributes.d.ts.map +1 -0
- package/dist/esm/api/attributes.js +636 -0
- package/dist/esm/api/attributes.js.map +1 -0
- package/dist/esm/api/css.d.ts +42 -0
- package/dist/esm/api/css.d.ts.map +1 -0
- package/dist/esm/api/css.js +116 -0
- package/dist/esm/api/css.js.map +1 -0
- package/dist/esm/api/extract.d.ts +27 -0
- package/dist/esm/api/extract.d.ts.map +1 -0
- package/dist/esm/api/extract.js +42 -0
- package/dist/esm/api/extract.js.map +1 -0
- package/dist/esm/api/forms.d.ts +36 -0
- package/dist/esm/api/forms.d.ts.map +1 -0
- package/dist/esm/api/forms.js +81 -0
- package/dist/esm/api/forms.js.map +1 -0
- package/dist/esm/api/manipulation.d.ts +528 -0
- package/dist/esm/api/manipulation.d.ts.map +1 -0
- package/dist/esm/api/manipulation.js +831 -0
- package/dist/esm/api/manipulation.js.map +1 -0
- package/dist/esm/api/traversing.d.ts +657 -0
- package/dist/esm/api/traversing.d.ts.map +1 -0
- package/dist/esm/api/traversing.js +857 -0
- package/dist/esm/api/traversing.js.map +1 -0
- package/dist/esm/cheerio.d.ts +85 -0
- package/dist/esm/cheerio.d.ts.map +1 -0
- package/dist/esm/cheerio.js +58 -0
- package/dist/esm/cheerio.js.map +1 -0
- package/dist/esm/index.d.ts +104 -0
- package/dist/esm/index.d.ts.map +1 -0
- package/dist/esm/index.js +202 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/load-parse.d.ts +20 -0
- package/dist/esm/load-parse.d.ts.map +1 -0
- package/dist/esm/load-parse.js +28 -0
- package/dist/esm/load-parse.js.map +1 -0
- package/dist/esm/load.d.ts +91 -0
- package/dist/esm/load.d.ts.map +1 -0
- package/dist/esm/load.js +129 -0
- package/dist/esm/load.js.map +1 -0
- package/dist/esm/options.d.ts +98 -0
- package/dist/esm/options.d.ts.map +1 -0
- package/dist/esm/options.js +34 -0
- package/dist/esm/options.js.map +1 -0
- package/dist/esm/package.json +3 -0
- package/dist/esm/parse.d.ts +18 -0
- package/dist/esm/parse.d.ts.map +1 -0
- package/dist/esm/parse.js +73 -0
- package/dist/esm/parse.js.map +1 -0
- package/dist/esm/parsers/parse5-adapter.d.ts +20 -0
- package/dist/esm/parsers/parse5-adapter.d.ts.map +1 -0
- package/dist/esm/parsers/parse5-adapter.js +50 -0
- package/dist/esm/parsers/parse5-adapter.js.map +1 -0
- package/dist/esm/slim.d.ts +25 -0
- package/dist/esm/slim.d.ts.map +1 -0
- package/dist/esm/slim.js +22 -0
- package/dist/esm/slim.js.map +1 -0
- package/dist/esm/static.d.ts +112 -0
- package/dist/esm/static.d.ts.map +1 -0
- package/dist/esm/static.js +204 -0
- package/dist/esm/static.js.map +1 -0
- package/dist/esm/types.d.ts +21 -0
- package/dist/esm/types.d.ts.map +1 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/types.js.map +1 -0
- package/dist/esm/utils.d.ts +55 -0
- package/dist/esm/utils.d.ts.map +1 -0
- package/dist/esm/utils.js +84 -0
- package/dist/esm/utils.js.map +1 -0
- package/package.json +219 -0
- package/src/api/attributes.ts +1145 -0
- package/src/api/css.ts +224 -0
- package/src/api/extract.ts +92 -0
- package/src/api/forms.ts +103 -0
- package/src/api/manipulation.ts +1115 -0
- package/src/api/traversing.ts +1175 -0
- package/src/cheerio.ts +143 -0
- package/src/index-browser.mts +10 -0
- package/src/index.ts +294 -0
- package/src/load-parse.ts +39 -0
- package/src/load.ts +282 -0
- package/src/options.ts +136 -0
- package/src/parse.ts +105 -0
- package/src/parsers/parse5-adapter.ts +66 -0
- package/src/slim.ts +33 -0
- package/src/static.ts +312 -0
- package/src/types.ts +58 -0
- package/src/utils.ts +99 -0
package/src/cheerio.ts
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/* eslint-disable @typescript-eslint/no-unsafe-declaration-merging */
|
|
2
|
+
import type { InternalOptions } from './options.js';
|
|
3
|
+
import type { AnyNode, Document, ParentNode } from 'domhandler';
|
|
4
|
+
import type { BasicAcceptedElems } from './types.js';
|
|
5
|
+
|
|
6
|
+
import * as Attributes from './api/attributes.js';
|
|
7
|
+
import * as Traversing from './api/traversing.js';
|
|
8
|
+
import * as Manipulation from './api/manipulation.js';
|
|
9
|
+
import * as Css from './api/css.js';
|
|
10
|
+
import * as Forms from './api/forms.js';
|
|
11
|
+
import * as Extract from './api/extract.js';
|
|
12
|
+
|
|
13
|
+
/**
 * Intersection of the types of all API modules. The `Cheerio` interface
 * extends this type, and the same modules are copied onto
 * `Cheerio.prototype` at the bottom of this file.
 */
type MethodsType =
  & typeof Attributes
  & typeof Traversing
  & typeof Manipulation
  & typeof Css
  & typeof Forms
  & typeof Extract;
|
|
19
|
+
|
|
20
|
+
/**
 * The cheerio class is the central class of the library. It wraps a set of
 * elements and provides an API for traversing, modifying, and interacting with
 * the set.
 *
 * Loading a document will return the Cheerio class bound to the root element of
 * the document. The class will be instantiated when querying the document (when
 * calling `$('selector')`).
 *
 * @example This is the HTML markup we will be using in all of the API examples:
 *
 * ```html
 * <ul id="fruits">
 *   <li class="apple">Apple</li>
 *   <li class="orange">Orange</li>
 *   <li class="pear">Pear</li>
 * </ul>
 * ```
 */
export abstract class Cheerio<T> implements ArrayLike<T> {
  /** Number of wrapped elements; stays `0` when no elements are supplied. */
  length = 0;
  [index: number]: T;

  /** The options this instance was constructed with. */
  options: InternalOptions;

  /**
   * The root of the document. Can be set by using the `root` argument of the
   * constructor.
   *
   * @private
   */
  _root: Cheerio<Document> | null;

  /**
   * NOTE(review): presumably the selection this one was derived from; nothing
   * in this class assigns it — confirm against the API modules.
   */
  prevObject: Cheerio<any> | undefined;

  /**
   * Instance of cheerio. Methods are specified in the modules. Usage of this
   * constructor is not recommended. Please use `$.load` instead.
   *
   * @private
   * @param elements - The new selection.
   * @param root - Sets the root node.
   * @param options - Options for the instance.
   */
  constructor(
    elements: ArrayLike<T> | undefined,
    root: Cheerio<Document> | null,
    options: InternalOptions,
  ) {
    this.options = options;
    this._root = root;

    // Nothing to wrap: keep the empty selection (`length` is already 0).
    if (!elements) {
      return;
    }

    // Copy every entry onto numeric keys so the instance is array-like.
    let position = 0;
    while (position < elements.length) {
      this[position] = elements[position];
      position += 1;
    }

    this.length = elements.length;
  }

  /**
   * Make a cheerio object.
   *
   * @private
   * @param dom - The contents of the new object.
   * @param context - The context of the new object.
   * @returns The new cheerio object.
   */
  abstract _make<T>(
    dom: ArrayLike<T> | T | string,
    context?: BasicAcceptedElems<AnyNode>,
  ): Cheerio<T>;

  /**
   * Parses some content.
   *
   * @private
   * @param content - Content to parse.
   * @param options - Options for parsing.
   * @param isDocument - Allows parser to be switched to fragment mode.
   * @param context - Parent node handed to the parser, or `null`.
   * @returns A document containing the `content`.
   */
  abstract _parse(
    content: string | Document | AnyNode | AnyNode[] | Buffer,
    options: InternalOptions,
    isDocument: boolean,
    context: ParentNode | null,
  ): Document;

  /**
   * Render an element or a set of elements.
   *
   * @private
   * @param dom - DOM to render.
   * @returns The rendered DOM.
   */
  abstract _render(dom: AnyNode | ArrayLike<AnyNode>): string;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Declaration-merged counterpart of the `Cheerio` class: types the members
 * that are attached to `Cheerio.prototype` at runtime rather than declared in
 * the class body (the API module methods, the iterator, and the two
 * properties below).
 */
export interface Cheerio<T> extends MethodsType, Iterable<T> {
  /** Signature string set on the prototype. */
  cheerio: '[cheerio object]';

  /** `Array.prototype.splice`, assigned onto the prototype. */
  splice: (typeof Array.prototype)['splice'];
}
|
|
122
|
+
|
|
123
|
+
/** Set a signature of the object. */
Cheerio.prototype.cheerio = '[cheerio object]';

/*
 * Make cheerio an array-like object
 */
Cheerio.prototype.splice = Array.prototype.splice;

// Support for (const element of $(...)) iteration:
Cheerio.prototype[Symbol.iterator] = Array.prototype[Symbol.iterator];

// Plug in the API: copy each module's exports onto the prototype, one module
// at a time and in this exact order, so later modules win on a name clash.
for (const apiModule of [
  Attributes,
  Traversing,
  Manipulation,
  Css,
  Forms,
  Extract,
]) {
  Object.assign(Cheerio.prototype, apiModule);
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Batteries-included version of Cheerio. This module includes several
|
|
3
|
+
* convenience methods for loading documents from various sources.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export * from './load-parse.js';
|
|
7
|
+
export { contains, merge } from './static.js';
|
|
8
|
+
export type * from './types.js';
|
|
9
|
+
export type {
|
|
10
|
+
Cheerio,
|
|
11
|
+
CheerioAPI,
|
|
12
|
+
CheerioOptions,
|
|
13
|
+
HTMLParser2Options,
|
|
14
|
+
} from './slim.js';
|
|
15
|
+
|
|
16
|
+
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
|
|
17
|
+
import * as htmlparser2 from 'htmlparser2';
|
|
18
|
+
import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
|
|
19
|
+
import {
|
|
20
|
+
decodeBuffer,
|
|
21
|
+
DecodeStream,
|
|
22
|
+
type SnifferOptions,
|
|
23
|
+
} from 'encoding-sniffer';
|
|
24
|
+
import * as undici from 'undici';
|
|
25
|
+
import MIMEType from 'whatwg-mimetype';
|
|
26
|
+
import { Writable, finished } from 'node:stream';
|
|
27
|
+
import type { CheerioAPI } from './load.js';
|
|
28
|
+
import {
|
|
29
|
+
flattenOptions,
|
|
30
|
+
type InternalOptions,
|
|
31
|
+
type CheerioOptions,
|
|
32
|
+
} from './options.js';
|
|
33
|
+
import { load } from './load-parse.js';
|
|
34
|
+
|
|
35
|
+
/**
 * Sniffs the encoding of a buffer, then creates a querying function bound to a
 * document created from the buffer.
 *
 * When sniffing is inconclusive the fallback encoding is `utf8` in XML mode
 * and `windows-1252` otherwise; any `options.encoding` settings are applied
 * on top of that fallback and therefore take precedence.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 *
 * const buffer = fs.readFileSync('index.html');
 * const $ = cheerio.loadBuffer(buffer);
 * ```
 *
 * @param buffer - The buffer to sniff the encoding of.
 * @param options - The options to pass to Cheerio.
 * @returns The loaded document.
 */
export function loadBuffer(
  buffer: Buffer,
  options: DecodeStreamOptions = {},
): CheerioAPI {
  const internalOptions = flattenOptions(options);
  const fallbackEncoding = internalOptions?.xmlMode ? 'utf8' : 'windows-1252';

  // Caller-supplied sniffer settings are spread last so they override the fallback.
  const snifferOptions: SnifferOptions = {
    defaultEncoding: fallbackEncoding,
    ...options.encoding,
  };
  const markup = decodeBuffer(buffer, snifferOptions);

  return load(markup, internalOptions);
}
|
|
65
|
+
|
|
66
|
+
/**
 * Creates the writable stream behind `stringStream` and `decodeStream`.
 *
 * With `_useHtmlParser2` set, string chunks are forwarded to an htmlparser2
 * document stream; otherwise a parse5 parser stream is returned directly. In
 * both cases `cb` receives the error (if any) and a `CheerioAPI` loaded from
 * the resulting document.
 *
 * Note that on the parse5 path the passed `options` object is updated in
 * place (`treeAdapter` and `scriptingEnabled` defaults are filled in).
 *
 * @param options - Flattened Cheerio options, or `undefined`.
 * @param cb - Called once parsing has finished.
 * @returns The writable stream to feed strings into.
 */
function _stringStream(
  options: InternalOptions | undefined,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  if (options?._useHtmlParser2) {
    const documentStream = htmlparser2.createDocumentStream(
      (err, document) => cb(err, load(document, options)),
      options,
    );

    return new Writable({
      // Keep chunks as strings instead of letting Node convert them to buffers.
      decodeStrings: false,
      write(chunk, _encoding, done) {
        if (typeof chunk === 'string') {
          documentStream.write(chunk);
          done();
          return;
        }

        throw new TypeError('Expected a string');
      },
      final(done) {
        documentStream.end();
        done();
      },
    });
  }

  // parse5 path: fill in defaults on the options object itself.
  const parse5Options = options ?? {};

  if (parse5Options.treeAdapter == null) {
    parse5Options.treeAdapter = htmlparser2Adapter;
  }

  // Scripting stays enabled unless it was explicitly switched off.
  if (parse5Options.scriptingEnabled !== false) {
    parse5Options.scriptingEnabled = true;
  }

  const parserStream = new Parse5Stream(parse5Options);

  finished(parserStream, (err) =>
    cb(err, load(parserStream.document, parse5Options)),
  );

  return parserStream;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Creates a stream that parses a sequence of strings into a document.
 *
 * The stream is a `Writable` stream that accepts strings. When the stream is
 * finished, the callback is called with the loaded document.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 * import * as fs from 'fs';
 *
 * const writeStream = cheerio.stringStream({}, (err, $) => {
 *   if (err) {
 *     // Handle error
 *   }
 *
 *   console.log($('h1').text());
 *   // Output: Hello, world!
 * });
 *
 * fs.createReadStream('my-document.html', { encoding: 'utf8' }).pipe(
 *   writeStream,
 * );
 * ```
 *
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function stringStream(
  options: CheerioOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  // Normalise the public options before handing over to the internal builder.
  const internalOptions = flattenOptions(options);

  return _stringStream(internalOptions, cb);
}
|
|
144
|
+
|
|
145
|
+
/**
 * Cheerio options plus the settings used when the input still has to be
 * decoded from bytes.
 */
export interface DecodeStreamOptions extends CheerioOptions {
  /** Options forwarded to the encoding sniffer (`encoding-sniffer`). */
  encoding?: SnifferOptions;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Parses a stream of buffers into a document.
 *
 * The stream is a `Writable` stream that accepts buffers. When the stream is
 * finished, the callback is called with the loaded document.
 *
 * If `options.encoding.defaultEncoding` is not set, it defaults to `utf8` in
 * XML mode and `windows-1252` otherwise. The caller's `options.encoding`
 * object is never modified, so the same options can safely be reused for
 * several documents.
 *
 * @category Loading
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function decodeStream(
  options: DecodeStreamOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  const { encoding: callerEncoding, ...cheerioOptions } = options;
  const opts = flattenOptions(cheerioOptions);

  /*
   * Work on a copy: writing the default into the caller's object would make
   * the first call's default stick for every later call that reuses it (e.g.
   * an HTML decode followed by an XML decode).
   */
  const encoding: SnifferOptions = { ...callerEncoding };

  // Set the default encoding to UTF-8 for XML mode
  encoding.defaultEncoding ??= opts?.xmlMode ? 'utf8' : 'windows-1252';

  const decoder = new DecodeStream(encoding);
  const loadStream = _stringStream(opts, cb);

  // Decoded strings flow from the decoder into the parsing stream.
  decoder.pipe(loadStream);

  return decoder;
}
|
|
177
|
+
|
|
178
|
+
/**
 * `undici` request options without `path`; `fromURL` derives the path from
 * the URL it is given.
 */
type UndiciStreamOptions = Omit<undici.Dispatcher.RequestOptions<unknown>, 'path'>;
|
|
182
|
+
|
|
183
|
+
/** Options accepted by `fromURL`: decoding options plus HTTP request settings. */
export interface CheerioRequestOptions extends DecodeStreamOptions {
  /** The options passed to `undici`'s `stream` method. */
  requestOptions?: UndiciStreamOptions;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Request options `fromURL` uses when the caller supplies no
 * `requestOptions`: a `GET` request with an `Accept` header that prefers
 * HTML and XML responses.
 */
const defaultRequestOptions: UndiciStreamOptions = {
  method: 'GET',
  headers: {
    // Set an Accept header
    accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  },
};
|
|
195
|
+
|
|
196
|
+
/**
 * `fromURL` loads a document from a URL.
 *
 * By default, redirects are allowed and non-2xx responses are rejected.
 *
 * The response's `content-type` decides how the body is handled: anything
 * that is neither HTML nor XML is refused, XML types switch on `xmlMode`, and
 * a `charset` parameter is forwarded to the encoding sniffer. Options passed
 * in `options` (other than `encoding` and `requestOptions`) override the
 * derived `xmlMode` and `baseURI`.
 *
 * The caller's `options.encoding` object is never modified, so one options
 * object can be reused across requests.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 *
 * const $ = await cheerio.fromURL('https://example.com');
 * ```
 *
 * @param url - The URL to load the document from.
 * @param options - The options to pass to Cheerio.
 * @returns The loaded document.
 */
export async function fromURL(
  url: string | URL,
  options: CheerioRequestOptions = {},
): Promise<CheerioAPI> {
  const {
    requestOptions = defaultRequestOptions,
    encoding: callerEncoding,
    ...cheerioOptions
  } = options;
  let undiciStream: Promise<undici.Dispatcher.StreamData<unknown>> | undefined;

  // Add headers if none were supplied.
  const urlObject = typeof url === 'string' ? new URL(url) : url;
  const streamOptions = {
    headers: defaultRequestOptions.headers,
    path: urlObject.pathname + urlObject.search,
    ...requestOptions,
  };

  const promise = new Promise<CheerioAPI>((resolve, reject) => {
    undiciStream = new undici.Client(urlObject.origin)
      .compose(undici.interceptors.redirect({ maxRedirections: 5 }))
      .stream(streamOptions, (res) => {
        /*
         * NOTE(review): errors thrown in this factory presumably surface as a
         * rejection of `undiciStream` (awaited below) — confirm against undici.
         */
        if (res.statusCode < 200 || res.statusCode >= 300) {
          throw new undici.errors.ResponseError(
            'Response Error',
            res.statusCode,
            {
              headers: res.headers,
            },
          );
        }

        // A missing content-type is treated as HTML.
        const contentTypeHeader = res.headers['content-type'] ?? 'text/html';
        const mimeType = new MIMEType(
          Array.isArray(contentTypeHeader)
            ? contentTypeHeader[0]
            : contentTypeHeader,
        );

        if (!mimeType.isHTML() && !mimeType.isXML()) {
          throw new RangeError(
            `The content-type "${mimeType.essence}" is neither HTML nor XML.`,
          );
        }

        /*
         * Forward the charset from the header to the decodeStream. A fresh
         * object is built per response so the caller's `encoding` options
         * are left untouched and no state carries over between calls.
         */
        const encoding: SnifferOptions = {
          ...callerEncoding,
          transportLayerEncodingLabel: mimeType.parameters.get('charset'),
        };

        /*
         * If we allow redirects, we will have entries in the history.
         * The last entry will be the final URL.
         */
        const history = (
          res.context as
            | {
                history?: URL[];
              }
            | undefined
        )?.history;
        // Set the `baseURI` to the final URL.
        const baseURI = history ? history[history.length - 1] : urlObject;

        const opts: DecodeStreamOptions = {
          encoding,
          // Set XML mode based on the MIME type.
          xmlMode: mimeType.isXML(),
          baseURI,
          ...cheerioOptions,
        };

        return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($)));
      });
  });

  // Let's make sure the request is completed before returning the promise.
  await undiciStream;

  return promise;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { type CheerioAPI, getLoad } from './load.js';
|
|
2
|
+
import { getParse } from './parse.js';
|
|
3
|
+
import { renderWithParse5, parseWithParse5 } from './parsers/parse5-adapter.js';
|
|
4
|
+
import type { CheerioOptions } from './options.js';
|
|
5
|
+
import renderWithHtmlparser2 from 'dom-serializer';
|
|
6
|
+
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
|
|
7
|
+
import type { AnyNode } from 'domhandler';
|
|
8
|
+
|
|
9
|
+
/**
 * Parse function used by `load`: dispatches to htmlparser2 when
 * `_useHtmlParser2` is set on the options, and to parse5 otherwise.
 */
const parse = getParse((content, options, isDocument, context) => {
  if (options._useHtmlParser2) {
    return parseWithHtmlparser2(content, options);
  }

  return parseWithParse5(content, options, isDocument, context);
});
|
|
14
|
+
|
|
15
|
+
// Duplicate docs due to https://github.com/TypeStrong/typedoc/issues/1616
/**
 * Create a querying function, bound to a document created from the provided
 * markup.
 *
 * Note that similar to web browser contexts, this operation may introduce
 * `<html>`, `<head>`, and `<body>` elements; set `isDocument` to `false` to
 * switch to fragment mode and disable this.
 *
 * @category Loading
 * @param content - Markup to be loaded.
 * @param options - Options for the created instance.
 * @param isDocument - Allows parser to be switched to fragment mode.
 * @returns The loaded document.
 * @see {@link https://cheerio.js.org/docs/basics/loading#load} for additional usage information.
 */
export const load: (
  content: string | AnyNode | AnyNode[] | Buffer,
  options?: CheerioOptions | null,
  isDocument?: boolean,
) => CheerioAPI = getLoad(parse, (dom, options) => {
  // Serialise with the renderer that matches the parser in use.
  if (options._useHtmlParser2) {
    return renderWithHtmlparser2(dom, options);
  }

  return renderWithParse5(dom);
});
|