website-api 1.1.6 → 1.1.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/src/core/context.js +10 -0
- package/dist/src/types.d.ts +7 -0
- package/package.json +1 -1
package/dist/src/core/context.js
CHANGED
|
@@ -58,6 +58,15 @@ export function createContext(site, options = {}, providers = {}) {
|
|
|
58
58
|
return credentialsCache;
|
|
59
59
|
};
|
|
60
60
|
const http = createHttp({ fetchImpl: providers.fetchImpl, cookieString, userAgent, debug });
|
|
61
|
+
// Host-provided cheerio: extensions can't resolve their own node_modules, so
|
|
62
|
+
// they parse HTML through this instead of importing cheerio. Lazily imported
|
|
63
|
+
// and memoized — JSON-only sites never load it.
|
|
64
|
+
let cheerioMod;
|
|
65
|
+
const loadHtml = async (html) => {
|
|
66
|
+
if (!cheerioMod)
|
|
67
|
+
cheerioMod = await import("cheerio");
|
|
68
|
+
return cheerioMod.load(html);
|
|
69
|
+
};
|
|
61
70
|
// ── browser capability (lazy + memoized) ──
|
|
62
71
|
const connect = providers.connectBrowser ?? connectChrome;
|
|
63
72
|
let session;
|
|
@@ -115,6 +124,7 @@ export function createContext(site, options = {}, providers = {}) {
|
|
|
115
124
|
credentials,
|
|
116
125
|
userAgent,
|
|
117
126
|
http,
|
|
127
|
+
loadHtml,
|
|
118
128
|
browser,
|
|
119
129
|
async eval(fn) {
|
|
120
130
|
const page = await browser();
|
package/dist/src/types.d.ts
CHANGED
|
@@ -146,6 +146,13 @@ export interface SiteContext {
|
|
|
146
146
|
userAgent(): string;
|
|
147
147
|
/** HTTP capability with cookie + User-Agent auto-injection. */
|
|
148
148
|
readonly http: HttpCapability;
|
|
149
|
+
/**
|
|
150
|
+
* Parse an HTML string into a cheerio document using the host's bundled
|
|
151
|
+
* cheerio. Lets sites (especially installed extensions, which can't resolve
|
|
152
|
+
* their own `node_modules`) scrape HTML without importing/bundling cheerio
|
|
153
|
+
* themselves. Lazily loaded — HTTP/JSON-only sites never pay for it.
|
|
154
|
+
*/
|
|
155
|
+
loadHtml(html: string): Promise<import("cheerio").CheerioAPI>;
|
|
149
156
|
/** Connect to Chrome over CDP, applying fingerprint + auth. Memoized. */
|
|
150
157
|
browser(): Promise<Page>;
|
|
151
158
|
/** Sugar for `(await browser()).evaluate(fn)`. */
|
package/package.json
CHANGED