@crawlee/types 4.0.0-beta.64 → 4.0.0-beta.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/browser.d.ts +104 -0
- package/browser.d.ts.map +1 -1
- package/package.json +2 -2
package/browser.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { ISession } from './session.js';
|
|
1
2
|
import type { Dictionary } from './utility-types.js';
|
|
2
3
|
export interface Cookie {
|
|
3
4
|
/**
|
|
@@ -60,4 +61,107 @@ export interface BrowserLikeResponse {
|
|
|
60
61
|
url(): string;
|
|
61
62
|
headers(): Dictionary<string | string[]>;
|
|
62
63
|
}
|
|
64
|
+
/**
|
|
65
|
+
* A snapshot of the relevant state of a page, as extracted by
|
|
66
|
+
* {@link IBrowserPool.extractPageState}.
|
|
67
|
+
*/
|
|
68
|
+
export interface PageState {
|
|
69
|
+
/**
|
|
70
|
+
* Cookies currently set in the page's browsing context.
|
|
71
|
+
*/
|
|
72
|
+
cookies: Cookie[];
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Options accepted by {@link IBrowserPool.newPage}.
|
|
76
|
+
*/
|
|
77
|
+
export interface NewPageOptions {
|
|
78
|
+
/**
|
|
79
|
+
* Assign a custom ID to the page. If you don't provide one, a random string
|
|
80
|
+
* ID is generated.
|
|
81
|
+
*/
|
|
82
|
+
id?: string;
|
|
83
|
+
/**
|
|
84
|
+
* The crawling session that will use the returned page.
|
|
85
|
+
*
|
|
86
|
+
* The pool derives proxy configuration from the session's {@link
|
|
87
|
+
* ProxyInfo|proxy} (including TLS-error handling) — there are intentionally
|
|
88
|
+
* no standalone `proxyUrl` / `ignoreTlsErrors` options; configure them
|
|
89
|
+
* through the session instead.
|
|
90
|
+
*
|
|
91
|
+
* Session injection is **best-effort**: the pool may use the session's
|
|
92
|
+
* {@link ProxyInfo|proxy}, cookies, or fingerprint data to configure the
|
|
93
|
+
* page or the underlying browser, but none of this is guaranteed. Different
|
|
94
|
+
* pool implementations (or pool configurations such as `useIncognitoPages`)
|
|
95
|
+
* may support different subsets of session properties — or ignore them
|
|
96
|
+
* entirely.
|
|
97
|
+
*
|
|
98
|
+
* The crawler is still responsible for deterministic session setup (e.g.
|
|
99
|
+
* injecting cookies into the page before navigation) that must happen
|
|
100
|
+
* regardless of pool implementation.
|
|
101
|
+
*/
|
|
102
|
+
session?: ISession;
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Minimal contract that any object passed to a browser crawler as its `browserPool`
|
|
106
|
+
* option must satisfy.
|
|
107
|
+
*
|
|
108
|
+
* Lifecycle (`destroy`) is the responsibility of whoever owns the pool — since a
|
|
109
|
+
* user-supplied pool is never owned by the crawler, the crawler never tears it
|
|
110
|
+
* down.
|
|
111
|
+
*
|
|
112
|
+
* Implement this interface to plug a custom page-provisioning strategy into any
|
|
113
|
+
* Crawlee browser crawler — for example a remote browser farm, a session-aware
|
|
114
|
+
* pool that pins pages to fingerprints differently, or a thin wrapper around the
|
|
115
|
+
* built-in `BrowserPool`.
|
|
116
|
+
*
|
|
117
|
+
* @category Browser management
|
|
118
|
+
*/
|
|
119
|
+
export interface IBrowserPool<Page = unknown> {
|
|
120
|
+
/**
|
|
121
|
+
* Opens a new page. The pool decides which browser to use, launching a new
|
|
122
|
+
* one if needed.
|
|
123
|
+
*/
|
|
124
|
+
newPage(options?: NewPageOptions): Promise<Page>;
|
|
125
|
+
/**
|
|
126
|
+
* Signals the pool that the caller is done with the page. The pool is
|
|
127
|
+
* responsible for closing the page and performing any necessary cleanup
|
|
128
|
+
* (e.g. retiring the underlying browser when a session has gone bad).
|
|
129
|
+
*
|
|
130
|
+
* @param page The page to release back to the pool.
|
|
131
|
+
* @param options.error If the page is being released because of an error,
|
|
132
|
+
* pass the error here. In particular, if the error is a
|
|
133
|
+
* {@link SessionError}, implementations should treat it as a signal
|
|
134
|
+
* to purge all state associated with the session (e.g. discard any
|
|
135
|
+
* browser that served the page).
|
|
136
|
+
*/
|
|
137
|
+
closePage(page: Page, options?: {
|
|
138
|
+
error?: Error;
|
|
139
|
+
}): Promise<void>;
|
|
140
|
+
/**
|
|
141
|
+
* Extracts the relevant state from a page so the caller can persist it —
|
|
142
|
+
* for example, back-propagating cookies into the crawling {@link
|
|
143
|
+
* ISession|session}.
|
|
144
|
+
*
|
|
145
|
+
* @param page The page to read state from.
|
|
146
|
+
*/
|
|
147
|
+
extractPageState(page: Page): Promise<PageState>;
|
|
148
|
+
/**
|
|
149
|
+
* Injects state (currently just cookies) into a page. This is the
|
|
150
|
+
* counterpart to {@link IBrowserPool.extractPageState} and lets the
|
|
151
|
+
* caller set up a page — for example, seeding it with the crawling
|
|
152
|
+
* {@link ISession|session}'s cookies before navigation.
|
|
153
|
+
*
|
|
154
|
+
* As with {@link IBrowserPool.newPage}, the caller decides *what* state
|
|
155
|
+
* to inject, while the pool decides *how*.
|
|
156
|
+
*
|
|
157
|
+
* Isolation between pages is **best-effort**: depending on the pool
|
|
158
|
+
* implementation and its configuration, multiple pages may share a browsing
|
|
159
|
+
* context, so injected state (such as cookies) can bleed across pages
|
|
160
|
+
* served by the same underlying browser.
|
|
161
|
+
*
|
|
162
|
+
* @param page The page to inject state into.
|
|
163
|
+
* @param state The state to inject.
|
|
164
|
+
*/
|
|
165
|
+
injectPageState(page: Page, state: PageState): Promise<void>;
|
|
166
|
+
}
|
|
63
167
|
//# sourceMappingURL=browser.d.ts.map
|
package/browser.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,MAAM,WAAW,MAAM;IACnB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB;;OAEG;IACH,QAAQ,CAAC,EAAE,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;IACrC;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrC;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,GAAG,WAAW,GAAG,QAAQ,CAAC;IAChD;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,mBAAmB;IAChC,GAAG,IAAI,MAAM,CAAC;IACd,OAAO,IAAI,UAAU,CAAC,MAAM,GAAG,MAAM,EAAE,CAAC,CAAC;CAC5C"}
|
|
1
|
+
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,MAAM,WAAW,MAAM;IACnB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB;;OAEG;IACH,QAAQ,CAAC,EAAE,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;IACrC;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrC;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,GAAG,WAAW,GAAG,QAAQ,CAAC;IAChD;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,mBAAmB;IAChC,GAAG,IAAI,MAAM,CAAC;IACd,OAAO,IAAI,UAAU,CAAC,MAAM,GAAG,MAAM,EAAE,CAAC,CAAC;CAC5C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACtB;;OAEG;IACH,OAAO,EAAE,MAAM,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC3B;;;OAGG;IACH,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ;;;;;;;;;;;;;;;;;;OAkBG;IACH,OAAO,CAAC,EAAE,QAAQ,CAAC;CACtB;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,WAAW,YAAY,CAAC,IAAI,GAAG,OAAO;IACxC;;;OAGG;IACH,OAAO,CAAC,OAAO,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEjD;;;;;;;;;;;OAWG;IACH,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,KAAK,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAElE;;;;;;OAMG;IACH,gBAAgB,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IAEjD;;;;;;;;;;;;;;;;OAgBG;IACH,eAAe,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAChE"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/types",
|
|
3
|
-
"version": "4.0.0-beta.64",
|
|
3
|
+
"version": "4.0.0-beta.66",
|
|
4
4
|
"description": "Shared types for the crawlee projects",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -53,5 +53,5 @@
|
|
|
53
53
|
}
|
|
54
54
|
}
|
|
55
55
|
},
|
|
56
|
-
"gitHead": "
|
|
56
|
+
"gitHead": "3a7bed61f54a8bf2c8b2efef17ffa3a37cb18940"
|
|
57
57
|
}
|