@isdk/web-fetcher 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.action.cn.md +32 -24
  2. package/README.action.md +14 -4
  3. package/README.cn.md +10 -2
  4. package/README.hackernews.md +52 -0
  5. package/README.md +10 -2
  6. package/dist/index.d.mts +5 -3
  7. package/dist/index.d.ts +5 -3
  8. package/dist/index.js +1 -1
  9. package/dist/index.mjs +1 -1
  10. package/docs/README.md +10 -2
  11. package/docs/_media/README.action.md +14 -4
  12. package/docs/_media/README.cn.md +10 -2
  13. package/docs/classes/CheerioFetchEngine.md +91 -69
  14. package/docs/classes/ClickAction.md +23 -23
  15. package/docs/classes/ExtractAction.md +23 -23
  16. package/docs/classes/FetchAction.md +23 -23
  17. package/docs/classes/FetchEngine.md +87 -69
  18. package/docs/classes/FetchSession.md +8 -8
  19. package/docs/classes/FillAction.md +23 -23
  20. package/docs/classes/GetContentAction.md +23 -23
  21. package/docs/classes/GotoAction.md +23 -23
  22. package/docs/classes/PauseAction.md +23 -23
  23. package/docs/classes/PlaywrightFetchEngine.md +91 -69
  24. package/docs/classes/SubmitAction.md +23 -23
  25. package/docs/classes/WaitForAction.md +23 -23
  26. package/docs/classes/WebFetcher.md +5 -5
  27. package/docs/enumerations/FetchActionResultStatus.md +4 -4
  28. package/docs/functions/fetchWeb.md +2 -2
  29. package/docs/interfaces/BaseFetchActionProperties.md +9 -9
  30. package/docs/interfaces/BaseFetchCollectorActionProperties.md +13 -13
  31. package/docs/interfaces/BaseFetcherProperties.md +29 -21
  32. package/docs/interfaces/DispatchedEngineAction.md +4 -4
  33. package/docs/interfaces/ExtractActionProperties.md +9 -9
  34. package/docs/interfaces/FetchActionInContext.md +13 -13
  35. package/docs/interfaces/FetchActionProperties.md +10 -10
  36. package/docs/interfaces/FetchActionResult.md +6 -6
  37. package/docs/interfaces/FetchContext.md +43 -31
  38. package/docs/interfaces/FetchEngineContext.md +38 -26
  39. package/docs/interfaces/FetchMetadata.md +5 -5
  40. package/docs/interfaces/FetchResponse.md +13 -13
  41. package/docs/interfaces/FetchReturnTypeRegistry.md +7 -7
  42. package/docs/interfaces/FetchSite.md +36 -24
  43. package/docs/interfaces/FetcherOptions.md +35 -23
  44. package/docs/interfaces/GotoActionOptions.md +6 -6
  45. package/docs/interfaces/PendingEngineRequest.md +3 -3
  46. package/docs/interfaces/SubmitActionOptions.md +2 -2
  47. package/docs/interfaces/WaitForActionOptions.md +4 -4
  48. package/docs/type-aliases/BaseFetchActionOptions.md +1 -1
  49. package/docs/type-aliases/BaseFetchCollectorOptions.md +1 -1
  50. package/docs/type-aliases/BrowserEngine.md +1 -1
  51. package/docs/type-aliases/FetchActionCapabilities.md +1 -1
  52. package/docs/type-aliases/FetchActionCapabilityMode.md +1 -1
  53. package/docs/type-aliases/FetchActionOptions.md +1 -1
  54. package/docs/type-aliases/FetchEngineAction.md +1 -1
  55. package/docs/type-aliases/FetchEngineType.md +1 -1
  56. package/docs/type-aliases/FetchReturnType.md +1 -1
  57. package/docs/type-aliases/FetchReturnTypeFor.md +1 -1
  58. package/docs/type-aliases/OnFetchPauseCallback.md +1 -1
  59. package/docs/type-aliases/ResourceType.md +1 -1
  60. package/docs/variables/DefaultFetcherProperties.md +1 -1
  61. package/package.json +2 -1
@@ -80,21 +80,21 @@ export class FillAction extends FetchAction {
80
80
 
81
81
  #### `goto`
82
82
 
83
- 导航到新的 URL。
83
+ 将浏览器导航至指定 URL。
84
84
 
85
85
  * **`id`**: `goto`
86
86
  * **`params`**:
87
87
  * `url` (string): 要导航到的 URL。
88
- * ...其他导航选项,如 `waitUntil`, `timeout`,这些选项会传递给引擎。
88
+ * ...其他导航选项,如 **`waitUntil`**, **`timeout`**,这些选项会传递给引擎。
89
89
  * **`returns`**: `response`
90
90
 
91
91
  #### `click`
92
92
 
93
- 点击一个由选择器指定的元素。
93
+ 点击由 **`selector`** (CSS 选择器) 指定的元素。
94
94
 
95
95
  * **`id`**: `click`
96
96
  * **`params`**:
97
- * `selector` (string): 用于标识要点击元素的 CSS 选择器或 XPath。
97
+ * **`selector`** (string): 用于标识要点击元素的 CSS 选择器。
98
98
  * **`returns`**: `none`
99
99
 
100
100
  #### `fill`
@@ -103,9 +103,14 @@ export class FillAction extends FetchAction {
103
103
 
104
104
  * **`id`**: `fill`
105
105
  * **`params`**:
106
- * `selector` (string): 输入元素的选择器。
106
+ * **`selector`** (string): 输入元素的选择器。
107
107
  * `value` (string): 要填入元素中的文本。
108
- * **`returns`**: `none`
108
+ * **`returns`**: `response`
109
+
110
+ > **注意**:返回内容的具体行为在不同引擎之间存在差异。
111
+ >
112
+ > * **`cheerio`**:此引擎直接操作其内部的 HTML 表示,因此返回的内容会包含填充的值。
113
+ > * **`playwright`**:此引擎返回的是页面的渲染后 HTML (类似于 `document.documentElement.outerHTML`)。然而,当 `page.fill()` 更新输入框时,它修改的是该输入框元素的内部 `value` 属性。这个属性并不总是会被序列化为 HTML 源代码中的 `value` 特性。因此,调用 `page.content()` 所返回的 HTML 中可能**不会**看到填充的值。
109
114
 
110
115
  #### `submit`
111
116
 
@@ -113,28 +118,31 @@ export class FillAction extends FetchAction {
113
118
 
114
119
  * **`id`**: `submit`
115
120
  * **`params`**:
116
- * `selector` (string, optional): 表单元素的选择器。
121
+ * **`selector`** (string, optional): 表单元素的选择器。
117
122
  * **`returns`**: `none`
118
123
 
119
124
  #### `waitFor`
120
125
 
121
- 暂停执行以等待特定条件满足。
126
+ 暂停执行,以等待一个或多个条件的满足。
127
+
128
+ 在 `browser` 模式下,如果提供了多个条件,它们将按顺序依次等待。例如,它会先等待选择器出现,然后等待网络空闲,最后再等待指定的毫秒数。
122
129
 
123
130
  * **`id`**: `waitFor`
124
- * **`params`**: 一个指定等待条件的对象 (例如 `ms`, `selector`, `networkIdle`)。
131
+ * **`params`**: 一个指定等待条件的对象,可包含以下一个或多个键:
132
+ * **`ms`** (number): 等待指定的毫秒数。两个引擎都支持。
133
+ * **`selector`** (string): 等待匹配的选择器出现在页面中。仅 `browser` 模式支持。
134
+ * **`networkIdle`** (boolean): 等待直到网络空闲(即,在一段时间内没有新的网络请求)。仅 `browser` 模式支持。
125
135
  * **`returns`**: `none`
126
136
 
127
137
  #### `pause`
128
138
 
129
- 暂停 Action 脚本的执行,以允许用户手动介入(例如,解决验证码)。
130
-
131
- 此 Action **必须**在 `fetchWeb` 的选项中提供一个 `onPause` 回调处理器。当此 Action 被触发时,它会调用 `onPause` 处理器并等待其执行完成。
139
+ 暂停 Action 脚本的执行,以允许用户手动介入(例如,解决验证码)。此 Action **必须**在 **`fetchWeb`** 的选项中提供一个 **`onPause`** 回调处理器。当此 Action 被触发时,它会调用 **`onPause`** 处理器并等待其执行完成。
132
140
 
133
141
  * **`id`**: `pause`
134
142
  * **`params`**:
135
- * `selector` (string, optional): 如果提供,仅当匹配此选择器的元素存在时,Action 才会暂停。
136
- * `attribute` (string, optional): 与 `selector` 配合使用。如果提供,仅当元素存在且拥有该指定属性时,Action 才会暂停。
137
- * `message` (string, optional): 一个将传递给 `onPause` 处理器的消息,可用于向用户显示提示信息。
143
+ * **`selector`** (string, optional): 如果提供,仅当匹配此选择器的元素存在时,Action 才会暂停。
144
+ * **`attribute`** (string, optional): 与 **`selector`** 配合使用。如果提供,仅当元素存在且拥有该指定属性时,Action 才会暂停。
145
+ * **`message`** (string, optional): 一个将传递给 **`onPause`** 处理器的消息,可用于向用户显示提示信息。
138
146
  * **`returns`**: `none`
139
147
 
140
148
  **示例:在 Google 搜索中处理 CAPTCHA**
@@ -193,10 +201,10 @@ await fetchWeb({
193
201
 
194
202
  #### `extract`
195
203
 
196
- 使用一个强大且声明式的 Schema 从当前页面中提取结构化数据。这是进行数据采集的核心 Action。
204
+ 使用一个强大且声明式的 **`ExtractSchema`** 从当前页面中提取结构化数据。这是进行数据采集的核心 Action。
197
205
 
198
206
  * **`id`**: `extract`
199
- * **`params`**: 一个 `ExtractSchema` 对象, 用于定义提取规则。
207
+ * **`params`**: 一个 **`ExtractSchema`** 对象, 用于定义提取规则。
200
208
  * **`returns`**: `any` (提取出的数据)
201
209
 
202
210
  ##### 提取 Schema 详解
@@ -205,7 +213,7 @@ await fetchWeb({
205
213
 
206
214
  ###### 1. 提取单个值
207
215
 
208
- 最基础的提取,可以指定 `selector` (CSS 选择器), `attribute` (要提取的属性名), 以及 `type` (string, number, boolean, html)。
216
+ 最基础的提取,可以指定 **`selector`** (CSS 选择器), **`attribute`** (要提取的属性名), 以及 **`type`** (string, number, boolean, html)。
209
217
 
210
218
  ```json
211
219
  {
@@ -221,7 +229,7 @@ await fetchWeb({
221
229
 
222
230
  ###### 2. 提取对象
223
231
 
224
- 通过 `type: 'object'` 和 `properties` 字段来定义一个结构化对象。
232
+ 通过 **`type: 'object'`** 和 **`properties`** 字段来定义一个结构化对象。
225
233
 
226
234
  ```json
227
235
  {
@@ -239,7 +247,7 @@ await fetchWeb({
239
247
 
240
248
  ###### 3. 提取数组 (便捷用法)
241
249
 
242
- 通过 `type: 'array'` 来提取一个列表。为了让最常见的操作更简单,我们提供了一些便捷用法。
250
+ 通过 **`type: 'array'`** 来提取一个列表。为了让最常见的操作更简单,我们提供了一些便捷用法。
243
251
 
244
252
  * **提取文本数组 (默认行为)**: 当您想提取一个文本列表时,只需提供选择器,省略 `items` 即可。这是最常见的用法。
245
253
 
@@ -263,7 +271,7 @@ await fetchWeb({
263
271
 
264
272
  > 上例将返回一个包含所有 `<li>` 标签文本的数组, 如 `["tech", "news"]`。
265
273
 
266
- * **提取属性数组 (快捷方式)**: 当您只想提取一个属性列表(例如所有链接的 `href`)时,也无需嵌套 `items`。直接在 `array` 定义中声明 `attribute` 即可。
274
+ * **提取属性数组 (快捷方式)**: 当您只想提取一个属性列表(例如所有链接的 **`href`**)时,也无需嵌套 `items`。直接在 `array` 定义中声明 **`attribute`** 即可。
267
275
 
268
276
  ```json
269
277
 
@@ -289,10 +297,10 @@ await fetchWeb({
289
297
 
290
298
  ###### 4. 精确筛选: `has` 和 `exclude`
291
299
 
292
- 您可以在任何包含 `selector` 的 Schema 中使用 `has` 和 `exclude` 字段来精确控制元素的选择。
300
+ 您可以在任何包含 **`selector`** 的 Schema 中使用 **`has`** 和 **`exclude`** 字段来精确控制元素的选择。
293
301
 
294
- * `has`: 一个 CSS 选择器,用于确保所选元素**必须包含**匹配此选择器的后代元素。
295
- * `exclude`: 一个 CSS 选择器,用于从结果中**排除**匹配此选择器的元素。
302
+ * **`has`**: 一个 CSS 选择器,用于确保所选元素**必须包含**匹配此选择器的后代元素。
303
+ * **`exclude`**: 一个 CSS 选择器,用于从结果中**排除**匹配此选择器的元素。
296
304
 
297
305
  **完整示例: 提取包含图片且未被标记为"草稿"的文章链接**
298
306
 
package/README.action.md CHANGED
@@ -94,7 +94,7 @@ Clicks on an element specified by a selector.
94
94
 
95
95
  * **`id`**: `click`
96
96
  * **`params`**:
97
- * `selector` (string): A CSS selector or XPath to identify the element to click.
97
+ * `selector` (string): A CSS selector to identify the element to click.
98
98
  * **`returns`**: `none`
99
99
 
100
100
  #### `fill`
@@ -105,7 +105,12 @@ Fills an input field with a specified value.
105
105
  * **`params`**:
106
106
  * `selector` (string): A selector for the input element.
107
107
  * `value` (string): The text to fill into the element.
108
- * **`returns`**: `none`
108
+ * **`returns`**: `response`
109
+
110
+ > **Note**: The behavior of the returned content differs between engines.
111
+ >
112
+ > * **`cheerio`**: This engine directly manipulates its internal HTML representation, so the returned content will include the filled value.
113
+ > * **`playwright`**: This engine returns the rendered HTML of the page (similar to `document.documentElement.outerHTML`). However, when `page.fill()` updates an input, it changes the input's internal `value` property. This property is not always serialized back to the `value` attribute in the HTML source. As a result, the filled value may **not** be visible in the HTML returned by `page.content()`.
109
114
 
110
115
  #### `submit`
111
116
 
@@ -118,10 +123,15 @@ Submits a form.
118
123
 
119
124
  #### `waitFor`
120
125
 
121
- Pauses execution to wait for a specific condition to be met.
126
+ Pauses execution to wait for one or more conditions to be met.
127
+
128
+ In `browser` mode, if multiple conditions are provided, they are awaited sequentially. For example, it will first wait for the selector to appear, then wait for the network to be idle, and finally wait for the specified duration.
122
129
 
123
130
  * **`id`**: `waitFor`
124
- * **`params`**: An object specifying the wait condition (e.g., `ms`, `selector`, `networkIdle`).
131
+ * **`params`**: An object specifying the wait condition, which can contain one or more of the following keys:
132
+ * **`ms`** (number): Waits for the specified number of milliseconds. Supported by both engines.
133
+ * **`selector`** (string): Waits for an element matching the selector to appear on the page. Supported only in `browser` mode.
134
+ * **`networkIdle`** (boolean): Waits until the network is idle (i.e., no new network requests for a period of time). Supported only in `browser` mode.
125
135
  * **`returns`**: `none`
126
136
 
127
137
  #### `pause`
package/README.cn.md CHANGED
@@ -1,9 +1,16 @@
1
1
  # 🕸️ @isdk/web-fetcher
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/%40isdk%2Fweb-fetcher)](https://www.npmjs.com/package/@isdk/web-fetcher)
4
+ [![npm downloads](https://img.shields.io/npm/dw/%40isdk%2Fweb-fetcher)](https://www.npmjs.com/package/@isdk/web-fetcher)
5
+ [![License](https://img.shields.io/github/license/isdk/web-fetcher.js)](https://github.com/isdk/web-fetcher.js/blob/main/LICENSE)
6
+ [![Node](https://img.shields.io/badge/node-%3E%3D18-339933?logo=node.js)](https://nodejs.org/)
7
+ [![TypeScript](https://img.shields.io/badge/TypeScript-Types%20included-3178C6?logo=typescript)](https://www.typescriptlang.org/)
8
+ [![GitHub Stars](https://img.shields.io/github/stars/isdk/web-fetcher.js?logo=github)](https://github.com/isdk/web-fetcher.js)
9
+ ![antibot](https://img.shields.io/badge/antibot-optional-orange)
10
+
3
11
  [English](./README.md) | 简体中文
4
12
 
5
- > 一个功能强大且灵活的 Web 抓取与浏览器自动化库。
6
- > 它采用双引擎架构(HTTP 和浏览器)和声明式动作系统,是 AI 代理和复杂数据抓取任务的理想选择。
13
+ > 一个面向AI的网页自动化库,它将复杂的网页交互简化为声明式JSON动作脚本。一次编写,你的脚本即可在快速的 **`http`** 模式(用于静态内容)或完整的 **`browser`** 模式(用于动态站点)下运行。可选的 **`antibot`** 标志有助于绕过检测机制。该库专为有针对性的、面向任务的数据提取而设计(例如,从页面Y获取数据X),而非用于构建全站爬虫。
7
14
 
8
15
  ---
9
16
 
@@ -137,6 +144,7 @@ searchGoogle('gemini');
137
144
  * `fill`: 用指定的值填充一个输入字段。
138
145
  * `submit`: 提交一个表单。
139
146
  * `waitFor`: 暂停执行以等待特定条件(例如,超时、选择器出现或网络空闲)。
147
+ * `pause`: 暂停执行以进行手动干预(例如,解决验证码)。
140
148
  * `getContent`: 获取当前页面状态的完整内容(HTML、文本等)。
141
149
  * `extract`: 使用富有表现力的声明式 Schema,可轻松提取页面中的任意结构化数据。
142
150
 
@@ -0,0 +1,52 @@
1
+ ### **Title:**
2
+
3
+ `Show HN: I built a declarative web automation library for AI agents`
4
+
5
+ ---
6
+
7
+ ### **First Comment (Final Masterpiece Version):**
8
+
9
+ GitHub: https://github.com/isdk/web-fetcher.js
10
+
11
+ Hey HN,
12
+
13
+ I’ve been building some AI agents lately and ran into a fundamental problem: LLMs are great at generating structured data like JSON, but they are terrible at writing the brittle, procedural JavaScript needed for web automation (`await page.click(...)`, `await page.waitFor(...)`, etc.).
14
+
15
+ This led me to build `@isdk/web-fetcher`, a library designed around a few core principles to solve this mismatch.
16
+
17
+ * **Declarative JSON Actions for AI:** Instead of code, you define tasks in a simple JSON "plan." This is a format an LLM can easily generate and reason about, making it a much more natural interface for an agent.
18
+
19
+ * **A Unified, Dual-Engine API:** It has a fast `http` engine (using Cheerio) and a full `browser` engine (using Playwright). The key was to design a single API (with actions like `extract`, `fill`, etc.) that works across both, allowing the library to execute your plan in the most efficient way.
20
+
21
+ * **Simple but Powerful Anti-Bot Evasion:** In `browser` mode, you just add `antibot: true`. Under the hood, this does more than just rotate user-agents; it meticulously equips the browser with a more convincing, human-like fingerprint—modifying TLS signatures, ordering headers correctly, and patching navigator properties to evade common commercial bot detectors. It's a complex problem solved with a single switch.
22
+
23
+ To avoid reinventing the core crawling infrastructure, the library is built on top of the excellent `crawlee` library. My work was to design and implement this declarative, AI-friendly layer on top.
24
+
25
+ Here’s an example of what a browser-based plan with the anti-bot flag looks like:
26
+
27
+ ```typescript
28
+ // Define a plan to access a site with strong bot detection
29
+ const plan = {
30
+ url: 'https://some-protected-site.com',
31
+ engine: 'browser',
32
+ antibot: true, // Just flip this switch for enhanced evasion
33
+ actions: [
34
+ { id: 'fill', params: { selector: '#search-input', value: 'Hacker News' } },
35
+ { id: 'click', params: { selector: '#search-button' } },
36
+ { id: 'waitFor', params: { selector: '#results' } },
37
+ {
38
+ id: 'extract',
39
+ params: {"type": "array", "selector": "#results a", "attribute": "href"},
40
+ storeAs: 'searchResults',
41
+ },
42
+ ]
43
+ };
44
+
45
+ const { result, outputs } = await fetchWeb(plan);
46
+ console.log('Search finalUrl:', result?.finalUrl);
47
+ console.log('Outputs searchResults:', outputs.searchResults);
48
+ ```
49
+
50
+ The project is new, and I believe this declarative approach is a more robust path forward for building capable AI agents that can interact with the web. I'd love to hear the community's thoughts and critiques on this design.
51
+
52
+ Thanks
package/README.md CHANGED
@@ -1,9 +1,16 @@
1
1
  # 🕸️ @isdk/web-fetcher
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/%40isdk%2Fweb-fetcher)](https://www.npmjs.com/package/@isdk/web-fetcher)
4
+ [![npm downloads](https://img.shields.io/npm/dw/%40isdk%2Fweb-fetcher)](https://www.npmjs.com/package/@isdk/web-fetcher)
5
+ [![License](https://img.shields.io/github/license/isdk/web-fetcher.js)](https://github.com/isdk/web-fetcher.js/blob/main/LICENSE)
6
+ [![Node](https://img.shields.io/badge/node-%3E%3D18-339933?logo=node.js)](https://nodejs.org/)
7
+ [![TypeScript](https://img.shields.io/badge/TypeScript-Types%20included-3178C6?logo=typescript)](https://www.typescriptlang.org/)
8
+ [![GitHub Stars](https://img.shields.io/github/stars/isdk/web-fetcher.js?logo=github)](https://github.com/isdk/web-fetcher.js)
9
+ ![antibot](https://img.shields.io/badge/antibot-optional-orange)
10
+
3
11
  English | [简体中文](./README.cn.md)
4
12
 
5
- > A powerful and flexible web fetching and browser automation library.
6
- > It features a dual-engine architecture (HTTP and Browser) and a declarative action system, making it perfect for AI agents and complex data scraping tasks.
13
+ > An AI-friendly web automation library that simplifies complex web interactions into a declarative JSON action script. Write your script once and run it in either a fast **`http`** mode for static content or a full **`browser`** mode for dynamic sites. An optional **`antibot`** flag helps bypass detection mechanisms. The library is designed for targeted, task-oriented data extraction (e.g., get X from page Y), not for building whole-site crawlers.
7
14
 
8
15
  ---
9
16
 
@@ -137,6 +144,7 @@ Here are the essential built-in actions:
137
144
  * `fill`: Fills an input field with a specified value.
138
145
  * `submit`: Submits a form.
139
146
  * `waitFor`: Pauses execution to wait for a specific condition (e.g., a timeout, a selector to appear, or network to be idle).
147
+ * `pause`: Pauses execution for manual intervention (e.g., solving a CAPTCHA).
140
148
  * `getContent`: Retrieves the full content (HTML, text, etc.) of the current page state.
141
149
  * `extract`: Extracts any structured data from the page with ease using an expressive, declarative schema.
142
150
 
package/dist/index.d.mts CHANGED
@@ -979,7 +979,8 @@ declare abstract class FetchEngine<TContext extends CrawlingContext = any, TCraw
979
979
  * Converts implementation-specific context (Playwright `page` or Cheerio `$`) to standardized response.
980
980
  * @internal
981
981
  */
982
- protected abstract buildResponse(context: TContext): Promise<FetchResponse>;
982
+ protected abstract _buildResponse(context: TContext): Promise<FetchResponse>;
983
+ protected buildResponse(context: TContext): Promise<FetchResponse>;
983
984
  /**
984
985
  * Abstract method for executing action within current page context.
985
986
  *
@@ -1247,7 +1248,7 @@ type CheerioNode = ReturnType<CheerioSelection['first']>;
1247
1248
  declare class CheerioFetchEngine extends FetchEngine<CheerioCrawlingContext, CheerioCrawler, CheerioCrawlerOptions> {
1248
1249
  static readonly id = "cheerio";
1249
1250
  static readonly mode = "http";
1250
- protected buildResponse(context: CheerioCrawlingContext): Promise<FetchResponse>;
1251
+ protected _buildResponse(context: CheerioCrawlingContext): Promise<FetchResponse>;
1251
1252
  protected _querySelectorAll(context: {
1252
1253
  $: CheerioAPI;
1253
1254
  el: CheerioNode;
@@ -1267,7 +1268,7 @@ type Locator = ReturnType<Page['locator']>;
1267
1268
  declare class PlaywrightFetchEngine extends FetchEngine<PlaywrightCrawlingContext, PlaywrightCrawler, PlaywrightCrawlerOptions> {
1268
1269
  static readonly id = "playwright";
1269
1270
  static readonly mode = "browser";
1270
- protected buildResponse(context: PlaywrightCrawlingContext): Promise<FetchResponse>;
1271
+ protected _buildResponse(context: PlaywrightCrawlingContext): Promise<FetchResponse>;
1271
1272
  protected _querySelectorAll(context: Locator, selector: string): Promise<any[]>;
1272
1273
  protected _extractValue(schema: ExtractValueSchema, context: Locator): Promise<any>;
1273
1274
  protected executeAction(context: PlaywrightCrawlingContext, action: FetchEngineAction): Promise<any>;
@@ -1420,6 +1421,7 @@ interface BaseFetcherProperties {
1420
1421
  body?: any;
1421
1422
  };
1422
1423
  timeoutMs?: number;
1424
+ requestHandlerTimeoutSecs?: number;
1423
1425
  maxConcurrency?: number;
1424
1426
  maxRequestsPerMinute?: number;
1425
1427
  delayBetweenRequestsMs?: number;
package/dist/index.d.ts CHANGED
@@ -979,7 +979,8 @@ declare abstract class FetchEngine<TContext extends CrawlingContext = any, TCraw
979
979
  * Converts implementation-specific context (Playwright `page` or Cheerio `$`) to standardized response.
980
980
  * @internal
981
981
  */
982
- protected abstract buildResponse(context: TContext): Promise<FetchResponse>;
982
+ protected abstract _buildResponse(context: TContext): Promise<FetchResponse>;
983
+ protected buildResponse(context: TContext): Promise<FetchResponse>;
983
984
  /**
984
985
  * Abstract method for executing action within current page context.
985
986
  *
@@ -1247,7 +1248,7 @@ type CheerioNode = ReturnType<CheerioSelection['first']>;
1247
1248
  declare class CheerioFetchEngine extends FetchEngine<CheerioCrawlingContext, CheerioCrawler, CheerioCrawlerOptions> {
1248
1249
  static readonly id = "cheerio";
1249
1250
  static readonly mode = "http";
1250
- protected buildResponse(context: CheerioCrawlingContext): Promise<FetchResponse>;
1251
+ protected _buildResponse(context: CheerioCrawlingContext): Promise<FetchResponse>;
1251
1252
  protected _querySelectorAll(context: {
1252
1253
  $: CheerioAPI;
1253
1254
  el: CheerioNode;
@@ -1267,7 +1268,7 @@ type Locator = ReturnType<Page['locator']>;
1267
1268
  declare class PlaywrightFetchEngine extends FetchEngine<PlaywrightCrawlingContext, PlaywrightCrawler, PlaywrightCrawlerOptions> {
1268
1269
  static readonly id = "playwright";
1269
1270
  static readonly mode = "browser";
1270
- protected buildResponse(context: PlaywrightCrawlingContext): Promise<FetchResponse>;
1271
+ protected _buildResponse(context: PlaywrightCrawlingContext): Promise<FetchResponse>;
1271
1272
  protected _querySelectorAll(context: Locator, selector: string): Promise<any[]>;
1272
1273
  protected _extractValue(schema: ExtractValueSchema, context: Locator): Promise<any>;
1273
1274
  protected executeAction(context: PlaywrightCrawlingContext, action: FetchEngineAction): Promise<any>;
@@ -1420,6 +1421,7 @@ interface BaseFetcherProperties {
1420
1421
  body?: any;
1421
1422
  };
1422
1423
  timeoutMs?: number;
1424
+ requestHandlerTimeoutSecs?: number;
1423
1425
  maxConcurrency?: number;
1424
1426
  maxRequestsPerMinute?: number;
1425
1427
  delayBetweenRequestsMs?: number;
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- "use strict";var t,e=Object.create,i=Object.defineProperty,s=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,n=Object.getPrototypeOf,o=Object.prototype.hasOwnProperty,a=(t,e,n,a)=>{if(e&&"object"==typeof e||"function"==typeof e)for(let c of r(e))o.call(t,c)||c===n||i(t,c,{get:()=>e[c],enumerable:!(a=s(e,c))||a.enumerable});return t},c=(t,s,r)=>(r=null!=t?e(n(t)):{},a(!s&&t&&t.__esModule?r:i(r,"default",{value:t,enumerable:!0}),t)),l={};((t,e)=>{for(var s in e)i(t,s,{get:e[s],enumerable:!0})})(l,{CheerioFetchEngine:()=>_,ClickAction:()=>G,DefaultFetcherProperties:()=>u,ExtractAction:()=>V,FetchAction:()=>f,FetchActionResultStatus:()=>h,FetchEngine:()=>A,FetchSession:()=>j,FillAction:()=>z,GetContentAction:()=>D,GotoAction:()=>W,PauseAction:()=>K,PlaywrightFetchEngine:()=>B,SubmitAction:()=>I,WaitForAction:()=>J,WebFetcher:()=>F,fetchWeb:()=>Q}),module.exports=(t=l,a(i({},"__esModule",{value:!0}),t));var u={enableSmart:!0,useSiteRegistry:!0,antibot:!1,headers:{},cookies:[],reuseCookies:!0,proxy:[],blockResources:[],ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},h=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(h||{}),w=class t{static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}static get(t){return this.registry.get(t)}static create(t){const e="string"==typeof t?t:t.id||t.name;if(!e)throw new Error("Action must have id or name");const i=this.registry.get(e);return i?new i:void 0}static has(t){return this.registry.has(t)}static list(){return Array.from(this.registry.keys())}static getCapability(t){return this.capabilities[t]??"noop"}getCapability(t){return this.constructor.getCapability(t)}get id(){return this.constructor.id}get returnType(){return this.constructor.returnType}get 
capabilities(){return this.constructor.capabilities}async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}installCollectors(e,i){const s=i?.collectors;if(!s?.length)return;const r=[],n=new Set;for(const i of s){const s=d(i.activateOn),o=d(i.collectOn),a=d(i.deactivateOn),c=!(i.background??!0),l=t.create(i);if(!l)continue;let u=!1,h=!1,w=0;const f=async t=>{if(!u&&!h){u=!0;try{await(l.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,phase:"before",error:t})}}},m=async(t,s)=>{if(!h){u||await f(s);try{const r=Promise.resolve(l.onExecute?.(e,i,s)).then(s=>{var r,n;if(i.storeAs){((r=e.outputs)[n=i.storeAs]||(r[n]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{w++});c&&(n.add(r),r.finally(()=>n.delete(r)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,event:t,phase:"exec",error:i})}}},g=async()=>{if(!h){0===w&&m("collector:after"),h=!0;try{await(l.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),v.forEach(t=>t())}}},b=p(e,s,f),v=y(e,o,m),x=p(e,a,g);if(r.push(...b,...v,...x),!s.length&&!o.length&&!a.length){const t=()=>{g()};e.eventBus.once(`action:${this.id}.end`,t),r.push(()=>e.eventBus.off("fetcher:action:end",t))}}return r.length||n.size>0?{cleanup:()=>r.forEach(t=>t()),awaitExecPendings:async()=>{n.size>0&&await Promise.allSettled(Array.from(n))}}:void 0}async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const 
i=t.internal.actionStack,s=i.length,r=i.length>0?i[i.length-1].id:void 0,n={...e,id:this.id,depth:s,parent:r};i.push(n),t.currentAction=n;const o={action:this,context:t,options:e,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}async afterExec(t,e,i,s){const r=t.internal.actionStack,n=r.length-1,o=s?.collectors;try{await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs&&(t.outputs[e.storeAs]=i?.result),i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const s={action:this,context:t,options:e,result:i,depth:n,stack:[...r]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{r.pop();const e=r.length;t.currentAction=e>0?r[e-1]:void 0}}}async execute(t,e){const i=await this.beforeExec(t,e);let s;try{const i=e?.failOnError??!0;return t.throwHttpErrors=i,s=await this.onExecute(t,e),s&&s.returnType||(s={status:1,returnType:this.returnType??"any",result:s}),s}catch(i){if(s={status:0,error:i,meta:{id:this.id,engineType:t.engine,capability:this.getCapability(t.engine)}},e?.failOnError)throw i;return s}finally{await this.afterExec(t,e,s,i)}}};w.registry=new Map,w.returnType="any",w.capabilities={http:"noop",browser:"noop"};var f=w;function d(t){return t?Array.isArray(t)?t:[t]:[]}function p(t,e,i){const s=[];for(const r of e)if("string"==typeof r||r instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(r,e),s.push(()=>t.eventBus.off(r,e))}return s}function y(t,e,i){const s=[];for(const r of e)if("string"==typeof r||r instanceof RegExp){const e=t=>i(r,t);t.eventBus.on(r,e),s.push(()=>t.eventBus.off(r,e))}return s}var m=require("events-ex");var 
g,b,v=require("lodash-es"),x=c(require("crypto"),1),q=t=>((t=>{!g||g.length<t?(g=Buffer.allocUnsafe(128*t),x.default.randomFillSync(g),b=0):b+t>g.length&&(x.default.randomFillSync(g),b=0),b+=t})(t|=0),g.subarray(b-t,b)),E=((t,e=21)=>((t,e,i)=>{let s=(2<<31-Math.clz32(t.length-1|1))-1,r=Math.ceil(1.6*s*e/t.length);return(n=e)=>{let o="";for(;;){let e=i(r),a=r;for(;a--;)if(o+=t[e[a]&s]||"",o.length===n)return o}}})(t,e,q))("0123456789abcdefghijklmnopqrstuvwxyz",12);var C=require("lodash-es"),S=require("events-ex"),k=require("@isdk/common-error"),$=require("crawlee");function R(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}$.Configuration.getGlobalConfig().set("persistStorage",!1);var A=class{constructor(){this.hdrs={},this.jar=[],this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new S.EventEmitter,this.isPageActive=!1,this.navigationLock=function(){const t=R();return t.release(),t}(),this.blockedTypes=new Set}static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}static get(t){return this.registry.get(t)}static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}static async create(t,e){const i=(0,C.defaultsDeep)(e,t,u),s=i.engine??t.engine,r=s?this.get(s)??this.getByMode(s):null;if(r){const e=new r;return await e.initialize(t,i),e}}async _extract(t,e){const i=t.type;if(!e)return"array"===i?[]:null;if("object"===i){const{selector:i,properties:s}=t;let r=e;if(i){const t=await this._querySelectorAll(e,i);r=t.length>0?t[0]:null}if(!r)return null;const n={};for(const t in s)n[t]=await this._extract(s[t],r);return n}if("array"===i){const{selector:i,items:s}=t,r=i?await this._querySelectorAll(e,i):[e],n=[];for(const t of r)n.push(await this._extract(s,t));return n}const{selector:s}=t;let r=e;if(s){const t=await this._querySelectorAll(e,s);r=t.length>0?t[0]:null}return 
r?this._extractValue(t,r):null}waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}click(t){return this.dispatchAction({type:"click",selector:t})}fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}pause(t){return this.dispatchAction({type:"pause",message:t})}extract(t){const e=this._normalizeSchema(t);return this.dispatchAction({type:"extract",schema:e})}_normalizeSchema(t){const e=JSON.parse(JSON.stringify(t));if(e.properties)for(const t in e.properties)e.properties[t]=this._normalizeSchema(e.properties[t]);if(e.items&&(e.items=this._normalizeSchema(e.items)),"array"===e.type&&(e.attribute&&!e.items&&(e.items={attribute:e.attribute},delete e.attribute),e.items||(e.items={type:"string"})),e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,r=t.split(",").map(t=>{let e=t.trim();return i&&(e=`${e}:has(${i})`),s&&(e=`${e}:not(${s})`),e}).join(", ");e.selector=r,delete e.has,delete e.exclude}return e}get id(){return this.constructor.id}get mode(){return this.constructor.mode}get context(){return this.ctx}async initialize(t,e){if(this.ctx)return;(0,C.merge)(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this.jar=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.requestQueue=await $.RequestQueue.open();const i=await 
this._getSpecificCrawlerOptions(t),s={...(0,C.defaultsDeep)(i,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:{maxPoolSize:1,persistenceOptions:{enable:!1},sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}}}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};this.crawler=this._createCrawler(s),this.crawler.run().then(()=>{this.isCrawlerReady=!0}).catch(t=>{this.isCrawlerReady=!1,console.error("Crawler background error:",t)})}async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}async _executePendingActions(t){await new Promise(e=>{const i=async({action:e,resolve:i,reject:s})=>{try{if("dispose"===e.type)return this.actionEmitter.emit("dispose"),void i();i(await this.executeAction(t,e))}catch(t){s(t)}};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",()=>{this.actionEmitter.removeListener("dispatch",i),e()})})}async _sharedRequestHandler(t){try{const{request:e}=t;this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this.buildResponse(t),r=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&r){const t=new k.CommonError(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await this._executePendingActions(t)}finally{this.isPageActive=!1,this.navigationLock.release()}}async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,r=t?.statusCode||500,n=t?.url?t.url:i.url,o=new k.CommonError(`Request${n?" 
for "+n:""} failed: ${e.message}`,"request",r);s.reject(o)}return this._sharedRequestHandler(t)}async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. Call goto() before performing actions.");return new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}async _requestHandler(t){await this._sharedRequestHandler(t)}async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}async _commonCleanup(){if(this.isPageActive&&await this.dispatchAction({type:"dispose"}).catch(()=>{}),this.pendingRequests.size>0)for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));if(this.actionEmitter.removeAllListeners(),this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("ccrawler teardown error:",t)}this.crawler=void 0}this.isCrawlerReady=void 0,this.requestQueue&&(await this.requestQueue.drop(),this.requestQueue=void 0),this.pendingRequests.clear()}async blockResources(t,e){return e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. 
Call goto() first."))}async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}async cookies(t){return Array.isArray(t)?(this.jar=[...t],!0):null===t?(this.jar=[],!0):[...this.jar]}async dispose(){await this.cleanup()}};async function P(t,e){const i=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites),s=t.engine||i?.engine||"auto";let r=await A.create(t,{engine:s});return r||(r=await A.create(t,{engine:"http"})),r}A.registry=new Map;var j=class{constructor(t={}){this.options=t,this.closed=!1,this.id=E(),this.context=this.createContext(t)}async execute(t){await this.ensureEngine(t);const e=f.create(t);if(!e)throw new Error(`Unknown action: ${t.id||t.name}`);let i,s;this.context.internal.actionIndex=(this.context.internal.actionIndex||0)+1,this.context.currentAction={...t,index:this.context.internal.actionIndex,startedAt:Date.now()};try{return i=await e.execute(this.context,t),i}catch(t){throw s=t,s}finally{this.context.currentAction=void 0}}async executeAll(t){try{for(let e=0;e<t.length;e++){const i=t[e];await this.execute(i)}const e=await this.execute({id:"getContent"});return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t}}getOutputs(){return this.context.outputs}async dispose(){if(this.closed)return;const 
t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}async ensureEngine(t){if(this.closed)throw new Error("Session is closed");if(!this.context.internal.engine){const e=t?.params?.url??this.context.url;if(!await P(this.context,{url:e}))throw new Error("No engine found")}}createContext(t=this.options){const e=new m.EventEmitter;return(0,v.defaultsDeep)({...t,id:this.id,eventBus:e,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},u)}},F=class{constructor(t={}){this.defaults=t}async createSession(t){const e={...this.defaults,...t||{}};return new j(e)}async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const s=e?.actions||[];t&&0!==s.findIndex(e=>"goto"===e.id&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}},U=require("crawlee"),O=c(require("cheerio")),T=require("@isdk/common-error"),_=class extends A{async buildResponse(t){const{request:e,response:i,body:s,$:r}=t,n=r?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");return n&&n!==o&&(o=n),{url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:i?.headers,body:s,html:o,text:o}}async _querySelectorAll(t,e){const{$:i,el:s}=t;return s.find(e).toArray().map(t=>({$:i,el:i(t)}))}async _extractValue(t,e){const{el:i}=e,{attribute:s,type:r="string"}=t;if(0===i.length)return null;let n="";if(n=s?i.attr(s)??null:"html"===r?i.html():i.text().trim(),null===n)return null;switch(r){case"number":return parseFloat(n.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=n.toLowerCase();return"true"===t||"1"===t;default:return n}}async executeAction(t,e){const{$:i}=t;switch(e.type){case"dispose":return;case"extract":if(!i)throw new 
T.CommonError(`Cheerio context not available for action: ${e.type}`,"extract");return this._extract(e.schema,{$:i,el:i.root()});case"click":{if(!i)throw new T.CommonError(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,r=i(s).first();let n;if(0===r.length)try{n=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new T.CommonError(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!r.is("a")||!r.attr("href")){if(r.is('input[type="submit"], button[type="submit"], button, input')){const e=r.closest("form");if(e.length)return this.executeAction(t,{type:"submit",selector:e});throw new T.CommonError("click: submit-like element without form","click")}throw new T.CommonError(`click: unsupported element for http simulate. Selector: ${s}`,"click")}{const e=r.attr("href");n=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:n});return void this._updateStateAfterNavigation(t,o)}case"fill":{if(!i)throw new T.CommonError(`Cheerio context not available for action: ${e.type}`),"fill";const s=i(e.selector).first();if(0===s.length)throw new T.CommonError(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new T.CommonError(`fill: not a form field: ${e.selector}`,"fill");{s.val(e.value);const i=this.buildResponse(t);this.lastResponse=i}return}case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));case"pause":const s=this.ctx?.onPause;return void(s?(console.info(e.message||"Execution paused for manual intervention."),await s({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. 
Skipped."));case"submit":{if(!i)throw new T.CommonError(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new T.NotFoundError(e.selector,"submit");const r=s.attr("action")||t.request.loadedUrl||t.request.url,n=(s.attr("method")||"GET").toUpperCase(),o=new URL(r,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),r=s.attr("name");if(!r)return;const n=s.val();null!=n&&(a[r]=String(n))}),"GET"===n){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await t.sendRequest({url:e.href,method:"GET"})}else{let i;const r={};"application/json"===(e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded")?(i=JSON.stringify(a),r["Content-Type"]="application/json"):(i=new URLSearchParams(a).toString(),r["Content-Type"]="application/x-www-form-urlencoded"),c=await t.sendRequest({url:o,method:"POST",body:i,headers:r})}return void this._updateStateAfterNavigation(t,c)}case"getContent":return this.buildResponse(t);default:throw new T.CommonError(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",T.ErrorCode.NotSupported)}}_updateStateAfterNavigation(t,e){const i=e.response||e,{body:s,headers:r,statusCode:n,statusMessage:o}=i,{url:a,loadedUrl:c}=e,l="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");r&&r["content-type"]?.includes("html")&&(t.$=O.load(l)),this.lastResponse={url:a,finalUrl:c||a,statusCode:n,statusText:o,headers:r||{},body:s,html:l,text:l}}_createCrawler(t){return new U.CheerioCrawler(t)}_getSpecificCrawlerOptions(t){const e=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0,i=e?.length?new U.ProxyConfiguration({proxyUrls:e}):void 
0;return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:Math.max(5,Math.floor((this.opts?.timeoutMs||3e4)/1e3)),proxyConfiguration:i,preNavigationHooks:[(e,i)=>{i.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(i.timeout={request:this.opts.timeoutMs})}]}}async goto(t,e){this.isPageActive&&this.dispatchAction({type:"dispose"}).catch(()=>{});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const r=e?.timeoutMs||this.opts?.timeoutMs||3e4,n=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new T.CommonError(`goto timed out after ${r}ms.`,"gotoTimeout",T.ErrorCode.RequestTimeout))},r);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(n),t(e)},reject:t=>{clearTimeout(n),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=R(),s}};_.id="cheerio",_.mode="http",A.register(_);var M=require("crawlee"),N=require("playwright"),H=require("camoufox-js"),L=require("@isdk/common-error"),B=class extends A{async buildResponse(t){const{page:e,response:i,request:s}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),n=await e.textContent("body");return{url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:n||""}}async _querySelectorAll(t,e){return t.locator(e).all()}async _extractValue(t,e){const{attribute:i,type:s="string"}=t;if(0===await e.count())return null;let r="";if(r=i?await e.getAttribute(i):"html"===s?await e.innerHTML():await e.textContent(),null===r)return null;switch(r=r.trim(),s){case"number":return 
parseFloat(r.replace(/[^0-9.-]+/g,""))||null;case"boolean":const t=r.toLowerCase();return"true"===t||"1"===t;default:return r}}async executeAction(t,e){const{page:i}=t,s=this.opts?.timeoutMs||3e4;switch(e.type){case"navigate":{const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||3e4});s&&(t={...t,response:s});const r=await this.buildResponse(t);return this.lastResponse=r,r}case"extract":return this._extract(e.schema,i.locator("body"));case"click":{await i.click(e.selector,{timeout:s}),await i.waitForLoadState("networkidle",{timeout:s});const r=await this.buildResponse(t);return void(this.lastResponse=r)}case"fill":await i.fill(e.selector,e.value,{timeout:s});const r=await this.buildResponse(t);return void(this.lastResponse=r);case"waitFor":return e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s}),void(e.options?.ms&&await i.waitForTimeout(e.options.ms));case"submit":{const r=e.selector||"form",n=i.locator(r).first();if(0===await n.count())throw new L.NotFoundError(r,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await n.elementHandle();if(!t)throw new L.CommonError(`submit: could not get form handle for ${r}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),r=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:r,html:r,text:r,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}return await n.evaluate(t=>t.submit()),await i.waitForLoadState("networkidle",{timeout:s}),void(this.lastResponse=await this.buildResponse(t))}case"pause":{const t=this.ctx?.onPause;return 
void(t?(console.info(e.message||"Execution paused for manual intervention."),await t({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."))}case"getContent":return this.buildResponse(t);default:throw new L.CommonError(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",L.ErrorCode.NotSupported)}}_createCrawler(t){return new M.PlaywrightCrawler(t)}async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.jar.length>0&&await e.context().addCookies(this.jar.map(t=>({...t,url:i.url,domain:t.domain||new URL(i.url).hostname})));const r=this.blockedTypes;r.size>0&&await e.route("**/*",t=>{r.has(t.request().resourceType())?t.abort():t.continue()})}]};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const t=await(0,H.launchOptions)({headless:e});i.launchContext={launcher:N.firefox,launchOptions:t},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}return i}async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new L.CommonError("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}};B.id="playwright",B.mode="browser",A.register(B);var G=class extends f{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};G.id="click",G.returnType="none",G.capabilities={http:"simulate",browser:"native"},f.register(G);var z=class extends 
f{async onExecute(t,e){const{selector:i,value:s,...r}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,r)}};z.id="fill",z.returnType="none",z.capabilities={http:"simulate",browser:"native"},f.register(z);var D=class extends f{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};D.id="getContent",D.returnType="response",D.capabilities={http:"native",browser:"native"},f.register(D);var W=class extends f{async onExecute(t,e,i){const s=e?.params,r=s?.url||t.url;if(!r)throw new Error("URL is required for goto action");const n=t.internal.engine;if(!n)throw new Error("No engine available");t.url=r;return await n.goto(r,s)}};W.id="goto",W.returnType="response",W.capabilities={http:"native",browser:"native"},f.register(W);var I=class extends f{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};I.id="submit",I.returnType="none",I.capabilities={http:"simulate",browser:"native"},f.register(I);var J=class extends f{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};J.id="waitFor",J.returnType="none",J.capabilities={http:"native",browser:"native"},f.register(J);var V=class extends f{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};V.id="extract",V.returnType="any",V.capabilities={http:"native",browser:"native"},f.register(V);var K=class extends f{async onExecute(t,e){const{selector:i,message:s,attribute:r}=e?.params||{},n=t.internal.engine;if("browser"===n?.mode){if(i){if(!await(n?.extract({selector:i,attribute:r})))return}n&&"pause"in n?await n.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. 
Skipped.")}else console.warn("[PauseAction] can only run in browser engine. Skipped.")}};async function Q(t,e){return(new F).fetch(t,e)}K.id="pause",K.capabilities={http:"native",browser:"native"},K.returnType="none",f.register(K);
1
"use strict";
// Minified CommonJS bundle. Identifiers are the minifier's short names; the
// export map a few lines below gives the public name of every binding
// (e.g. `f` -> FetchAction, `A` -> FetchEngine, `j` -> FetchSession).

// ---------------------------------------------------------------------------
// Module interop helpers
// ---------------------------------------------------------------------------
var t,e=Object.create,i=Object.defineProperty,s=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,n=Object.getPrototypeOf,o=Object.prototype.hasOwnProperty,
// a(target, source, except): for every own key of `source` that `target`
// lacks (and that is not `except`), define a live getter on `target`.
a=(t,e,n,a)=>{if(e&&"object"==typeof e||"function"==typeof e)for(let c of r(e))o.call(t,c)||c===n||i(t,c,{get:()=>e[c],enumerable:!(a=s(e,c))||a.enumerable});return t},
// c(mod, flag): returns an object exposing `mod`'s keys, adding a `default`
// property pointing at `mod` unless `mod.__esModule` is set and `flag` is falsy.
c=(t,s,r)=>(r=null!=t?e(n(t)):{},a(!s&&t&&t.__esModule?r:i(r,"default",{value:t,enumerable:!0}),t)),
l={};

// Export map: public name -> getter for the minified binding.
((t,e)=>{for(var s in e)i(t,s,{get:e[s],enumerable:!0})})(l,{CheerioFetchEngine:()=>_,ClickAction:()=>z,DefaultFetcherProperties:()=>u,ExtractAction:()=>V,FetchAction:()=>f,FetchActionResultStatus:()=>h,FetchEngine:()=>A,FetchSession:()=>j,FillAction:()=>B,GetContentAction:()=>D,GotoAction:()=>W,PauseAction:()=>K,PlaywrightFetchEngine:()=>G,SubmitAction:()=>I,WaitForAction:()=>J,WebFetcher:()=>F,fetchWeb:()=>Q}),module.exports=(t=l,a(i({},"__esModule",{value:!0}),t));

// DefaultFetcherProperties: defaults merged into every session context and
// into engine options.
var u={enableSmart:!0,useSiteRegistry:!0,antibot:!1,headers:{},cookies:[],reuseCookies:!0,proxy:[],blockResources:[],ignoreSslErrors:!0,browser:{engine:"playwright",headless:!0,waitUntil:"domcontentloaded"},http:{method:"GET"},timeoutMs:6e4,maxConcurrency:1,maxRequestsPerMinute:1e3,delayBetweenRequestsMs:0,retries:0,sites:[]},
// FetchActionResultStatus: bidirectional enum (Failed=0, Success=1, Skipped=2).
h=(t=>(t[t.Failed=0]="Failed",t[t.Success=1]="Success",t[t.Skipped=2]="Skipped",t))(h||{}),

// ---------------------------------------------------------------------------
// FetchAction: base class and registry for actions.
// Subclasses implement onExecute(context, options) and may implement
// onBeforeExec / onAfterExec.
// ---------------------------------------------------------------------------
w=class t{
  // Registers an action class under its static `id`; throws if `id` is missing.
  static register(t){const e=t.id;if(!e)throw new Error("FetchAction.register: actionClass.id is required");this.registry.set(e,t)}
  // Looks up a registered action class by id.
  static get(t){return this.registry.get(t)}
  // Instantiates an action from an id string or an options object carrying
  // `id` or `name`. Returns undefined when nothing is registered under it.
  static create(t){const e="string"==typeof t?t:t.id||t.name;if(!e)throw new Error("Action must have id or name");const i=this.registry.get(e);return i?new i:void 0}
  static has(t){return this.registry.has(t)}
  static list(){return Array.from(this.registry.keys())}
  // Capability of this action for an engine mode key; "noop" when undeclared.
  static getCapability(t){return this.capabilities[t]??"noop"}
  getCapability(t){return this.constructor.getCapability(t)}
  get id(){return this.constructor.id}
  get returnType(){return this.constructor.returnType}
  get capabilities(){return this.constructor.capabilities}
  // Calls method `e` of the context's engine with the remaining arguments.
  // Throws when no engine is attached or the engine lacks that method.
  async delegateToEngine(t,e,...i){const s=t.internal.engine;if(!s)throw new Error("No engine available");if("function"!=typeof s[e])throw new Error(`Engine does not have a method named '${String(e)}'`);return await s[e](...i)}
  // Wires the collectors listed in `options.collectors` to the context's
  // event bus. Returns undefined when there is nothing to wire, otherwise
  // `{ cleanup, awaitExecPendings }`.
  installCollectors(e,i){
    const s=i?.collectors;
    if(!s?.length)return;
    // r: listener-removal callbacks; n: pending executions of collectors whose
    // `background` option is false (these are awaited by awaitExecPendings).
    const r=[],n=new Set;
    for(const i of s){
      const s=d(i.activateOn),o=d(i.collectOn),a=d(i.deactivateOn),c=!(i.background??!0),l=t.create(i);
      if(!l)continue;
      // u: activated, h: deactivated, w: number of finished executions.
      let u=!1,h=!1,w=0;
      // f: activate once (runs the collector's onBeforeExec).
      const f=async t=>{if(!u&&!h){u=!0;try{await(l.onBeforeExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,phase:"before",error:t})}}},
      // m: run the collector for one event; the result is appended to
      // outputs[storeAs] (when set) and announced via "collector:result".
      m=async(t,s)=>{if(!h){u||await f(s);try{const r=Promise.resolve(l.onExecute?.(e,i,s)).then(s=>{var r,n;if(i.storeAs){((r=e.outputs)[n=i.storeAs]||(r[n]=[])).push(s)}return e.eventBus.emit("collector:result",{action:this.id,collector:i.id||i.name,event:t,result:s}),s}).catch(s=>{e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,event:t,phase:"exec",error:s})}).finally(()=>{w++});c&&(n.add(r),r.finally(()=>n.delete(r)))}catch(i){e.eventBus.emit("collector:error",{action:this.id,collector:l.id,event:t,phase:"exec",error:i})}}},
      // g: deactivate; triggers one execution first if none has finished yet,
      // then runs onAfterExec and detaches the collectOn listeners.
      g=async()=>{if(!h){0===w&&m("collector:after"),h=!0;try{await(l.onAfterExec?.(e,i))}catch(t){e.eventBus.emit("collector:error",{action:this.id,collector:i.id||i.name,phase:"after",error:t})}finally{e.eventBus.emit("collector:end",{action:this.id,collector:i.id||i.name}),v.forEach(t=>t())}}},
      b=p(e,s,f),v=y(e,o,m),x=p(e,a,g);
      if(r.push(...b,...v,...x),!s.length&&!o.length&&!a.length){
        // No triggers configured: run the collector when this action ends.
        const t=()=>{g()};
        // FIX: the listener used to be removed from "fetcher:action:end", an
        // event it was never attached to, so it leaked whenever the end event
        // did not fire. Attach and detach with the same event name.
        const endEvent=`action:${this.id}.end`;
        e.eventBus.once(endEvent,t),r.push(()=>e.eventBus.off(endEvent,t))
      }
    }
    return r.length||n.size>0?{cleanup:()=>r.forEach(t=>t()),awaitExecPendings:async()=>{n.size>0&&await Promise.allSettled(Array.from(n))}}:void 0
  }
  // Pushes this action onto context.internal.actionStack, emits the start
  // events, runs onBeforeExec and installs collectors.
  async beforeExec(t,e){t.internal.actionStack||(t.internal.actionStack=[]);const i=t.internal.actionStack,s=i.length,r=i.length>0?i[i.length-1].id:void 0,n={...e,id:this.id,depth:s,parent:r};i.push(n),t.currentAction=n;const o={action:this,context:t,options:e,depth:s,stack:[...i]};t.eventBus.emit(`action:${this.id}.start`,o),t.eventBus.emit("action:start",o),await(this.onBeforeExec?.(t,e));return{entry:o,collectors:this.installCollectors(t,e)}}
  // Waits for pending collector executions, records the result on the context
  // (lastResult / lastResponse / outputs[storeAs]), runs onAfterExec and emits
  // the end events. Collector cleanup and the stack pop always run.
  async afterExec(t,e,i,s){const r=t.internal.actionStack,n=r.length-1,o=s?.collectors;try{await(o?.awaitExecPendings()),t.lastResult=i,"response"!==i?.returnType||i.error||(t.lastResponse=i.result),e?.storeAs&&(t.outputs[e.storeAs]=i?.result),i?.error&&(t.currentAction.error=i.error),await(this.onAfterExec?.(t,e));const s={action:this,context:t,options:e,result:i,depth:n,stack:[...r]};i?.error&&(s.error=i.error);try{t.eventBus.emit(`action:${this.id}.end`,s)}catch(t){}try{t.eventBus.emit("action:end",s)}catch(t){}}finally{try{o?.cleanup()}finally{r.pop();const e=r.length;t.currentAction=e>0?r[e-1]:void 0}}}
  // Runs the action. A bare onExecute return value is wrapped as
  // { status: 1, returnType, result }. On error the result is
  // { status: 0, error, meta } and the error is rethrown only when
  // options.failOnError is truthy.
  // NOTE(review): throwHttpErrors defaults to true (`failOnError ?? true`)
  // while the rethrow requires an explicit truthy failOnError - confirm this
  // asymmetry is intended.
  async execute(t,e){const i=await this.beforeExec(t,e);let s;try{const i=e?.failOnError??!0;return t.throwHttpErrors=i,s=await this.onExecute(t,e),s&&s.returnType||(s={status:1,returnType:this.returnType??"any",result:s}),s}catch(i){if(s={status:0,error:i,meta:{id:this.id,engineType:t.engine,capability:this.getCapability(t.engine)}},e?.failOnError)throw i;return s}finally{await this.afterExec(t,e,s,i)}}
};
w.registry=new Map,w.returnType="any",w.capabilities={http:"noop",browser:"noop"};
var f=w;

// d: normalizes a value to an array (falsy -> [], scalar -> [scalar]).
function d(t){return t?Array.isArray(t)?t:[t]:[]}
// p: subscribes `i` once to each string/RegExp event in `e`, passing the first
// event argument; returns the matching unsubscribe callbacks.
function p(t,e,i){const s=[];for(const r of e)if("string"==typeof r||r instanceof RegExp){const e=(...t)=>{i(t[0])};t.eventBus.once(r,e),s.push(()=>t.eventBus.off(r,e))}return s}
// y: like p but persistent (`on`), and `i` receives (event, payload).
function y(t,e,i){const s=[];for(const r of e)if("string"==typeof r||r instanceof RegExp){const e=t=>i(r,t);t.eventBus.on(r,e),s.push(()=>t.eventBus.off(r,e))}return s}

var m=require("events-ex");
var g,b,v=require("lodash-es"),x=c(require("crypto"),1),
// q: returns `t` random bytes sliced from a pooled buffer that is refilled
// with crypto.randomFillSync when exhausted.
q=t=>((t=>{!g||g.length<t?(g=Buffer.allocUnsafe(128*t),x.default.randomFillSync(g),b=0):b+t>g.length&&(x.default.randomFillSync(g),b=0),b+=t})(t|=0),g.subarray(b-t,b)),
// E: generator of random ids, 12 characters drawn from [0-9a-z].
E=((t,e=21)=>((t,e,i)=>{let s=(2<<31-Math.clz32(t.length-1|1))-1,r=Math.ceil(1.6*s*e/t.length);return(n=e)=>{let o="";for(;;){let e=i(r),a=r;for(;a--;)if(o+=t[e[a]&s]||"",o.length===n)return o}}})(t,e,q))("0123456789abcdefghijklmnopqrstuvwxyz",12);

var S=require("lodash-es"),k=require("events-ex"),C=require("@isdk/common-error"),$=require("crawlee");
// R: creates a promise with a `release()` method that resolves it.
function R(){let t=()=>{};const e=new Promise(e=>{t=e});return e.release=t,e}
$.Configuration.getGlobalConfig().set("persistStorage",!1);

// ---------------------------------------------------------------------------
// FetchEngine: abstract engine on top of a crawlee crawler.
// Subclasses provide _buildResponse, _querySelectorAll, _extractValue,
// executeAction, _createCrawler, _getSpecificCrawlerOptions and goto.
// ---------------------------------------------------------------------------
var A=class{
  constructor(){this.hdrs={},this.jar=[],this.pendingRequests=new Map,this.requestCounter=0,this.actionEmitter=new k.EventEmitter,this.isPageActive=!1,this.navigationLock=function(){const t=R();return t.release(),t}(),this.blockedTypes=new Set}
  // Registers an engine class under its static `id`; duplicate ids throw.
  static register(t){const e=t.id;if(!e)throw new Error("Engine must define static id");if(this.registry.has(e))throw new Error(`Engine id duplicated: ${e}`);this.registry.set(e,t)}
  static get(t){return this.registry.get(t)}
  // Returns the first registered engine whose static `mode` equals `t`.
  static getByMode(t){for(const[e,i]of this.registry.entries())if(i.mode===t)return i}
  // Creates and initializes the engine named by options/context `engine`
  // (matched by id, then by mode). Resolves to undefined when none matches.
  static async create(t,e){const i=(0,S.defaultsDeep)(e,t,u),s=i.engine??t.engine,r=s?this.get(s)??this.getByMode(s):null;if(r){const e=new r;return await e.initialize(t,i),e}}
  // Recursively evaluates an extraction schema ("object", "array" or a value
  // schema) against scope `e`. A missing scope yields [] for arrays, otherwise null.
  async _extract(t,e){const i=t.type;if(!e)return"array"===i?[]:null;if("object"===i){const{selector:i,properties:s}=t;let r=e;if(i){const t=await this._querySelectorAll(e,i);r=t.length>0?t[0]:null}if(!r)return null;const n={};for(const t in s)n[t]=await this._extract(s[t],r);return n}if("array"===i){const{selector:i,items:s}=t,r=i?await this._querySelectorAll(e,i):[e],n=[];for(const t of r)n.push(await this._extract(s,t));return n}const{selector:s}=t;let r=e;if(s){const t=await this._querySelectorAll(e,s);r=t.length>0?t[0]:null}return r?this._extractValue(t,r):null}
  // Builds the engine response and derives `contentType` from the
  // content-type header (parameters after ";" are dropped).
  async buildResponse(t){const e=await this._buildResponse(t),i=e.headers["content-type"]||"";return e.contentType=i.split(";")[0].trim(),e}
  waitFor(t){return this.dispatchAction({type:"waitFor",options:t})}
  click(t){return this.dispatchAction({type:"click",selector:t})}
  fill(t,e){return this.dispatchAction({type:"fill",selector:t,value:e})}
  submit(t,e){return this.dispatchAction({type:"submit",selector:t,options:e})}
  pause(t){return this.dispatchAction({type:"pause",message:t})}
  extract(t){const e=this._normalizeSchema(t);return this.dispatchAction({type:"extract",schema:e})}
  // Returns a normalized deep copy of a schema: array shorthand is expanded
  // into `items`, and `has` / `exclude` are folded into each comma-separated
  // selector as :has(...) / :not(...).
  _normalizeSchema(t){const e=JSON.parse(JSON.stringify(t));if(e.properties)for(const t in e.properties)e.properties[t]=this._normalizeSchema(e.properties[t]);if(e.items&&(e.items=this._normalizeSchema(e.items)),"array"===e.type&&(e.attribute&&!e.items&&(e.items={attribute:e.attribute},delete e.attribute),e.items||(e.items={type:"string"})),e.selector&&(e.has||e.exclude)){const{selector:t,has:i,exclude:s}=e,r=t.split(",").map(t=>{let e=t.trim();return i&&(e=`${e}:has(${i})`),s&&(e=`${e}:not(${s})`),e}).join(", ");e.selector=r,delete e.has,delete e.exclude}return e}
  get id(){return this.constructor.id}
  get mode(){return this.constructor.mode}
  get context(){return this.ctx}
  // One-time setup: merges options into the context, copies headers
  // (lower-cased keys) and cookies, attaches this engine to the context, opens
  // a request queue and starts the crawler without awaiting its completion.
  async initialize(t,e){if(this.ctx)return;(0,S.merge)(t,e),this.ctx=t,this.opts=t,this.hdrs=function(t){const e={};if(t&&"object"==typeof t)for(const[i,s]of Object.entries(t))e[i.toLowerCase()]=s;return e}(t.headers),this.jar=[...t.cookies??[]],t.internal||(t.internal={}),t.internal.engine=this,t.engine=this.mode,this.actionEmitter.setMaxListeners(100),this.requestQueue=await $.RequestQueue.open();const i=await this._getSpecificCrawlerOptions(t),s={...(0,S.defaultsDeep)(i,{requestQueue:this.requestQueue,maxConcurrency:1,minConcurrency:1,useSessionPool:!0,persistCookiesPerSession:!0,sessionPoolOptions:{maxPoolSize:1,persistenceOptions:{enable:!1},sessionOptions:{maxUsageCount:1e3,maxErrorScore:3}}}),requestHandler:this._requestHandler.bind(this),errorHandler:this._failedRequestHandler.bind(this),failedRequestHandler:this._failedRequestHandler.bind(this)};this.crawler=this._createCrawler(s),this.crawler.run().then(()=>{this.isCrawlerReady=!0}).catch(t=>{this.isCrawlerReady=!1,console.error("Crawler background error:",t)})}
  // Runs subclass cleanup, then common cleanup, then detaches from the context.
  async cleanup(){await(this._cleanup?.()),await this._commonCleanup();const t=this.ctx;t&&t.internal?.engine===this&&(t.internal.engine=void 0),this.ctx=void 0,this.opts=void 0}
  // Keeps the request handler alive, executing every dispatched action against
  // the crawling context `t`, until a "dispose" action arrives.
  async _executePendingActions(t){await new Promise(e=>{const i=async({action:e,resolve:i,reject:s})=>{try{if("dispose"===e.type)return this.actionEmitter.emit("dispose"),void i();i(await this.executeAction(t,e))}catch(t){s(t)}};this.actionEmitter.on("dispatch",i),this.actionEmitter.once("dispose",()=>{this.actionEmitter.removeListener("dispatch",i),e()})})}
  // Settles the pending goto() for this request (rejecting on a missing or
  // >=400 status when ctx.throwHttpErrors is set), then serves actions until disposed.
  async _sharedRequestHandler(t){try{const{request:e}=t;this.isPageActive=!0;const i=this.pendingRequests.get(e.userData.requestId);if(i){const s=await this._buildResponse(t),r=!s.statusCode||s.statusCode>=400;if(this.ctx?.throwHttpErrors&&r){const t=new C.CommonError(`Request for ${s.finalUrl} failed with status ${s.statusCode||"N/A"}`,"request",s.statusCode);i.reject(t)}else this.lastResponse=s,i.resolve(s);this.pendingRequests.delete(e.userData.requestId)}await this._executePendingActions(t)}finally{this.isPageActive=!1,this.navigationLock.release()}}
  // Rejects the pending goto() when ctx.throwHttpErrors is set, then falls
  // through to the normal handler.
  async _sharedFailedRequestHandler(t,e){const{request:i}=t,s=this.pendingRequests.get(i.userData.requestId);if(s&&e&&this.ctx?.throwHttpErrors){this.pendingRequests.delete(i.userData.requestId);const t=e.response,r=t?.statusCode||500,n=t?.url?t.url:i.url,o=new C.CommonError(`Request${n?" for "+n:""} failed: ${e.message}`,"request",r);s.reject(o)}return this._sharedRequestHandler(t)}
  // Sends an action to the active request handler; throws when no page is active.
  async dispatchAction(t){if(!this.isPageActive)throw new Error("No active page. Call goto() before performing actions.");return new Promise((e,i)=>{this.actionEmitter.emit("dispatch",{action:t,resolve:e,reject:i})})}
  async _requestHandler(t){await this._sharedRequestHandler(t)}
  async _failedRequestHandler(t,e){await this._sharedFailedRequestHandler(t,e)}
  // Disposes the active page, rejects outstanding goto() promises, tears down
  // the crawler and drops the request queue.
  async _commonCleanup(){if(this.isPageActive&&await this.dispatchAction({type:"dispose"}).catch(()=>{}),this.pendingRequests.size>0)for(const[,t]of this.pendingRequests)t.reject(new Error("Cleanup:Request cancelled"));if(this.actionEmitter.removeAllListeners(),this.crawler){try{await(this.crawler.teardown?.())}catch(t){console.error("ccrawler teardown error:",t)}this.crawler=void 0}this.isCrawlerReady=void 0,this.requestQueue&&(await this.requestQueue.drop(),this.requestQueue=void 0),this.pendingRequests.clear()}
  // Adds resource types to the blocked set (replacing it when `e` is truthy);
  // resolves to the number of types passed in.
  async blockResources(t,e){return e&&this.blockedTypes.clear(),t.forEach(t=>this.blockedTypes.add(t)),t.length}
  getContent(){return this.lastResponse?Promise.resolve(this.lastResponse):Promise.reject(new Error("No content fetched yet. Call goto() first."))}
  // Header accessor: () -> copy of all; (name) -> value or "";
  // (object, replace?) -> merge or replace; (name, string|null) -> set or delete.
  async headers(t,e){if(void 0===t)return{...this.hdrs};if("string"==typeof t&&void 0===e)return this.hdrs[t.toLowerCase()]||"";if(null!==t&&"object"==typeof t){const i={};for(const[e,s]of Object.entries(t))i[e.toLowerCase()]=String(s);return this.hdrs=!0===e?i:{...this.hdrs,...i},!0}return"string"==typeof t&&("string"==typeof e?this.hdrs[t.toLowerCase()]=e:null===e&&delete this.hdrs[t.toLowerCase()],!0)}
  // Cookie accessor: (array) -> replace; (null) -> clear; otherwise -> copy of the jar.
  async cookies(t){return Array.isArray(t)?(this.jar=[...t],!0):null===t?(this.jar=[],!0):[...this.jar]}
  async dispose(){await this.cleanup()}
};

// P: picks an engine for the context. Precedence: context.engine, then the
// engine of the matching `sites` entry, then "auto". Falls back to the "http"
// engine when the chosen name does not resolve.
// NOTE(review): the second domain lookup is a plain endsWith(), so
// "notexample.com" also matches a site registered as "example.com".
async function P(t,e){const i=function(t,e){if(!t||!e?.length)return null;const i=new URL(t);let s=e.find(t=>t.domain===i.hostname);s||(s=e.find(t=>i.hostname.endsWith(t.domain)));if(!s)return null;if(s.pathScope?.length){if(!s.pathScope.some(t=>i.pathname.startsWith(t)))return null}return s}(e?.url||t.url,t.sites),s=t.engine||i?.engine||"auto";let r=await A.create(t,{engine:s});return r||(r=await A.create(t,{engine:"http"})),r}
A.registry=new Map;

// ---------------------------------------------------------------------------
// FetchSession: owns one context and its lazily created engine.
// ---------------------------------------------------------------------------
var j=class{
  constructor(t={}){this.options=t,this.closed=!1,this.id=E(),this.context=this.createContext(t)}
  // Executes one action; throws for an unknown action id/name.
  async execute(t){await this.ensureEngine(t);const e=f.create(t);if(!e)throw new Error(`Unknown action: ${t.id||t.name}`);let i,s;this.context.internal.actionIndex=(this.context.internal.actionIndex||0)+1,this.context.currentAction={...t,index:this.context.internal.actionIndex,startedAt:Date.now()};try{return i=await e.execute(this.context,t),i}catch(t){throw s=t,s}finally{this.context.currentAction=void 0}}
  // Executes the actions in order, then a final getContent; resolves to
  // { result, outputs }.
  async executeAll(t){try{for(let e=0;e<t.length;e++){const i=t[e];await this.execute(i)}const e=await this.execute({id:"getContent"});return{result:e?.result,outputs:this.getOutputs()}}catch(t){throw t}}
  getOutputs(){return this.context.outputs}
  // Disposes the engine once; emits session:closing and session:closed.
  async dispose(){if(this.closed)return;const t=this.context.eventBus;t.emit("session:closing",{sessionId:this.id});try{await(this.context.internal.engine?.dispose())}finally{this.closed=!0}t.emit("session:closed",{sessionId:this.id})}
  // Creates the engine on first use; throws when the session is closed or no
  // engine can be created.
  async ensureEngine(t){if(this.closed)throw new Error("Session is closed");if(!this.context.internal.engine){const e=t?.params?.url??this.context.url;if(!await P(this.context,{url:e}))throw new Error("No engine found")}}
  // Builds the context: the given options plus id, eventBus, outputs, internal
  // and execute/action helpers, with DefaultFetcherProperties as fallback.
  createContext(t=this.options){const e=new m.EventEmitter;return(0,v.defaultsDeep)({...t,id:this.id,eventBus:e,outputs:{},internal:{},execute:async t=>this.execute(t),action:async function(t,e,i){return this.execute({name:t,params:e,...i})}},u)}
},

// ---------------------------------------------------------------------------
// WebFetcher: creates sessions from default options and runs one-shot fetches.
// ---------------------------------------------------------------------------
F=class{
  constructor(t={}){this.defaults=t}
  async createSession(t){const e={...this.defaults,...t||{}};return new j(e)}
  // fetch(url, options) or fetch(options). Ensures the action list starts with
  // a goto for the URL, runs it in a fresh session and always disposes the session.
  // FIX: work on a copy of `options.actions`; the original unshift()ed the
  // goto action into the caller's array, so reusing one options object
  // accumulated extra goto actions.
  async fetch(t,e){"string"!=typeof t&&(t=(e=t).url);const i=await this.createSession(e);try{const s=[...(e?.actions||[])];t&&0!==s.findIndex(e=>"goto"===e.id&&e.params?.url===t)&&s.unshift({id:"goto",params:{url:t}});return await i.executeAll(s)}finally{await i.dispose()}}
},
T=require("crawlee"),O=c(require("cheerio")),U=require("@isdk/common-error"),

// ---------------------------------------------------------------------------
// CheerioFetchEngine (id "cheerio", mode "http"): interactions are simulated
// with plain HTTP requests against the parsed document.
// ---------------------------------------------------------------------------
_=class extends A{
  // Builds a response from the crawling context; the serialized cheerio
  // document is preferred over the raw body for `html` / `text`.
  async _buildResponse(t){const{request:e,response:i,body:s,$:r}=t,n=r?.html();let o="string"==typeof s?s:Buffer.isBuffer(s)?s.toString("utf-8"):String(s??"");n&&n!==o&&(o=n);let a=i?.headers;if(!a&&i?.rawHeaders){a={};const t=i.rawHeaders;for(let e=0;e<t.length;e+=2)a[t[e].toLowerCase()]=t[e+1]}return{url:e.url,finalUrl:e.loadedUrl||e.url,statusCode:i?.statusCode??200,statusText:i?.statusMessage,headers:a||{},body:s,html:o,text:o}}
  async _querySelectorAll(t,e){const{$:i,el:s}=t;return s.find(e).toArray().map(t=>({$:i,el:i(t)}))}
  // Reads an attribute, inner HTML (type "html") or trimmed text from the
  // element and coerces it according to schema.type.
  async _extractValue(t,e){
    const{el:i}=e,{attribute:s,type:r="string"}=t;
    if(0===i.length)return null;
    let n="";
    if(n=s?i.attr(s)??null:"html"===r?i.html():i.text().trim(),null===n)return null;
    switch(r){
      case"number":{
        // FIX: `parseFloat(...)||null` turned a legitimate 0 into null; only
        // an unparsable value maps to null now.
        const num=parseFloat(n.replace(/[^0-9.-]+/g,""));
        return Number.isNaN(num)?null:num
      }
      case"boolean":const t=n.toLowerCase();return"true"===t||"1"===t;
      default:return n
    }
  }
  // Executes one dispatched action against the cheerio crawling context.
  async executeAction(t,e){
    const{$:i}=t;
    switch(e.type){
      case"dispose":return;
      case"extract":if(!i)throw new U.CommonError(`Cheerio context not available for action: ${e.type}`,"extract");return this._extract(e.schema,{$:i,el:i.root()});
      // click: follows a link's href, submits the enclosing form of a
      // button/input, or treats a selector that matches nothing as a URL.
      case"click":{if(!i)throw new U.CommonError(`Cheerio context not available for action: ${e.type}`,"click");const s=e.selector,r=i(s).first();let n;if(0===r.length)try{n=new URL(s,t.request.loadedUrl||t.request.url).href}catch{throw new U.CommonError(`click: selector not found or invalid URL: ${s}`,"click")}else{if(!r.is("a")||!r.attr("href")){if(r.is('input[type="submit"], button[type="submit"], button, input')){const e=r.closest("form");if(e.length)return this.executeAction(t,{type:"submit",selector:e});throw new U.CommonError("click: submit-like element without form","click")}throw new U.CommonError(`click: unsupported element for http simulate. Selector: ${s}`,"click")}{const e=r.attr("href");n=new URL(e,t.request.loadedUrl||t.request.url).href}}const o=await t.sendRequest({url:n});return void this._updateStateAfterNavigation(t,o)}
      // fill: sets the field value in the parsed document only.
      // FIX: the "fill" error name sat outside the constructor call
      // (`new CommonError(msg),"fill"`), so it was silently discarded.
      case"fill":{if(!i)throw new U.CommonError(`Cheerio context not available for action: ${e.type}`,"fill");const s=i(e.selector).first();if(0===s.length)throw new U.CommonError(`fill: selector not found: ${e.selector}`);if(!s.is("input, textarea, select"))throw new U.CommonError(`fill: not a form field: ${e.selector}`);return s.val(e.value),void(this.lastResponse=await this.buildResponse(t))}
      case"waitFor":return void(e.options?.ms&&await new Promise(t=>setTimeout(t,e.options.ms)));
      case"pause":const s=this.ctx?.onPause;return void(s?(console.info(e.message||"Execution paused for manual intervention."),await s({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."));
      // submit: collects the named fields and sends them as a GET query string,
      // or as a JSON / url-encoded POST body depending on enctype.
      case"submit":{if(!i)throw new U.CommonError(`Cheerio context not available for action: ${e.type}`,"submit");const s="string"==typeof e.selector?i(e.selector).first():null!=e.selector?e.selector:i("form").first();if(0===s.length)throw new U.NotFoundError(e.selector,"submit");const r=s.attr("action")||t.request.loadedUrl||t.request.url,n=(s.attr("method")||"GET").toUpperCase(),o=new URL(r,t.request.loadedUrl||t.request.url).href,a={};let c;if(s.find("input, select, textarea").each((t,e)=>{const s=i(e),r=s.attr("name");if(!r)return;const n=s.val();null!=n&&(a[r]=String(n))}),"GET"===n){const e=new URL(o);Object.entries(a).forEach(([t,i])=>e.searchParams.set(t,i)),c=await t.sendRequest({url:e.href,method:"GET"})}else{let i;const r={};"application/json"===(e.options?.enctype||s.attr("enctype")||"application/x-www-form-urlencoded")?(i=JSON.stringify(a),r["Content-Type"]="application/json"):(i=new URLSearchParams(a).toString(),r["Content-Type"]="application/x-www-form-urlencoded"),c=await t.sendRequest({url:o,method:"POST",body:i,headers:r})}return void this._updateStateAfterNavigation(t,c)}
      case"getContent":return this.buildResponse(t);
      default:throw new U.CommonError(`Unknown action type: ${e.type}`,"CheerioFetchEngine.executeAction",U.ErrorCode.NotSupported)
    }
  }
  // Re-parses the body of a simulated navigation, replaces $ / response / body
  // on the crawling context and stores the new lastResponse.
  _updateStateAfterNavigation(t,e){const i=e;let s=i.headers;if(!s&&i.rawHeaders){s={};for(let t=0;t<i.rawHeaders.length;t+=2)s[i.rawHeaders[t].toLowerCase()]=i.rawHeaders[t+1]}s=s||{};const r=i.body,n=O.load(r??"");t.$=n,t.response=i,t.body=r;const o=n.html(),a=n.text(),c=(s["content-type"]||"").split(";")[0].trim();this.lastResponse={url:t.request.url,finalUrl:i.url,statusCode:i.statusCode,statusText:i.statusMessage,headers:s,contentType:c,body:r,html:o,text:a}}
  _createCrawler(t){return new T.CheerioCrawler(t)}
  // Crawler options: proxy list, one retry, handler timeout derived from
  // timeoutMs (at least 5 s), and a hook copying throwHttpErrors / timeout
  // onto the request options.
  _getSpecificCrawlerOptions(t){const e=this.opts?.proxy?"string"==typeof this.opts.proxy?[this.opts.proxy]:this.opts.proxy:void 0,i=e?.length?new T.ProxyConfiguration({proxyUrls:e}):void 0;return{additionalMimeTypes:["text/plain"],maxRequestRetries:1,requestHandlerTimeoutSecs:Math.max(5,Math.floor((this.opts?.timeoutMs||3e4)/1e3)),proxyConfiguration:i,preNavigationHooks:[(e,i)=>{i.throwHttpErrors=t.throwHttpErrors,this.opts?.timeoutMs&&(i.timeout={request:this.opts.timeoutMs})}]}}
  // Navigates by queueing a new request: disposes the active page, waits for
  // the previous handler to release the navigation lock, and resolves with the
  // response (or rejects after timeoutMs).
  async goto(t,e){this.isPageActive&&this.dispatchAction({type:"dispose"}).catch(()=>{});const i="req-"+ ++this.requestCounter,s=new Promise((t,s)=>{const r=e?.timeoutMs||this.opts?.timeoutMs||3e4,n=setTimeout(()=>{this.pendingRequests.delete(i),this.navigationLock.release(),s(new U.CommonError(`goto timed out after ${r}ms.`,"gotoTimeout",U.ErrorCode.RequestTimeout))},r);this.pendingRequests.set(i,{resolve:e=>{clearTimeout(n),t(e)},reject:t=>{clearTimeout(n),s(t)}})});return this.requestQueue.addRequest({...e,url:t,headers:{...this.hdrs,...e?.headers},userData:{requestId:i},uniqueKey:`${t}-${i}`}).catch(t=>{const e=this.pendingRequests.get(i);e&&(this.pendingRequests.delete(i),this.navigationLock.release(),e.reject(t))}),await this.navigationLock,this.navigationLock=R(),s}
};
_.id="cheerio",_.mode="http",A.register(_);

var N=require("crawlee"),M=require("playwright"),H=require("camoufox-js"),L=require("@isdk/common-error"),

// ---------------------------------------------------------------------------
// PlaywrightFetchEngine (id "playwright", mode "browser").
// ---------------------------------------------------------------------------
G=class extends A{
  // Builds a response from the live page; when the page is missing or closed
  // only request/response metadata is returned with empty body/html/text.
  async _buildResponse(t){const{page:e,response:i,request:s}=t;if(!e||e.isClosed())return{url:s.url,finalUrl:s.loadedUrl||s.url,statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:"",html:"",text:""};const r=await e.content(),n=await e.textContent("body");return{url:e.url(),finalUrl:e.url(),statusCode:i?.status(),statusText:i?.statusText(),headers:await(i?.allHeaders())||{},body:r,html:r,text:n||""}}
  async _querySelectorAll(t,e){return t.locator(e).all()}
  // Reads an attribute, inner HTML (type "html") or text content from the
  // locator and coerces the trimmed value according to schema.type.
  async _extractValue(t,e){
    const{attribute:i,type:s="string"}=t;
    if(0===await e.count())return null;
    let r="";
    if(r=i?await e.getAttribute(i):"html"===s?await e.innerHTML():await e.textContent(),null===r)return null;
    switch(r=r.trim(),s){
      case"number":{
        // FIX: `parseFloat(...)||null` turned a legitimate 0 into null; only
        // an unparsable value maps to null now.
        const num=parseFloat(r.replace(/[^0-9.-]+/g,""));
        return Number.isNaN(num)?null:num
      }
      case"boolean":const t=r.toLowerCase();return"true"===t||"1"===t;
      default:return r
    }
  }
  // Executes one dispatched action against the live Playwright page.
  async executeAction(t,e){
    const{page:i}=t,s=this.opts?.timeoutMs||3e4;
    switch(e.type){
      case"navigate":{const s=await i.goto(e.url,{waitUntil:e.opts?.waitUntil||"domcontentloaded",timeout:this.opts?.timeoutMs||3e4});s&&(t={...t,response:s});const r=await this.buildResponse(t);return this.lastResponse=r,r}
      case"extract":return this._extract(e.schema,i.locator("body"));
      case"click":{await i.click(e.selector,{timeout:s}),await i.waitForLoadState("networkidle",{timeout:s});const r=await this.buildResponse(t);return void(this.lastResponse=r)}
      case"fill":await i.fill(e.selector,e.value,{timeout:s});const r=await this.buildResponse(t);return void(this.lastResponse=r);
      case"waitFor":return e.options?.selector&&await i.waitForSelector(e.options.selector,{timeout:s}),e.options?.networkIdle&&await i.waitForLoadState("networkidle",{timeout:s}),void(e.options?.ms&&await i.waitForTimeout(e.options.ms));
      // submit: for enctype "application/json" the form data is posted with
      // fetch() inside the page and the response body becomes the page
      // content; otherwise the form's native submit() is used.
      case"submit":{const r=e.selector||"form",n=i.locator(r).first();if(0===await n.count())throw new L.NotFoundError(r,"submit");if("application/json"===(e.options?.enctype||"application/x-www-form-urlencoded")){const t=await n.elementHandle();if(!t)throw new L.CommonError(`submit: could not get form handle for ${r}`,"submit");const e=await t.evaluate(async t=>{const e=new FormData(t),i={};e.forEach((t,e)=>{i[e]=t.toString()});const s=await fetch(t.action,{method:t.method,headers:{"Content-Type":"application/json"},body:JSON.stringify(i)}),r=await s.text();return{status:s.status,statusText:s.statusText,headers:Object.fromEntries(s.headers.entries()),body:r,html:r,text:r,url:t.action,finalUrl:s.url}});return await t.dispose(),await i.setContent(e.html),void(this.lastResponse=e)}return await n.evaluate(t=>t.submit()),await i.waitForLoadState("networkidle",{timeout:s}),void(this.lastResponse=await this.buildResponse(t))}
      case"pause":{const t=this.ctx?.onPause;return void(t?(console.info(e.message||"Execution paused for manual intervention."),await t({message:e.message}),console.info("Resuming execution...")):console.warn("[PauseAction] was called, but no `onPause` handler was provided in fetchWeb options. Skipped."))}
      case"getContent":return this.buildResponse(t);
      default:throw new L.CommonError(`Unknown action type: ${e.type}`,"PlaywrightFetchEngine.executeAction",L.ErrorCode.NotSupported)
    }
  }
  _createCrawler(t){return new N.PlaywrightCrawler(t)}
  // Crawler options: a pre-navigation hook that applies the cookie jar and
  // blocked resource types; with opts.antibot, a camoufox-configured Firefox
  // launch plus a post-navigation Cloudflare challenge handler.
  // NOTE(review): `t.retries||3` cannot express "0 retries" - an explicit 0
  // (also the DefaultFetcherProperties default) becomes 3. Confirm intended.
  async _getSpecificCrawlerOptions(t){const e=t.browser?.headless??!0,i={maxRequestRetries:t.retries||3,headless:e,requestHandlerTimeoutSecs:t.requestHandlerTimeoutSecs,preNavigationHooks:[async({page:e,request:i},s)=>{s.throwHttpErrors=t.throwHttpErrors,this.jar.length>0&&await e.context().addCookies(this.jar.map(t=>({...t,url:i.url,domain:t.domain||new URL(i.url).hostname})));const r=this.blockedTypes;r.size>0&&await e.route("**/*",t=>{r.has(t.request().resourceType())?t.abort():t.continue()})}]};if(this.opts?.antibot){i.browserPoolOptions={useFingerprints:!1};const t=await(0,H.launchOptions)({headless:e});i.launchContext={launcher:M.firefox,launchOptions:t},i.postNavigationHooks=[async({page:t,handleCloudflareChallenge:e})=>{await e()}]}return i}
  // Navigates inside the active page when there is one; otherwise queues a new
  // request and resolves once its handler has built the response.
  async goto(t,e){if(this.isPageActive)return this.dispatchAction({type:"navigate",url:t,opts:e});if(!this.requestQueue)throw new L.CommonError("RequestQueue not initialized","goto");const i="req-"+ ++this.requestCounter,s=new Promise((t,e)=>{this.pendingRequests.set(i,{resolve:t,reject:e})});return await this.requestQueue.addRequest({url:t,headers:this.hdrs,userData:{requestId:i,waitUntil:e?.waitUntil||"domcontentloaded"},uniqueKey:`${t}-${i}`}),s}
};
G.id="playwright",G.mode="browser",A.register(G);

// ---------------------------------------------------------------------------
// Built-in actions. Each class registers itself under its static id.
// ---------------------------------------------------------------------------

// ClickAction: params { selector, ...rest }; selector is required.
var z=class extends f{async onExecute(t,e){const{selector:i,...s}=e?.params||{};if(!i)throw new Error("Selector is required for click action");await this.delegateToEngine(t,"click",i,s)}};
z.id="click",z.returnType="none",z.capabilities={http:"simulate",browser:"native"},f.register(z);

// FillAction: params { selector, value, ...rest }; selector and value are required.
var B=class extends f{async onExecute(t,e){const{selector:i,value:s,...r}=e?.params||{};if(!i)throw new Error("Selector is required for fill action");if(void 0===s)throw new Error("Value is required for fill action");await this.delegateToEngine(t,"fill",i,s,r)}};
B.id="fill",B.returnType="none",B.capabilities={http:"simulate",browser:"native"},f.register(B);

// GetContentAction: returns the engine's current response.
var D=class extends f{async onExecute(t,e){return await this.delegateToEngine(t,"getContent",e?.params)}};
D.id="getContent",D.returnType="response",D.capabilities={http:"native",browser:"native"},f.register(D);

// GotoAction: navigates to params.url (falling back to context.url) and
// records the URL on the context.
var W=class extends f{async onExecute(t,e,i){const s=e?.params,r=s?.url||t.url;if(!r)throw new Error("URL is required for goto action");const n=t.internal.engine;if(!n)throw new Error("No engine available");t.url=r;return await n.goto(r,s)}};
W.id="goto",W.returnType="response",W.capabilities={http:"native",browser:"native"},f.register(W);

// SubmitAction: params { selector?, ...options }.
var I=class extends f{async onExecute(t,e){const{selector:i,...s}=e?.params||{};await this.delegateToEngine(t,"submit",i,s)}};
I.id="submit",I.returnType="none",I.capabilities={http:"simulate",browser:"native"},f.register(I);

// WaitForAction: forwards params to engine.waitFor.
var J=class extends f{async onExecute(t,e){const i=t.internal.engine;if(!i)throw new Error("No engine available");await i.waitFor(e?.params)}};
J.id="waitFor",J.returnType="none",J.capabilities={http:"native",browser:"native"},f.register(J);

// ExtractAction: params is the extraction schema (required).
var V=class extends f{async onExecute(t,e){const i=e?.params;if(!i)throw new Error("Schema is required for extract action");return this.delegateToEngine(t,"extract",i)}};
V.id="extract",V.returnType="any",V.capabilities={http:"native",browser:"native"},f.register(V);

// PauseAction: runs only when the engine mode is "browser". When a selector is
// given, the pause happens only if extracting it yields a truthy value.
var K=class extends f{async onExecute(t,e){const{selector:i,message:s,attribute:r}=e?.params||{},n=t.internal.engine;if("browser"===n?.mode){if(i){if(!await(n?.extract({selector:i,attribute:r})))return}n&&"pause"in n?await n.pause(s):console.warn("[PauseAction] was called, but the current engine does not support `pause`. Skipped.")}else console.warn("[PauseAction] can only run in browser engine. Skipped.")}};

// fetchWeb: one-shot convenience wrapper around `new WebFetcher().fetch(...)`.
async function Q(t,e){return(new F).fetch(t,e)}
K.id="pause",K.capabilities={http:"native",browser:"native"},K.returnType="none",f.register(K);