@isdk/web-fetcher 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/README.action.cn.md +53 -312
  2. package/README.action.extract.cn.md +263 -0
  3. package/README.action.extract.md +263 -0
  4. package/README.action.md +53 -311
  5. package/README.cn.md +10 -2
  6. package/README.engine.cn.md +22 -1
  7. package/README.engine.md +22 -1
  8. package/README.md +8 -1
  9. package/dist/index.d.mts +147 -1
  10. package/dist/index.d.ts +147 -1
  11. package/dist/index.js +1 -1
  12. package/dist/index.mjs +1 -1
  13. package/docs/README.md +8 -1
  14. package/docs/_media/README.action.md +53 -311
  15. package/docs/_media/README.cn.md +10 -2
  16. package/docs/_media/README.engine.md +22 -1
  17. package/docs/classes/CheerioFetchEngine.md +236 -88
  18. package/docs/classes/ClickAction.md +23 -23
  19. package/docs/classes/EvaluateAction.md +23 -23
  20. package/docs/classes/ExtractAction.md +23 -23
  21. package/docs/classes/FetchAction.md +27 -23
  22. package/docs/classes/FetchEngine.md +218 -86
  23. package/docs/classes/FetchSession.md +13 -13
  24. package/docs/classes/FillAction.md +23 -23
  25. package/docs/classes/GetContentAction.md +23 -23
  26. package/docs/classes/GotoAction.md +23 -23
  27. package/docs/classes/KeyboardPressAction.md +533 -0
  28. package/docs/classes/KeyboardTypeAction.md +533 -0
  29. package/docs/classes/MouseClickAction.md +533 -0
  30. package/docs/classes/MouseMoveAction.md +533 -0
  31. package/docs/classes/PauseAction.md +23 -23
  32. package/docs/classes/PlaywrightFetchEngine.md +337 -87
  33. package/docs/classes/SubmitAction.md +23 -23
  34. package/docs/classes/TrimAction.md +23 -23
  35. package/docs/classes/WaitForAction.md +23 -23
  36. package/docs/classes/WebFetcher.md +5 -5
  37. package/docs/enumerations/FetchActionResultStatus.md +4 -4
  38. package/docs/functions/fetchWeb.md +2 -2
  39. package/docs/globals.md +8 -0
  40. package/docs/interfaces/BaseFetchActionProperties.md +12 -12
  41. package/docs/interfaces/BaseFetchCollectorActionProperties.md +16 -16
  42. package/docs/interfaces/BaseFetcherProperties.md +31 -27
  43. package/docs/interfaces/Cookie.md +14 -14
  44. package/docs/interfaces/DispatchedEngineAction.md +4 -4
  45. package/docs/interfaces/EvaluateActionOptions.md +3 -3
  46. package/docs/interfaces/ExtractActionProperties.md +12 -12
  47. package/docs/interfaces/FetchActionInContext.md +15 -15
  48. package/docs/interfaces/FetchActionProperties.md +13 -13
  49. package/docs/interfaces/FetchActionResult.md +6 -6
  50. package/docs/interfaces/FetchContext.md +41 -37
  51. package/docs/interfaces/FetchEngineContext.md +36 -32
  52. package/docs/interfaces/FetchMetadata.md +5 -5
  53. package/docs/interfaces/FetchResponse.md +14 -14
  54. package/docs/interfaces/FetchReturnTypeRegistry.md +7 -7
  55. package/docs/interfaces/FetchSite.md +34 -30
  56. package/docs/interfaces/FetcherOptions.md +33 -29
  57. package/docs/interfaces/GotoActionOptions.md +14 -6
  58. package/docs/interfaces/KeyboardPressParams.md +25 -0
  59. package/docs/interfaces/KeyboardTypeParams.md +25 -0
  60. package/docs/interfaces/MouseClickParams.md +49 -0
  61. package/docs/interfaces/MouseMoveParams.md +41 -0
  62. package/docs/interfaces/PendingEngineRequest.md +3 -3
  63. package/docs/interfaces/StorageOptions.md +5 -5
  64. package/docs/interfaces/SubmitActionOptions.md +2 -2
  65. package/docs/interfaces/TrimActionOptions.md +3 -3
  66. package/docs/interfaces/WaitForActionOptions.md +5 -5
  67. package/docs/type-aliases/BaseFetchActionOptions.md +1 -1
  68. package/docs/type-aliases/BaseFetchCollectorOptions.md +1 -1
  69. package/docs/type-aliases/BrowserEngine.md +1 -1
  70. package/docs/type-aliases/FetchActionCapabilities.md +1 -1
  71. package/docs/type-aliases/FetchActionCapabilityMode.md +1 -1
  72. package/docs/type-aliases/FetchActionOptions.md +1 -1
  73. package/docs/type-aliases/FetchEngineAction.md +2 -2
  74. package/docs/type-aliases/FetchEngineType.md +1 -1
  75. package/docs/type-aliases/FetchReturnType.md +1 -1
  76. package/docs/type-aliases/FetchReturnTypeFor.md +1 -1
  77. package/docs/type-aliases/OnFetchPauseCallback.md +1 -1
  78. package/docs/type-aliases/ResourceType.md +1 -1
  79. package/docs/type-aliases/TrimPreset.md +1 -1
  80. package/docs/variables/DefaultFetcherProperties.md +1 -1
  81. package/docs/variables/FetcherOptionKeys.md +1 -1
  82. package/docs/variables/TRIM_PRESETS.md +1 -1
  83. package/package.json +10 -10
package/README.cn.md CHANGED
@@ -20,9 +20,10 @@
20
20
  * **📜 声明式动作脚本**: 以简单、可读的 JSON 格式定义多步骤工作流(如登录、填写表单、点击按钮等)。
21
21
  * **📊 强大而灵活的数据提取**: 通过直观、强大的声明式 Schema,轻松提取从简单文本到复杂嵌套的各类结构化数据。
22
22
  * **🧠 智能引擎选择**: 可自动检测动态站点,并在需要时将引擎从 `http` 动态升级到 `browser`。
23
+ * **🛡️ 反爬虫/反屏蔽**: 在 `browser` 模式下,一个可选的 `antibot` 标志有助于绕过常见的反机器人措施,如 Cloudflare 挑战。
24
+ * **🕹️ 高仿真交互模拟**: 支持基于 **贝塞尔曲线** 的鼠标轨迹移动、真实的打字延迟模拟,以及复杂的键盘交互,大幅提升反爬避障能力。
23
25
  * **🧩 可扩展性**: 轻松创建自定义的、高级别的“组合动作”,以封装可复用的业务逻辑(例如,一个 `login` 动作)。
24
26
  * **🧲 高级收集器 (Collectors)**: 在主动作执行期间,由事件触发,在后台异步收集数据。
25
- * **🛡️ 反爬虫/反屏蔽**: 在 `browser` 模式下,一个可选的 `antibot` 标志有助于绕过常见的反机器人措施,如 Cloudflare 挑战。
26
27
 
27
28
  ---
28
29
 
@@ -145,6 +146,9 @@ searchGoogle('gemini');
145
146
  * `output` (object): 控制 `FetchResponse` 中的输出字段。
146
147
  * `cookies` (boolean): 是否在响应中包含 Cookie(默认:`true`)。
147
148
  * `sessionState` (boolean): 是否在响应中包含会话状态(默认:`true`)。
149
+ * `browser` (object): 浏览器引擎配置。
150
+ * `headless` (boolean): 是否以无头模式运行(默认:`true`)。
151
+ * `launchOptions` (object): Playwright 启动选项(例如 `{ slowMo: 50, args: [...] }`)。
148
152
  * `sessionPoolOptions` (SessionPoolOptions): 底层 Crawlee SessionPool 的高级配置。
149
153
  * ...以及许多其他用于代理、重试等的选项。
150
154
 
@@ -156,6 +160,10 @@ searchGoogle('gemini');
156
160
  * `click`: 点击选择器指定的元素(引擎相关)。
157
161
  * `fill`: 用指定值填充输入框(引擎相关)。
158
162
  * `submit`: 提交表单(引擎相关)。
163
+ * `mouseMove`: 将鼠标指针移动到指定的坐标或元素(支持贝塞尔曲线)。
164
+ * `mouseClick`: 在当前位置或指定坐标触发鼠标点击。
165
+ * `keyboardType`: 模拟真人在当前获得焦点的元素中输入文本。
166
+ * `keyboardPress`: 模拟按下单个按键或组合键。
159
167
  * `trim`: 从 DOM 中移除元素以清理页面(如脚本、广告、隐藏内容)。
160
168
  * `waitFor`: 暂停执行以等待特定条件(支持统一处理的固定超时)。
161
169
  * `pause`: 暂停执行以进行人工干预(如处理验证码,由核心层统一处理)。
@@ -174,7 +182,7 @@ searchGoogle('gemini');
174
182
  * `cookies`: Cookie 数组。
175
183
  * `sessionState`: Crawlee 会话状态。
176
184
  * `text`, `html`: 页面内容。
177
- * `outputs` (Record<string, any>): 通过 `storeAs` 提取并存储的数据。
185
+ * `outputs` (Record<string, any>): 通过 `storeAs` 提取并存储的数据。注意:当多个动作将对象存储到同一个键时,它们将被合并而不再是覆盖。
178
186
 
179
187
  ---
180
188
 
package/README.engine.cn.md CHANGED
@@ -158,7 +158,7 @@ await session.executeAll([
158
158
  * ✅ **快速轻量**:非常适合追求速度和低资源消耗的场景。
159
159
  * ✅ **符合 HTTP 标准的重定向**:正确处理 301-303 和 307/308 重定向,按照 HTTP 规范保留方法/正文或转换为 GET。
160
160
  * ❌ **无 JavaScript 执行**:无法与客户端渲染的内容交互。
161
- * ⚙️ **模拟交互**:像 `click` 和 `submit` 这样的动作是通过发起新的 HTTP 请求来模拟的。
161
+ * ⚙️ **模拟交互**:像 `click` 和 `submit` 这样的动作是通过发起新的 HTTP 请求来模拟的。**仅浏览器支持的动作**(如 `mouseMove`, `keyboardType`)将抛出 `not_supported` 错误。
162
162
  * **用例**: 抓取静态网站、服务器渲染页面或 API。
163
163
 
164
164
  ### `PlaywrightFetchEngine` (browser 模式)
@@ -183,6 +183,27 @@ await session.executeAll([
183
183
  * **用例**: 抓取受 Cloudflare 或其他高级机器人检测系统保护的网站。
184
184
  * **注意**: 此功能需要额外的依赖项(`camoufox-js`, `firefox`),并可能带来性能开销。
185
185
 
186
+ #### 配置 (Configuration)
187
+
188
+ 您可以通过选项中的 `browser` 属性来配置浏览器引擎:
189
+
190
+ * `headless` (boolean): 是否以无头模式运行浏览器(默认:`true`)。
191
+ * `launchOptions` (object): 原生 Playwright [LaunchOptions](https://playwright.dev/docs/api/class-browsertype#browser-type-launch),直接传递给浏览器启动器(例如 `slowMo`, `args`, `devtools`)。
192
+
193
+ ```typescript
194
+ const result = await fetchWeb({
195
+ url: 'https://example.com',
196
+ engine: 'browser',
197
+ browser: {
198
+ headless: false,
199
+ launchOptions: {
200
+ slowMo: 100, // 将操作减慢 100ms
201
+ args: ['--start-maximized'] // 传递自定义参数
202
+ }
203
+ }
204
+ });
205
+ ```
206
+
186
207
  ---
187
208
 
188
209
  ## 📊 5. 使用 `extract()` 进行数据提取
package/README.engine.md CHANGED
@@ -158,7 +158,7 @@ There are two primary engine implementations:
158
158
  * ✅ **Fast and Lightweight**: Ideal for speed and low resource consumption.
159
159
  * ✅ **HTTP-Compliant Redirects**: Correctly handles 301-303 and 307/308 redirects, preserving methods/bodies or converting to GET as per HTTP specifications.
160
160
  * ❌ **No JavaScript Execution**: Cannot interact with client-side rendered content.
161
- * ⚙️ **Simulated Interaction**: Actions like `click` and `submit` are simulated by making new HTTP requests.
161
+ * ⚙️ **Simulated Interaction**: Actions like `click` and `submit` are simulated by making new HTTP requests. **Browser-only actions** (e.g., `mouseMove`, `keyboardType`) will throw a `not_supported` error.
162
162
  * **Use Case**: Scraping static websites, server-rendered pages, or APIs.
163
163
 
164
164
  ### `PlaywrightFetchEngine` (browser mode)
@@ -183,6 +183,27 @@ To combat sophisticated anti-bot measures, the `PlaywrightFetchEngine` offers an
183
183
  * **Use Case**: Scraping websites protected by services like Cloudflare or other advanced bot-detection systems.
184
184
  * **Note**: This feature requires additional dependencies (`camoufox-js`, `firefox`) and may have a performance overhead.
185
185
 
186
+ #### Configuration
187
+
188
+ You can configure the browser engine via the `browser` property in options:
189
+
190
+ * `headless` (boolean): Whether to run browser in headless mode (default: `true`).
191
+ * `launchOptions` (object): Native Playwright [LaunchOptions](https://playwright.dev/docs/api/class-browsertype#browser-type-launch) passed directly to the browser launcher (e.g., `slowMo`, `args`, `devtools`).
192
+
193
+ ```typescript
194
+ const result = await fetchWeb({
195
+ url: 'https://example.com',
196
+ engine: 'browser',
197
+ browser: {
198
+ headless: false,
199
+ launchOptions: {
200
+ slowMo: 100, // Slow down operations by 100ms
201
+ args: ['--start-maximized'] // Pass custom arguments
202
+ }
203
+ }
204
+ });
205
+ ```
206
+
186
207
  ---
187
208
 
188
209
  ## 📊 5. Data Extraction with `extract()`
package/README.md CHANGED
@@ -145,6 +145,9 @@ This is the main entry point for the library.
145
145
  * `output` (object): Controls the output fields in `FetchResponse`.
146
146
  * `cookies` (boolean): Whether to include cookies in the response (default: `true`).
147
147
  * `sessionState` (boolean): Whether to include session state in the response (default: `true`).
148
+ * `browser` (object): Browser engine configuration.
149
+ * `headless` (boolean): Run in headless mode (default: `true`).
150
+ * `launchOptions` (object): Playwright launch options (e.g., `{ slowMo: 50, args: [...] }`).
148
151
  * `sessionPoolOptions` (SessionPoolOptions): Advanced configuration for the underlying Crawlee SessionPool.
149
152
  * ...and many other options for proxy, retries, etc.
150
153
 
@@ -156,6 +159,10 @@ The library provides a set of powerful built-in actions, many of which are engin
156
159
  * `click`: Clicks on an element (Engine-specific).
157
160
  * `fill`: Fills an input field (Engine-specific).
158
161
  * `submit`: Submits a form (Engine-specific).
162
+ * `mouseMove`: Moves the mouse cursor to a specific coordinate or element (Bézier curve supported).
163
+ * `mouseClick`: Triggers a mouse click at the current position or specified coordinates.
164
+ * `keyboardType`: Simulates human-like typing into the currently focused element.
165
+ * `keyboardPress`: Simulates pressing a single key or a key combination.
159
166
  * `trim`: Removes elements from the DOM to clean up the page.
160
167
  * `waitFor`: Pauses execution to wait for a specific condition (Supports fixed timeouts centrally).
161
168
  * `pause`: Pauses execution for manual intervention (Handled centrally).
@@ -174,7 +181,7 @@ The `fetchWeb` function returns an object containing:
174
181
  * `cookies`: Array of cookies.
175
182
  * `sessionState`: Crawlee session state.
176
183
  * `text`, `html`: Page content.
177
- * `outputs` (Record<string, any>): Data extracted and stored via `storeAs`.
184
+ * `outputs` (Record<string, any>): Data extracted and stored via `storeAs`. Note: When multiple actions store objects into the same key, they are merged instead of overwritten.
178
185
 
179
186
  ---
180
187
 
package/dist/index.d.mts CHANGED
@@ -1094,6 +1094,7 @@ interface GotoActionOptions {
1094
1094
  headers?: Record<string, string>;
1095
1095
  waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
1096
1096
  timeoutMs?: number;
1097
+ simulate?: boolean;
1097
1098
  }
1098
1099
  /**
1099
1100
  * Options for the {@link FetchEngine.waitFor} action, specifying conditions to wait for before continuing.
@@ -1202,6 +1203,35 @@ type FetchEngineAction = {
1202
1203
  type: 'fill';
1203
1204
  selector: string;
1204
1205
  value: string;
1206
+ } | {
1207
+ type: 'mouseMove';
1208
+ params: {
1209
+ x?: number;
1210
+ y?: number;
1211
+ selector?: string;
1212
+ steps?: number;
1213
+ };
1214
+ } | {
1215
+ type: 'mouseClick';
1216
+ params: {
1217
+ x?: number;
1218
+ y?: number;
1219
+ button?: 'left' | 'right' | 'middle';
1220
+ clickCount?: number;
1221
+ delay?: number;
1222
+ };
1223
+ } | {
1224
+ type: 'keyboardType';
1225
+ params: {
1226
+ text: string;
1227
+ delay?: number;
1228
+ };
1229
+ } | {
1230
+ type: 'keyboardPress';
1231
+ params: {
1232
+ key: string;
1233
+ delay?: number;
1234
+ };
1205
1235
  } | {
1206
1236
  type: 'waitFor';
1207
1237
  options?: WaitForActionOptions;
@@ -1555,6 +1585,43 @@ declare abstract class FetchEngine<TContext extends CrawlingContext = any, TCraw
1555
1585
  * @throws {Error} When no active page context exists
1556
1586
  */
1557
1587
  click(selector: string): Promise<void>;
1588
+ /**
1589
+ * Moves mouse to specified position or element.
1590
+ *
1591
+ * @param params - Move parameters (x, y, selector, steps)
1592
+ */
1593
+ mouseMove(params: {
1594
+ x?: number;
1595
+ y?: number;
1596
+ selector?: string;
1597
+ steps?: number;
1598
+ }): Promise<void>;
1599
+ /**
1600
+ * Clicks at current position or specified position.
1601
+ *
1602
+ * @param params - Click parameters (x, y, button, clickCount, delay)
1603
+ */
1604
+ mouseClick(params: {
1605
+ x?: number;
1606
+ y?: number;
1607
+ button?: 'left' | 'right' | 'middle';
1608
+ clickCount?: number;
1609
+ delay?: number;
1610
+ }): Promise<void>;
1611
+ /**
1612
+ * Types text into current focused element.
1613
+ *
1614
+ * @param text - Text to type
1615
+ * @param delay - Delay between key presses
1616
+ */
1617
+ keyboardType(text: string, delay?: number): Promise<void>;
1618
+ /**
1619
+ * Presses specified key.
1620
+ *
1621
+ * @param key - Key to press
1622
+ * @param delay - Delay after key press
1623
+ */
1624
+ keyboardPress(key: string, delay?: number): Promise<void>;
1558
1625
  /**
1559
1626
  * Fills input element with specified value.
1560
1627
  *
@@ -2007,6 +2074,25 @@ declare class PlaywrightFetchEngine extends FetchEngine<PlaywrightCrawlingContex
2007
2074
  _extractValue(schema: ExtractValueSchema, scope: Locator): Promise<any>;
2008
2075
  protected _getInitialElementScope(context: PlaywrightCrawlingContext): FetchElementScope;
2009
2076
  protected _waitForNavigation(context: PlaywrightCrawlingContext, oldUrl: string, actionType: string): Promise<void>;
2077
+ protected currentMousePos: {
2078
+ x: number;
2079
+ y: number;
2080
+ };
2081
+ protected _getRandomDelay(base: number, variance?: number): number;
2082
+ protected _getTrajectory(start: {
2083
+ x: number;
2084
+ y: number;
2085
+ }, end: {
2086
+ x: number;
2087
+ y: number;
2088
+ }, steps?: number): {
2089
+ x: number;
2090
+ y: number;
2091
+ }[];
2092
+ protected _moveToSelector(context: PlaywrightCrawlingContext, selector: string, steps?: number): Promise<{
2093
+ x: number;
2094
+ y: number;
2095
+ }>;
2010
2096
  protected executeAction(context: PlaywrightCrawlingContext, action: FetchEngineAction): Promise<any>;
2011
2097
  protected _createCrawler(options: PlaywrightCrawlerOptions, config?: Configuration): PlaywrightCrawler;
2012
2098
  protected _getSpecificCrawlerOptions(ctx: FetchEngineContext): Promise<Partial<PlaywrightCrawlerOptions>>;
@@ -2195,6 +2281,7 @@ interface BaseFetcherProperties {
2195
2281
  engine?: BrowserEngine;
2196
2282
  headless?: boolean;
2197
2283
  waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
2284
+ launchOptions?: Record<string, any>;
2198
2285
  };
2199
2286
  http?: {
2200
2287
  method?: 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE';
@@ -2526,6 +2613,65 @@ declare class EvaluateAction extends FetchAction {
2526
2613
  onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<any>;
2527
2614
  }
2528
2615
 
2616
+ interface MouseMoveParams {
2617
+ x?: number;
2618
+ y?: number;
2619
+ selector?: string;
2620
+ steps?: number;
2621
+ }
2622
+ declare class MouseMoveAction extends FetchAction {
2623
+ static id: string;
2624
+ static returnType: "none";
2625
+ static capabilities: {
2626
+ http: "noop";
2627
+ browser: "native";
2628
+ };
2629
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2630
+ }
2631
+ interface MouseClickParams {
2632
+ x?: number;
2633
+ y?: number;
2634
+ button?: 'left' | 'right' | 'middle';
2635
+ clickCount?: number;
2636
+ delay?: number;
2637
+ }
2638
+ declare class MouseClickAction extends FetchAction {
2639
+ static id: string;
2640
+ static returnType: "none";
2641
+ static capabilities: {
2642
+ http: "noop";
2643
+ browser: "native";
2644
+ };
2645
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2646
+ }
2647
+
2648
+ interface KeyboardTypeParams {
2649
+ text: string;
2650
+ delay?: number;
2651
+ }
2652
+ declare class KeyboardTypeAction extends FetchAction {
2653
+ static id: string;
2654
+ static returnType: "none";
2655
+ static capabilities: {
2656
+ http: "noop";
2657
+ browser: "native";
2658
+ };
2659
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2660
+ }
2661
+ interface KeyboardPressParams {
2662
+ key: string;
2663
+ delay?: number;
2664
+ }
2665
+ declare class KeyboardPressAction extends FetchAction {
2666
+ static id: string;
2667
+ static returnType: "none";
2668
+ static capabilities: {
2669
+ http: "noop";
2670
+ browser: "native";
2671
+ };
2672
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2673
+ }
2674
+
2529
2675
  declare function fetchWeb(options: FetcherOptions): Promise<{
2530
2676
  result: FetchResponse | undefined;
2531
2677
  outputs: Record<string, any>;
@@ -2535,4 +2681,4 @@ declare function fetchWeb(url: string, options?: FetcherOptions): Promise<{
2535
2681
  outputs: Record<string, any>;
2536
2682
  }>;
2537
2683
 
2538
- export { type BaseFetchActionOptions, type BaseFetchActionProperties, type BaseFetchCollectorActionProperties, type BaseFetchCollectorOptions, type BaseFetcherProperties, type BrowserEngine, CheerioFetchEngine, ClickAction, DefaultFetcherProperties, type DispatchedEngineAction, EvaluateAction, type EvaluateActionOptions, ExtractAction, type ExtractActionProperties, FetchAction, type FetchActionCapabilities, type FetchActionCapabilityMode, type FetchActionInContext, type FetchActionOptions, type FetchActionProperties, type FetchActionResult, FetchActionResultStatus, type FetchContext, FetchEngine, type FetchEngineAction, type FetchEngineContext, type FetchEngineType, type FetchMetadata, type FetchResponse, type FetchReturnType, type FetchReturnTypeFor, type FetchReturnTypeRegistry, FetchSession, type FetchSite, FetcherOptionKeys, type FetcherOptions, FillAction, GetContentAction, GotoAction, type GotoActionOptions, type OnFetchPauseCallback, PauseAction, type PendingEngineRequest, PlaywrightFetchEngine, type ResourceType, type StorageOptions, SubmitAction, type SubmitActionOptions, TRIM_PRESETS, TrimAction, type TrimActionOptions, type TrimPreset, WaitForAction, type WaitForActionOptions, WebFetcher, fetchWeb };
2684
+ export { type BaseFetchActionOptions, type BaseFetchActionProperties, type BaseFetchCollectorActionProperties, type BaseFetchCollectorOptions, type BaseFetcherProperties, type BrowserEngine, CheerioFetchEngine, ClickAction, DefaultFetcherProperties, type DispatchedEngineAction, EvaluateAction, type EvaluateActionOptions, ExtractAction, type ExtractActionProperties, FetchAction, type FetchActionCapabilities, type FetchActionCapabilityMode, type FetchActionInContext, type FetchActionOptions, type FetchActionProperties, type FetchActionResult, FetchActionResultStatus, type FetchContext, FetchEngine, type FetchEngineAction, type FetchEngineContext, type FetchEngineType, type FetchMetadata, type FetchResponse, type FetchReturnType, type FetchReturnTypeFor, type FetchReturnTypeRegistry, FetchSession, type FetchSite, FetcherOptionKeys, type FetcherOptions, FillAction, GetContentAction, GotoAction, type GotoActionOptions, KeyboardPressAction, type KeyboardPressParams, KeyboardTypeAction, type KeyboardTypeParams, MouseClickAction, type MouseClickParams, MouseMoveAction, type MouseMoveParams, type OnFetchPauseCallback, PauseAction, type PendingEngineRequest, PlaywrightFetchEngine, type ResourceType, type StorageOptions, SubmitAction, type SubmitActionOptions, TRIM_PRESETS, TrimAction, type TrimActionOptions, type TrimPreset, WaitForAction, type WaitForActionOptions, WebFetcher, fetchWeb };
package/dist/index.d.ts CHANGED
@@ -1094,6 +1094,7 @@ interface GotoActionOptions {
1094
1094
  headers?: Record<string, string>;
1095
1095
  waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
1096
1096
  timeoutMs?: number;
1097
+ simulate?: boolean;
1097
1098
  }
1098
1099
  /**
1099
1100
  * Options for the {@link FetchEngine.waitFor} action, specifying conditions to wait for before continuing.
@@ -1202,6 +1203,35 @@ type FetchEngineAction = {
1202
1203
  type: 'fill';
1203
1204
  selector: string;
1204
1205
  value: string;
1206
+ } | {
1207
+ type: 'mouseMove';
1208
+ params: {
1209
+ x?: number;
1210
+ y?: number;
1211
+ selector?: string;
1212
+ steps?: number;
1213
+ };
1214
+ } | {
1215
+ type: 'mouseClick';
1216
+ params: {
1217
+ x?: number;
1218
+ y?: number;
1219
+ button?: 'left' | 'right' | 'middle';
1220
+ clickCount?: number;
1221
+ delay?: number;
1222
+ };
1223
+ } | {
1224
+ type: 'keyboardType';
1225
+ params: {
1226
+ text: string;
1227
+ delay?: number;
1228
+ };
1229
+ } | {
1230
+ type: 'keyboardPress';
1231
+ params: {
1232
+ key: string;
1233
+ delay?: number;
1234
+ };
1205
1235
  } | {
1206
1236
  type: 'waitFor';
1207
1237
  options?: WaitForActionOptions;
@@ -1555,6 +1585,43 @@ declare abstract class FetchEngine<TContext extends CrawlingContext = any, TCraw
1555
1585
  * @throws {Error} When no active page context exists
1556
1586
  */
1557
1587
  click(selector: string): Promise<void>;
1588
+ /**
1589
+ * Moves mouse to specified position or element.
1590
+ *
1591
+ * @param params - Move parameters (x, y, selector, steps)
1592
+ */
1593
+ mouseMove(params: {
1594
+ x?: number;
1595
+ y?: number;
1596
+ selector?: string;
1597
+ steps?: number;
1598
+ }): Promise<void>;
1599
+ /**
1600
+ * Clicks at current position or specified position.
1601
+ *
1602
+ * @param params - Click parameters (x, y, button, clickCount, delay)
1603
+ */
1604
+ mouseClick(params: {
1605
+ x?: number;
1606
+ y?: number;
1607
+ button?: 'left' | 'right' | 'middle';
1608
+ clickCount?: number;
1609
+ delay?: number;
1610
+ }): Promise<void>;
1611
+ /**
1612
+ * Types text into current focused element.
1613
+ *
1614
+ * @param text - Text to type
1615
+ * @param delay - Delay between key presses
1616
+ */
1617
+ keyboardType(text: string, delay?: number): Promise<void>;
1618
+ /**
1619
+ * Presses specified key.
1620
+ *
1621
+ * @param key - Key to press
1622
+ * @param delay - Delay after key press
1623
+ */
1624
+ keyboardPress(key: string, delay?: number): Promise<void>;
1558
1625
  /**
1559
1626
  * Fills input element with specified value.
1560
1627
  *
@@ -2007,6 +2074,25 @@ declare class PlaywrightFetchEngine extends FetchEngine<PlaywrightCrawlingContex
2007
2074
  _extractValue(schema: ExtractValueSchema, scope: Locator): Promise<any>;
2008
2075
  protected _getInitialElementScope(context: PlaywrightCrawlingContext): FetchElementScope;
2009
2076
  protected _waitForNavigation(context: PlaywrightCrawlingContext, oldUrl: string, actionType: string): Promise<void>;
2077
+ protected currentMousePos: {
2078
+ x: number;
2079
+ y: number;
2080
+ };
2081
+ protected _getRandomDelay(base: number, variance?: number): number;
2082
+ protected _getTrajectory(start: {
2083
+ x: number;
2084
+ y: number;
2085
+ }, end: {
2086
+ x: number;
2087
+ y: number;
2088
+ }, steps?: number): {
2089
+ x: number;
2090
+ y: number;
2091
+ }[];
2092
+ protected _moveToSelector(context: PlaywrightCrawlingContext, selector: string, steps?: number): Promise<{
2093
+ x: number;
2094
+ y: number;
2095
+ }>;
2010
2096
  protected executeAction(context: PlaywrightCrawlingContext, action: FetchEngineAction): Promise<any>;
2011
2097
  protected _createCrawler(options: PlaywrightCrawlerOptions, config?: Configuration): PlaywrightCrawler;
2012
2098
  protected _getSpecificCrawlerOptions(ctx: FetchEngineContext): Promise<Partial<PlaywrightCrawlerOptions>>;
@@ -2195,6 +2281,7 @@ interface BaseFetcherProperties {
2195
2281
  engine?: BrowserEngine;
2196
2282
  headless?: boolean;
2197
2283
  waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
2284
+ launchOptions?: Record<string, any>;
2198
2285
  };
2199
2286
  http?: {
2200
2287
  method?: 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE';
@@ -2526,6 +2613,65 @@ declare class EvaluateAction extends FetchAction {
2526
2613
  onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<any>;
2527
2614
  }
2528
2615
 
2616
+ interface MouseMoveParams {
2617
+ x?: number;
2618
+ y?: number;
2619
+ selector?: string;
2620
+ steps?: number;
2621
+ }
2622
+ declare class MouseMoveAction extends FetchAction {
2623
+ static id: string;
2624
+ static returnType: "none";
2625
+ static capabilities: {
2626
+ http: "noop";
2627
+ browser: "native";
2628
+ };
2629
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2630
+ }
2631
+ interface MouseClickParams {
2632
+ x?: number;
2633
+ y?: number;
2634
+ button?: 'left' | 'right' | 'middle';
2635
+ clickCount?: number;
2636
+ delay?: number;
2637
+ }
2638
+ declare class MouseClickAction extends FetchAction {
2639
+ static id: string;
2640
+ static returnType: "none";
2641
+ static capabilities: {
2642
+ http: "noop";
2643
+ browser: "native";
2644
+ };
2645
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2646
+ }
2647
+
2648
+ interface KeyboardTypeParams {
2649
+ text: string;
2650
+ delay?: number;
2651
+ }
2652
+ declare class KeyboardTypeAction extends FetchAction {
2653
+ static id: string;
2654
+ static returnType: "none";
2655
+ static capabilities: {
2656
+ http: "noop";
2657
+ browser: "native";
2658
+ };
2659
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2660
+ }
2661
+ interface KeyboardPressParams {
2662
+ key: string;
2663
+ delay?: number;
2664
+ }
2665
+ declare class KeyboardPressAction extends FetchAction {
2666
+ static id: string;
2667
+ static returnType: "none";
2668
+ static capabilities: {
2669
+ http: "noop";
2670
+ browser: "native";
2671
+ };
2672
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
2673
+ }
2674
+
2529
2675
  declare function fetchWeb(options: FetcherOptions): Promise<{
2530
2676
  result: FetchResponse | undefined;
2531
2677
  outputs: Record<string, any>;
@@ -2535,4 +2681,4 @@ declare function fetchWeb(url: string, options?: FetcherOptions): Promise<{
2535
2681
  outputs: Record<string, any>;
2536
2682
  }>;
2537
2683
 
2538
- export { type BaseFetchActionOptions, type BaseFetchActionProperties, type BaseFetchCollectorActionProperties, type BaseFetchCollectorOptions, type BaseFetcherProperties, type BrowserEngine, CheerioFetchEngine, ClickAction, DefaultFetcherProperties, type DispatchedEngineAction, EvaluateAction, type EvaluateActionOptions, ExtractAction, type ExtractActionProperties, FetchAction, type FetchActionCapabilities, type FetchActionCapabilityMode, type FetchActionInContext, type FetchActionOptions, type FetchActionProperties, type FetchActionResult, FetchActionResultStatus, type FetchContext, FetchEngine, type FetchEngineAction, type FetchEngineContext, type FetchEngineType, type FetchMetadata, type FetchResponse, type FetchReturnType, type FetchReturnTypeFor, type FetchReturnTypeRegistry, FetchSession, type FetchSite, FetcherOptionKeys, type FetcherOptions, FillAction, GetContentAction, GotoAction, type GotoActionOptions, type OnFetchPauseCallback, PauseAction, type PendingEngineRequest, PlaywrightFetchEngine, type ResourceType, type StorageOptions, SubmitAction, type SubmitActionOptions, TRIM_PRESETS, TrimAction, type TrimActionOptions, type TrimPreset, WaitForAction, type WaitForActionOptions, WebFetcher, fetchWeb };
2684
+ export { type BaseFetchActionOptions, type BaseFetchActionProperties, type BaseFetchCollectorActionProperties, type BaseFetchCollectorOptions, type BaseFetcherProperties, type BrowserEngine, CheerioFetchEngine, ClickAction, DefaultFetcherProperties, type DispatchedEngineAction, EvaluateAction, type EvaluateActionOptions, ExtractAction, type ExtractActionProperties, FetchAction, type FetchActionCapabilities, type FetchActionCapabilityMode, type FetchActionInContext, type FetchActionOptions, type FetchActionProperties, type FetchActionResult, FetchActionResultStatus, type FetchContext, FetchEngine, type FetchEngineAction, type FetchEngineContext, type FetchEngineType, type FetchMetadata, type FetchResponse, type FetchReturnType, type FetchReturnTypeFor, type FetchReturnTypeRegistry, FetchSession, type FetchSite, FetcherOptionKeys, type FetcherOptions, FillAction, GetContentAction, GotoAction, type GotoActionOptions, KeyboardPressAction, type KeyboardPressParams, KeyboardTypeAction, type KeyboardTypeParams, MouseClickAction, type MouseClickParams, MouseMoveAction, type MouseMoveParams, type OnFetchPauseCallback, PauseAction, type PendingEngineRequest, PlaywrightFetchEngine, type ResourceType, type StorageOptions, SubmitAction, type SubmitActionOptions, TRIM_PRESETS, TrimAction, type TrimActionOptions, type TrimPreset, WaitForAction, type WaitForActionOptions, WebFetcher, fetchWeb };