azcrawlerpy 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azcrawlerpy-0.0.1/PKG-INFO +1046 -0
- azcrawlerpy-0.0.1/README.md +1028 -0
- azcrawlerpy-0.0.1/azcrawlerpy/__init__.py +71 -0
- azcrawlerpy-0.0.1/azcrawlerpy/actions.py +338 -0
- azcrawlerpy-0.0.1/azcrawlerpy/browser_utils.py +106 -0
- azcrawlerpy-0.0.1/azcrawlerpy/crawler.py +301 -0
- azcrawlerpy-0.0.1/azcrawlerpy/diagnostics.py +407 -0
- azcrawlerpy-0.0.1/azcrawlerpy/discovery.py +642 -0
- azcrawlerpy-0.0.1/azcrawlerpy/discovery_models.py +122 -0
- azcrawlerpy-0.0.1/azcrawlerpy/exceptions.py +136 -0
- azcrawlerpy-0.0.1/azcrawlerpy/field_handlers.py +631 -0
- azcrawlerpy-0.0.1/azcrawlerpy/models.py +207 -0
- azcrawlerpy-0.0.1/azcrawlerpy/utils.py +100 -0
- azcrawlerpy-0.0.1/azcrawlerpy.egg-info/PKG-INFO +1046 -0
- azcrawlerpy-0.0.1/azcrawlerpy.egg-info/SOURCES.txt +18 -0
- azcrawlerpy-0.0.1/azcrawlerpy.egg-info/dependency_links.txt +1 -0
- azcrawlerpy-0.0.1/azcrawlerpy.egg-info/requires.txt +10 -0
- azcrawlerpy-0.0.1/azcrawlerpy.egg-info/top_level.txt +1 -0
- azcrawlerpy-0.0.1/pyproject.toml +52 -0
- azcrawlerpy-0.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,1046 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: azcrawlerpy
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Agentic Crawler Discovery Framework.
|
|
5
|
+
Classifier: Programming Language :: Python :: 3
|
|
6
|
+
Classifier: Operating System :: OS Independent
|
|
7
|
+
Requires-Python: <3.12,>=3.11.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: a>=1.0
|
|
10
|
+
Requires-Dist: playwright>=1.41.0
|
|
11
|
+
Requires-Dist: pydantic>=2.0.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest==8.4.2; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-asyncio==1.0.0; extra == "dev"
|
|
15
|
+
Requires-Dist: pytest-mock==3.14.1; extra == "dev"
|
|
16
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
17
|
+
Requires-Dist: playwright>=1.41.0; extra == "dev"
|
|
18
|
+
|
|
19
|
+
# azcrawlerpy
|
|
20
|
+
|
|
21
|
+
A Playwright-based framework for navigating and filling multi-step web forms programmatically. The framework uses JSON instruction files to define form navigation workflows, making it ideal for automated form submission, web scraping, and AI agent-driven web interactions.
|
|
22
|
+
|
|
23
|
+
## Table of Contents
|
|
24
|
+
|
|
25
|
+
- [Installation](#installation)
|
|
26
|
+
- [Quick Start](#quick-start)
|
|
27
|
+
- [Core Concepts](#core-concepts)
|
|
28
|
+
- [Instructions Schema](#instructions-schema)
|
|
29
|
+
- [Top-Level Structure](#top-level-structure)
|
|
30
|
+
- [Browser Configuration](#browser-configuration)
|
|
31
|
+
- [Cookie Consent Handling](#cookie-consent-handling)
|
|
32
|
+
- [Step Definitions](#step-definitions)
|
|
33
|
+
- [Field Types](#field-types)
|
|
34
|
+
- [Action Types](#action-types)
|
|
35
|
+
- [Final Page Configuration](#final-page-configuration)
|
|
36
|
+
- [Data Points (input_data)](#data-points-input_data)
|
|
37
|
+
- [Element Discovery](#element-discovery)
|
|
38
|
+
- [AI Agent Guidance](#ai-agent-guidance)
|
|
39
|
+
- [Error Handling and Diagnostics](#error-handling-and-diagnostics)
|
|
40
|
+
- [Examples](#examples)
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
uv add azcrawlerpy
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or install from source:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uv pip install -e .
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import asyncio
|
|
58
|
+
from pathlib import Path
|
|
59
|
+
from azcrawlerpy import FormCrawler, DebugMode
|
|
60
|
+
|
|
61
|
+
async def main():
|
|
62
|
+
crawler = FormCrawler(headless=True)
|
|
63
|
+
|
|
64
|
+
instructions = {
|
|
65
|
+
"url": "https://example.com/form",
|
|
66
|
+
"browser_config": {
|
|
67
|
+
"viewport_width": 1920,
|
|
68
|
+
"viewport_height": 1080
|
|
69
|
+
},
|
|
70
|
+
"steps": [
|
|
71
|
+
{
|
|
72
|
+
"name": "step_1",
|
|
73
|
+
"wait_for": "input[name='email']",
|
|
74
|
+
"timeout_ms": 30000,
|
|
75
|
+
"fields": [
|
|
76
|
+
{
|
|
77
|
+
"type": "text",
|
|
78
|
+
"selector": "input[name='email']",
|
|
79
|
+
"data_key": "email"
|
|
80
|
+
}
|
|
81
|
+
],
|
|
82
|
+
"next_action": {
|
|
83
|
+
"type": "click",
|
|
84
|
+
"selector": "button[type='submit']"
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
],
|
|
88
|
+
"final_page": {
|
|
89
|
+
"wait_for": ".success-message",
|
|
90
|
+
"timeout_ms": 60000
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
input_data = {
|
|
95
|
+
"email": "user@example.com"
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
result = await crawler.crawl(
|
|
99
|
+
url=instructions["url"],
|
|
100
|
+
input_data=input_data,
|
|
101
|
+
instructions=instructions,
|
|
102
|
+
output_dir=Path("./output"),
|
|
103
|
+
debug_mode=DebugMode.ALL,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
print(f"Final URL: {result.final_url}")
|
|
107
|
+
print(f"Steps completed: {result.steps_completed}")
|
|
108
|
+
print(f"Screenshot saved: {result.screenshot_path}")
|
|
109
|
+
|
|
110
|
+
asyncio.run(main())
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Core Concepts
|
|
114
|
+
|
|
115
|
+
The framework operates on two primary inputs:
|
|
116
|
+
|
|
117
|
+
1. **Instructions (instructions.json)**: Defines the form structure, selectors, navigation flow, and field types
|
|
118
|
+
2. **Data Points (input_data)**: Contains the actual values to fill into form fields
|
|
119
|
+
|
|
120
|
+
The crawler processes each step sequentially:
|
|
121
|
+
1. Wait for the step's `wait_for` selector to become visible
|
|
122
|
+
2. Fill all fields defined in the step using values from `input_data`
|
|
123
|
+
3. Execute the `next_action` to navigate to the next step
|
|
124
|
+
4. Repeat until all steps are complete
|
|
125
|
+
5. Wait for and capture the final page
|
|
126
|
+
|
|
127
|
+
## Instructions Schema
|
|
128
|
+
|
|
129
|
+
### Top-Level Structure
|
|
130
|
+
|
|
131
|
+
```json
|
|
132
|
+
{
|
|
133
|
+
"url": "https://example.com/form",
|
|
134
|
+
"browser_config": { ... },
|
|
135
|
+
"cookie_consent": { ... },
|
|
136
|
+
"steps": [ ... ],
|
|
137
|
+
"final_page": { ... }
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
| Field | Type | Required | Description |
|
|
142
|
+
|-------|------|----------|-------------|
|
|
143
|
+
| `url` | string | Yes | Starting URL for the form |
|
|
144
|
+
| `browser_config` | object | No | Browser viewport and user agent settings |
|
|
145
|
+
| `cookie_consent` | object | No | Cookie banner handling configuration |
|
|
146
|
+
| `steps` | array | Yes | Ordered list of form steps |
|
|
147
|
+
| `final_page` | object | Yes | Configuration for the result page |
|
|
148
|
+
|
|
149
|
+
### Browser Configuration
|
|
150
|
+
|
|
151
|
+
```json
|
|
152
|
+
{
|
|
153
|
+
"browser_config": {
|
|
154
|
+
"viewport_width": 1920,
|
|
155
|
+
"viewport_height": 1080,
|
|
156
|
+
"user_agent": "Mozilla/5.0 ..."
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
| Field | Type | Required | Description |
|
|
162
|
+
|-------|------|----------|-------------|
|
|
163
|
+
| `viewport_width` | integer | Yes | Browser viewport width in pixels |
|
|
164
|
+
| `viewport_height` | integer | Yes | Browser viewport height in pixels |
|
|
165
|
+
| `user_agent` | string | No | Custom user agent string |
|
|
166
|
+
|
|
167
|
+
### Cookie Consent Handling
|
|
168
|
+
|
|
169
|
+
The framework supports two modes for handling cookie consent banners:
|
|
170
|
+
|
|
171
|
+
**Standard Mode** (regular DOM elements):
|
|
172
|
+
```json
|
|
173
|
+
{
|
|
174
|
+
"cookie_consent": {
|
|
175
|
+
"banner_selector": "dialog:has-text('cookies')",
|
|
176
|
+
"accept_selector": "button:has-text('Accept')"
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
**Shadow DOM Mode** (for Usercentrics, OneTrust, etc.):
|
|
182
|
+
```json
|
|
183
|
+
{
|
|
184
|
+
"cookie_consent": {
|
|
185
|
+
"banner_selector": "#usercentrics-cmp-ui",
|
|
186
|
+
"shadow_host_selector": "#usercentrics-cmp-ui",
|
|
187
|
+
"accept_button_texts": ["Accept All", "Alle akzeptieren"]
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
| Field | Type | Required | Description |
|
|
193
|
+
|-------|------|----------|-------------|
|
|
194
|
+
| `banner_selector` | string | Yes | CSS selector for the banner container |
|
|
195
|
+
| `accept_selector` | string | No | CSS selector for accept button (standard mode) |
|
|
196
|
+
| `shadow_host_selector` | string | No | CSS selector for shadow DOM host |
|
|
197
|
+
| `accept_button_texts` | array | No | Text patterns to match accept buttons in shadow DOM |
|
|
198
|
+
| `banner_settle_delay_ms` | integer | No | Wait time before checking for banner |
|
|
199
|
+
| `banner_visible_timeout_ms` | integer | No | Timeout for banner visibility |
|
|
200
|
+
| `accept_button_timeout_ms` | integer | No | Timeout for accept button visibility |
|
|
201
|
+
| `post_consent_delay_ms` | integer | No | Wait time after handling consent |
|
|
202
|
+
|
|
203
|
+
### Step Definitions
|
|
204
|
+
|
|
205
|
+
Each step represents a form page or section:
|
|
206
|
+
|
|
207
|
+
```json
|
|
208
|
+
{
|
|
209
|
+
"name": "personal_info",
|
|
210
|
+
"wait_for": "input[name='firstName']",
|
|
211
|
+
"timeout_ms": 30000,
|
|
212
|
+
"fields": [ ... ],
|
|
213
|
+
"next_action": { ... }
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
| Field | Type | Required | Description |
|
|
218
|
+
|-------|------|----------|-------------|
|
|
219
|
+
| `name` | string | Yes | Unique identifier for the step |
|
|
220
|
+
| `wait_for` | string | Yes | CSS selector to wait for before processing |
|
|
221
|
+
| `timeout_ms` | integer | Yes | Timeout in milliseconds for wait condition |
|
|
222
|
+
| `fields` | array | Yes | List of field definitions (can be empty) |
|
|
223
|
+
| `next_action` | object | Yes | Action to navigate to next step |
|
|
224
|
+
|
|
225
|
+
### Field Types
|
|
226
|
+
|
|
227
|
+
#### TEXT
|
|
228
|
+
|
|
229
|
+
For text inputs, email fields, phone numbers, and similar:
|
|
230
|
+
|
|
231
|
+
```json
|
|
232
|
+
{
|
|
233
|
+
"type": "text",
|
|
234
|
+
"selector": "input[name='email']",
|
|
235
|
+
"data_key": "email"
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
#### TEXTAREA
|
|
240
|
+
|
|
241
|
+
For multi-line text areas:
|
|
242
|
+
|
|
243
|
+
```json
|
|
244
|
+
{
|
|
245
|
+
"type": "textarea",
|
|
246
|
+
"selector": "textarea[name='message']",
|
|
247
|
+
"data_key": "message"
|
|
248
|
+
}
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
#### DROPDOWN / SELECT
|
|
252
|
+
|
|
253
|
+
For native `<select>` elements:
|
|
254
|
+
|
|
255
|
+
```json
|
|
256
|
+
{
|
|
257
|
+
"type": "dropdown",
|
|
258
|
+
"selector": "select[name='country']",
|
|
259
|
+
"data_key": "country",
|
|
260
|
+
"select_by": "text"
|
|
261
|
+
}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
| Parameter | Values | Description |
|
|
265
|
+
|-----------|--------|-------------|
|
|
266
|
+
| `select_by` | `text`, `value`, `index` | How to match the option |
|
|
267
|
+
|
|
268
|
+
#### RADIO
|
|
269
|
+
|
|
270
|
+
For radio button groups:
|
|
271
|
+
|
|
272
|
+
```json
|
|
273
|
+
{
|
|
274
|
+
"type": "radio",
|
|
275
|
+
"selector": "input[type='radio'][value='${value}']",
|
|
276
|
+
"data_key": "payment_method"
|
|
277
|
+
}
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**Pattern A - Value-driven selector**: Use the `${value}` placeholder in the selector; the data provides the value that is substituted into it:
|
|
281
|
+
```json
|
|
282
|
+
{
|
|
283
|
+
"type": "radio",
|
|
284
|
+
"selector": "input[type='radio'][value='${value}']",
|
|
285
|
+
"data_key": "gender"
|
|
286
|
+
}
|
|
287
|
+
// data: { "gender": "male" }
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
**Pattern B - Boolean flags**: Use explicit selectors with boolean data values:
|
|
291
|
+
```json
|
|
292
|
+
{
|
|
293
|
+
"type": "radio",
|
|
294
|
+
"selector": "[role='radio']:has-text('Yes')",
|
|
295
|
+
"data_key": "accept_terms",
|
|
296
|
+
"force_click": true
|
|
297
|
+
}
|
|
298
|
+
// data: { "accept_terms": true } // clicks if true, skips if null/false
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
#### CHECKBOX
|
|
302
|
+
|
|
303
|
+
For checkbox inputs:
|
|
304
|
+
|
|
305
|
+
```json
|
|
306
|
+
{
|
|
307
|
+
"type": "checkbox",
|
|
308
|
+
"selector": "input[type='checkbox'][name='newsletter']",
|
|
309
|
+
"data_key": "subscribe_newsletter"
|
|
310
|
+
}
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
A data value of `true` checks the box; `false` or `null` leaves it unchanged.
|
|
314
|
+
|
|
315
|
+
#### DATE
|
|
316
|
+
|
|
317
|
+
For date inputs with format conversion:
|
|
318
|
+
|
|
319
|
+
```json
|
|
320
|
+
{
|
|
321
|
+
"type": "date",
|
|
322
|
+
"selector": "input[name='birthdate']",
|
|
323
|
+
"data_key": "birthdate",
|
|
324
|
+
"format": "DD.MM.YYYY"
|
|
325
|
+
}
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
| Format | Example | Description |
|
|
329
|
+
|--------|---------|-------------|
|
|
330
|
+
| `DD.MM.YYYY` | 15.06.1985 | Day.Month.Year |
|
|
331
|
+
| `MM.YYYY` | 06.1985 | Month.Year |
|
|
332
|
+
| `YYYY-MM-DD` | 1985-06-15 | ISO format |
|
|
333
|
+
| `%d.%m.%Y` | 15.06.1985 | Python strftime format |
|
|
334
|
+
|
|
335
|
+
Data should be provided in ISO format (`YYYY-MM-DD`) and will be converted to the specified format.
|
|
336
|
+
|
|
337
|
+
#### SLIDER
|
|
338
|
+
|
|
339
|
+
For range inputs:
|
|
340
|
+
|
|
341
|
+
```json
|
|
342
|
+
{
|
|
343
|
+
"type": "slider",
|
|
344
|
+
"selector": "input[type='range'][name='coverage']",
|
|
345
|
+
"data_key": "coverage_amount"
|
|
346
|
+
}
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
#### FILE
|
|
350
|
+
|
|
351
|
+
For file upload fields:
|
|
352
|
+
|
|
353
|
+
```json
|
|
354
|
+
{
|
|
355
|
+
"type": "file",
|
|
356
|
+
"selector": "input[type='file']",
|
|
357
|
+
"data_key": "document_path"
|
|
358
|
+
}
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
The data value should be the absolute path of the file to upload.
|
|
362
|
+
|
|
363
|
+
#### COMBOBOX
|
|
364
|
+
|
|
365
|
+
For autocomplete/typeahead inputs:
|
|
366
|
+
|
|
367
|
+
```json
|
|
368
|
+
{
|
|
369
|
+
"type": "combobox",
|
|
370
|
+
"selector": "input[aria-label='City']",
|
|
371
|
+
"data_key": "city",
|
|
372
|
+
"option_selector": ".autocomplete-option",
|
|
373
|
+
"type_delay_ms": 50,
|
|
374
|
+
"wait_after_type_ms": 500,
|
|
375
|
+
"press_enter": true
|
|
376
|
+
}
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
| Parameter | Description |
|
|
380
|
+
|-----------|-------------|
|
|
381
|
+
| `option_selector` | CSS selector for dropdown options |
|
|
382
|
+
| `type_delay_ms` | Delay between keystrokes (simulates human typing) |
|
|
383
|
+
| `wait_after_type_ms` | Wait time for options to appear |
|
|
384
|
+
| `press_enter` | Press Enter after selecting option |
|
|
385
|
+
| `clear_before_type` | Clear field before typing |
|
|
386
|
+
|
|
387
|
+
#### CLICK_SELECT
|
|
388
|
+
|
|
389
|
+
For custom dropdowns requiring click-then-select:
|
|
390
|
+
|
|
391
|
+
```json
|
|
392
|
+
{
|
|
393
|
+
"type": "click_select",
|
|
394
|
+
"selector": ".custom-dropdown-trigger",
|
|
395
|
+
"data_key": "option_value",
|
|
396
|
+
"option_selector": ".dropdown-item:has-text('${value}')",
|
|
397
|
+
"post_click_delay_ms": 300
|
|
398
|
+
}
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
#### CLICK_ONLY
|
|
402
|
+
|
|
403
|
+
For elements that only need clicking (no data input):
|
|
404
|
+
|
|
405
|
+
```json
|
|
406
|
+
{
|
|
407
|
+
"type": "click_only",
|
|
408
|
+
"selector": "button.expand-section"
|
|
409
|
+
}
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
With conditional clicking based on data:
|
|
413
|
+
|
|
414
|
+
```json
|
|
415
|
+
{
|
|
416
|
+
"type": "click_only",
|
|
417
|
+
"selector": "button:has-text('${value}')",
|
|
418
|
+
"data_key": "selected_option"
|
|
419
|
+
}
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
#### IFRAME_FIELD
|
|
423
|
+
|
|
424
|
+
For fields inside iframes (an alternative to setting `iframe_selector` on a regular field type such as `text`):
|
|
425
|
+
|
|
426
|
+
```json
|
|
427
|
+
{
|
|
428
|
+
"type": "iframe_field",
|
|
429
|
+
"selector": "input[name='card_number']",
|
|
430
|
+
"iframe_selector": "iframe#payment-frame",
|
|
431
|
+
"data_key": "card_number"
|
|
432
|
+
}
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Common Field Parameters
|
|
436
|
+
|
|
437
|
+
| Parameter | Type | Description |
|
|
438
|
+
|-----------|------|-------------|
|
|
439
|
+
| `data_key` | string | Key in input_data to get value from |
|
|
440
|
+
| `selector` | string | CSS/Playwright selector for the element |
|
|
441
|
+
| `iframe_selector` | string | Selector for parent iframe if field is embedded |
|
|
442
|
+
| `field_visible_timeout_ms` | integer | Timeout for field to become visible |
|
|
443
|
+
| `post_click_delay_ms` | integer | Wait after clicking the field |
|
|
444
|
+
| `skip_verification` | boolean | Skip value verification after filling |
|
|
445
|
+
| `force_click` | boolean | Use force click (bypasses overlays) |
|
|
446
|
+
|
|
447
|
+
### Action Types
|
|
448
|
+
|
|
449
|
+
#### CLICK
|
|
450
|
+
|
|
451
|
+
Click a button or link:
|
|
452
|
+
|
|
453
|
+
```json
|
|
454
|
+
{
|
|
455
|
+
"type": "click",
|
|
456
|
+
"selector": "button[type='submit']"
|
|
457
|
+
}
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
With iframe support:
|
|
461
|
+
|
|
462
|
+
```json
|
|
463
|
+
{
|
|
464
|
+
"type": "click",
|
|
465
|
+
"selector": "button:has-text('Next')",
|
|
466
|
+
"iframe_selector": "iframe#form-frame"
|
|
467
|
+
}
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
#### WAIT
|
|
471
|
+
|
|
472
|
+
Wait for an element to appear:
|
|
473
|
+
|
|
474
|
+
```json
|
|
475
|
+
{
|
|
476
|
+
"type": "wait",
|
|
477
|
+
"selector": ".loading-complete"
|
|
478
|
+
}
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
#### WAIT_HIDDEN
|
|
482
|
+
|
|
483
|
+
Wait for an element to disappear:
|
|
484
|
+
|
|
485
|
+
```json
|
|
486
|
+
{
|
|
487
|
+
"type": "wait_hidden",
|
|
488
|
+
"selector": ".loading-spinner"
|
|
489
|
+
}
|
|
490
|
+
```
|
|
491
|
+
|
|
492
|
+
#### SCROLL
|
|
493
|
+
|
|
494
|
+
Scroll to an element:
|
|
495
|
+
|
|
496
|
+
```json
|
|
497
|
+
{
|
|
498
|
+
"type": "scroll",
|
|
499
|
+
"selector": "#section-bottom"
|
|
500
|
+
}
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
#### DELAY
|
|
504
|
+
|
|
505
|
+
Wait for a fixed time:
|
|
506
|
+
|
|
507
|
+
```json
|
|
508
|
+
{
|
|
509
|
+
"type": "delay",
|
|
510
|
+
"delay_ms": 2000
|
|
511
|
+
}
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
#### CONDITIONAL
|
|
515
|
+
|
|
516
|
+
Execute actions based on conditions:
|
|
517
|
+
|
|
518
|
+
```json
|
|
519
|
+
{
|
|
520
|
+
"type": "conditional",
|
|
521
|
+
"condition": {
|
|
522
|
+
"type": "element_visible",
|
|
523
|
+
"selector": ".error-message"
|
|
524
|
+
},
|
|
525
|
+
"actions": [
|
|
526
|
+
{
|
|
527
|
+
"type": "click",
|
|
528
|
+
"selector": "button.dismiss-error"
|
|
529
|
+
}
|
|
530
|
+
]
|
|
531
|
+
}
|
|
532
|
+
```
|
|
533
|
+
|
|
534
|
+
Condition types:
|
|
535
|
+
- `element_visible`: Check if element is visible
|
|
536
|
+
- `element_exists`: Check if element exists in DOM
|
|
537
|
+
- `data_equals`: Check if data value matches
|
|
538
|
+
|
|
539
|
+
### Common Action Parameters
|
|
540
|
+
|
|
541
|
+
| Parameter | Type | Description |
|
|
542
|
+
|-----------|------|-------------|
|
|
543
|
+
| `selector` | string | Target element selector |
|
|
544
|
+
| `iframe_selector` | string | Selector for parent iframe |
|
|
545
|
+
| `pre_action_delay_ms` | integer | Wait before executing action |
|
|
546
|
+
| `post_action_delay_ms` | integer | Wait after executing action |
|
|
547
|
+
|
|
548
|
+
### Final Page Configuration
|
|
549
|
+
|
|
550
|
+
```json
|
|
551
|
+
{
|
|
552
|
+
"final_page": {
|
|
553
|
+
"wait_for": ".result-container, .confirmation",
|
|
554
|
+
"timeout_ms": 60000,
|
|
555
|
+
"screenshot_selector": ".result-panel"
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
| Field | Type | Required | Description |
|
|
561
|
+
|-------|------|----------|-------------|
|
|
562
|
+
| `wait_for` | string | Yes | Selector to confirm final page loaded |
|
|
563
|
+
| `timeout_ms` | integer | Yes | Timeout for final page |
|
|
564
|
+
| `screenshot_selector` | string | No | Element to screenshot (null for full page) |
|
|
565
|
+
|
|
566
|
+
## Data Points (input_data)
|
|
567
|
+
|
|
568
|
+
The `input_data` dictionary provides values for form fields. Keys must match `data_key` values in the instructions.
|
|
569
|
+
|
|
570
|
+
### Structure
|
|
571
|
+
|
|
572
|
+
```json
|
|
573
|
+
{
|
|
574
|
+
"email": "user@example.com",
|
|
575
|
+
"first_name": "John",
|
|
576
|
+
"last_name": "Doe",
|
|
577
|
+
"birthdate": "1985-06-15",
|
|
578
|
+
"country": "Germany",
|
|
579
|
+
"accept_terms": true,
|
|
580
|
+
"newsletter": false,
|
|
581
|
+
"premium_option": null
|
|
582
|
+
}
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
### Value Types
|
|
586
|
+
|
|
587
|
+
| Type | Description | Example |
|
|
588
|
+
|------|-------------|---------|
|
|
589
|
+
| String | Text values, dropdown selections | `"John"` |
|
|
590
|
+
| Boolean | Checkbox/radio toggle | `true`, `false` |
|
|
591
|
+
| Null | Skip this field | `null` |
|
|
592
|
+
| Integer/Float | Numeric inputs, sliders | `12000`, `99.99` |
|
|
593
|
+
|
|
594
|
+
### Radio Button Patterns
|
|
595
|
+
|
|
596
|
+
**Pattern A - Mutually exclusive options with value selector**:
|
|
597
|
+
```json
|
|
598
|
+
{
|
|
599
|
+
"gender": "male"
|
|
600
|
+
}
|
|
601
|
+
```
|
|
602
|
+
Selector uses `${value}` placeholder: `input[value='${value}']`
|
|
603
|
+
|
|
604
|
+
**Pattern B - Boolean flags for each option**:
|
|
605
|
+
```json
|
|
606
|
+
{
|
|
607
|
+
"option_a": true,
|
|
608
|
+
"option_b": null,
|
|
609
|
+
"option_c": null
|
|
610
|
+
}
|
|
611
|
+
```
|
|
612
|
+
Only the option whose value is `true` gets clicked.
|
|
613
|
+
|
|
614
|
+
### Date Handling
|
|
615
|
+
|
|
616
|
+
Dates in input_data should use ISO format (`YYYY-MM-DD`):
|
|
617
|
+
```json
|
|
618
|
+
{
|
|
619
|
+
"birthdate": "1985-06-15",
|
|
620
|
+
"start_date": "2024-01-01"
|
|
621
|
+
}
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
The framework converts each date to the `format` specified in its field definition.
|
|
625
|
+
|
|
626
|
+
## Element Discovery
|
|
627
|
+
|
|
628
|
+
The `ElementDiscovery` class scans web pages to identify interactive elements, helping build instructions.json files.
|
|
629
|
+
|
|
630
|
+
```python
|
|
631
|
+
from pathlib import Path
|
|
632
|
+
from azcrawlerpy import ElementDiscovery
|
|
633
|
+
|
|
634
|
+
async def discover_elements():
|
|
635
|
+
discovery = ElementDiscovery(headless=False)
|
|
636
|
+
|
|
637
|
+
report = await discovery.discover(
|
|
638
|
+
url="https://example.com/form",
|
|
639
|
+
output_dir=Path("./discovery_output"),
|
|
640
|
+
cookie_consent={
|
|
641
|
+
"banner_selector": "#cookie-banner",
|
|
642
|
+
"accept_selector": "button.accept"
|
|
643
|
+
},
|
|
644
|
+
explore_iframes=True,
|
|
645
|
+
screenshot=True,
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
print(f"Found {report.total_elements} elements")
|
|
649
|
+
|
|
650
|
+
for text_input in report.text_inputs:
|
|
651
|
+
print(f"Text input: {text_input.selector}")
|
|
652
|
+
print(f" Suggested type: {text_input.suggested_field_type}")
|
|
653
|
+
|
|
654
|
+
for dropdown in report.selects:
|
|
655
|
+
print(f"Dropdown: {dropdown.selector}")
|
|
656
|
+
print(f" Options: {dropdown.options}")
|
|
657
|
+
|
|
658
|
+
for radio_group in report.radio_groups:
|
|
659
|
+
print(f"Radio group: {radio_group.name}")
|
|
660
|
+
for option in radio_group.options:
|
|
661
|
+
print(f" - {option.label}: {option.selector}")
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
### Discovery Report Contents
|
|
665
|
+
|
|
666
|
+
- `text_inputs`: Text, email, phone, password fields
|
|
667
|
+
- `textareas`: Multi-line text areas
|
|
668
|
+
- `selects`: Native dropdown elements with options
|
|
669
|
+
- `radio_groups`: Grouped radio buttons
|
|
670
|
+
- `checkboxes`: Checkbox inputs
|
|
671
|
+
- `buttons`: Clickable buttons
|
|
672
|
+
- `links`: Anchor elements
|
|
673
|
+
- `date_inputs`: Date picker fields
|
|
674
|
+
- `file_inputs`: File upload fields
|
|
675
|
+
- `sliders`: Range inputs
|
|
676
|
+
- `custom_components`: Non-standard interactive elements
|
|
677
|
+
- `iframes`: Discovered iframes with their elements
|
|
678
|
+
|
|
679
|
+
## AI Agent Guidance
|
|
680
|
+
|
|
681
|
+
This section provides instructions for AI agents tasked with creating `instructions.json` and `input_data` files.
|
|
682
|
+
|
|
683
|
+
### Workflow for Creating Instructions
|
|
684
|
+
|
|
685
|
+
1. **Discovery Phase**: Use `ElementDiscovery` to scan each page/step of the form
|
|
686
|
+
2. **Mapping Phase**: Map discovered elements to field definitions
|
|
687
|
+
3. **Flow Definition**: Define step transitions and actions
|
|
688
|
+
4. **Data Schema**: Create the input_data structure
|
|
689
|
+
|
|
690
|
+
### Step-by-Step Process
|
|
691
|
+
|
|
692
|
+
#### 1. Analyze the Form Structure
|
|
693
|
+
|
|
694
|
+
- Identify how many pages/steps the form has
|
|
695
|
+
- Note the URL pattern changes (if any)
|
|
696
|
+
- Identify what element appears when each step loads
|
|
697
|
+
|
|
698
|
+
#### 2. For Each Step, Define:
|
|
699
|
+
|
|
700
|
+
```json
|
|
701
|
+
{
|
|
702
|
+
"name": "<descriptive_step_name>",
|
|
703
|
+
"wait_for": "<selector_that_confirms_step_loaded>",
|
|
704
|
+
"timeout_ms": 30000,
|
|
705
|
+
"fields": [...],
|
|
706
|
+
"next_action": {...}
|
|
707
|
+
}
|
|
708
|
+
```
|
|
709
|
+
|
|
710
|
+
**Naming conventions**:
|
|
711
|
+
- Use snake_case for step names: `personal_info`, `payment_details`
|
|
712
|
+
- Use descriptive data_keys: `first_name`, `email_address`, `accepts_terms`
|
|
713
|
+
|
|
714
|
+
#### 3. Selector Priority
|
|
715
|
+
|
|
716
|
+
When choosing selectors, prefer in order:
|
|
717
|
+
1. `[data-testid='...']` or `[data-cy='...']` - Most stable
|
|
718
|
+
2. `[aria-label='...']` or `[aria-labelledby='...']` - Accessible and stable
|
|
719
|
+
3. `input[name='...']` - Form field names
|
|
720
|
+
4. `:has-text('...')` - Text content (use for buttons/labels)
|
|
721
|
+
5. CSS class selectors - Least stable, avoid if possible
|
|
722
|
+
|
|
723
|
+
#### 4. Handle Dynamic Content
|
|
724
|
+
|
|
725
|
+
For AJAX-loaded content:
|
|
726
|
+
- Use `wait` action before interacting
|
|
727
|
+
- Add `field_visible_timeout_ms` to field definitions
|
|
728
|
+
- Use `post_click_delay_ms` for fields that trigger updates
|
|
729
|
+
|
|
730
|
+
#### 5. Radio Button Strategy
|
|
731
|
+
|
|
732
|
+
**Option A - When radio values are meaningful**:
|
|
733
|
+
```json
|
|
734
|
+
{
|
|
735
|
+
"type": "radio",
|
|
736
|
+
"selector": "input[type='radio'][value='${value}']",
|
|
737
|
+
"data_key": "payment_type"
|
|
738
|
+
}
|
|
739
|
+
// data: { "payment_type": "credit_card" }
|
|
740
|
+
```
|
|
741
|
+
|
|
742
|
+
**Option B - When you need individual control**:
|
|
743
|
+
```json
|
|
744
|
+
{
|
|
745
|
+
"type": "radio",
|
|
746
|
+
"selector": "[role='radio']:has-text('Credit Card')",
|
|
747
|
+
"data_key": "payment_credit_card",
|
|
748
|
+
"force_click": true
|
|
749
|
+
},
|
|
750
|
+
{
|
|
751
|
+
"type": "radio",
|
|
752
|
+
"selector": "[role='radio']:has-text('PayPal')",
|
|
753
|
+
"data_key": "payment_paypal",
|
|
754
|
+
"force_click": true
|
|
755
|
+
}
|
|
756
|
+
// data: { "payment_credit_card": true, "payment_paypal": null }
|
|
757
|
+
```
|
|
758
|
+
|
|
759
|
+
#### 6. Iframe Handling
|
|
760
|
+
|
|
761
|
+
When elements are inside iframes:
|
|
762
|
+
```json
|
|
763
|
+
{
|
|
764
|
+
"type": "text",
|
|
765
|
+
"selector": "input[name='card_number']",
|
|
766
|
+
"iframe_selector": "iframe#payment-iframe",
|
|
767
|
+
"data_key": "card_number"
|
|
768
|
+
}
|
|
769
|
+
```
|
|
770
|
+
|
|
771
|
+
### Creating input_data
|
|
772
|
+
|
|
773
|
+
#### 1. Analyze Required Fields
|
|
774
|
+
|
|
775
|
+
From the instructions, extract all unique `data_key` values:
|
|
776
|
+
```python
|
|
777
|
+
data_keys = set()
|
|
778
|
+
for step in instructions["steps"]:
|
|
779
|
+
for field in step["fields"]:
|
|
780
|
+
if field.get("data_key"):
|
|
781
|
+
data_keys.add(field["data_key"])
|
|
782
|
+
```
|
|
783
|
+
|
|
784
|
+
#### 2. Determine Value Types
|
|
785
|
+
|
|
786
|
+
| Field Type | Data Type | Example |
|
|
787
|
+
|------------|-----------|---------|
|
|
788
|
+
| text, textarea | string | `"John Doe"` |
|
|
789
|
+
| dropdown | string | `"Germany"` |
|
|
790
|
+
| radio (value-driven) | string | `"option_a"` |
|
|
791
|
+
| radio (boolean) | boolean/null | `true` or `null` |
|
|
792
|
+
| checkbox | boolean | `true` / `false` |
|
|
793
|
+
| date | string (ISO) | `"1985-06-15"` |
|
|
794
|
+
| slider | number | `50000` |
|
|
795
|
+
| file | string (path) | `"/path/to/file.pdf"` |
|
|
796
|
+
|
|
797
|
+
#### 3. Handle Mutually Exclusive Options
|
|
798
|
+
|
|
799
|
+
For radio groups with boolean flags, only ONE should be `true`:
|
|
800
|
+
```json
|
|
801
|
+
{
|
|
802
|
+
"employment_fulltime": true,
|
|
803
|
+
"employment_parttime": null,
|
|
804
|
+
"employment_selfemployed": null,
|
|
805
|
+
"employment_unemployed": null
|
|
806
|
+
}
|
|
807
|
+
```
|
|
808
|
+
|
|
809
|
+
#### 4. Date Format
|
|
810
|
+
|
|
811
|
+
Always provide dates in ISO format in input_data:
|
|
812
|
+
```json
|
|
813
|
+
{
|
|
814
|
+
"birthdate": "1985-06-15",
|
|
815
|
+
"policy_start": "2024-01-01"
|
|
816
|
+
}
|
|
817
|
+
```
|
|
818
|
+
|
|
819
|
+
The `format` parameter of each date field in the instructions specifies the output format that the form expects.
|
|
820
|
+
|
|
821
|
+
### Common Patterns
|
|
822
|
+
|
|
823
|
+
#### Multi-Step Wizard
|
|
824
|
+
```json
|
|
825
|
+
{
|
|
826
|
+
"steps": [
|
|
827
|
+
{
|
|
828
|
+
"name": "step_1_personal",
|
|
829
|
+
"wait_for": "input[name='firstName']",
|
|
830
|
+
"fields": [...],
|
|
831
|
+
"next_action": { "type": "click", "selector": "button:has-text('Next')" }
|
|
832
|
+
},
|
|
833
|
+
{
|
|
834
|
+
"name": "step_2_address",
|
|
835
|
+
"wait_for": "input[name='street']",
|
|
836
|
+
"fields": [...],
|
|
837
|
+
"next_action": { "type": "click", "selector": "button:has-text('Next')" }
|
|
838
|
+
}
|
|
839
|
+
]
|
|
840
|
+
}
|
|
841
|
+
```
|
|
842
|
+
|
|
843
|
+
#### Form with Loading States
|
|
844
|
+
```json
|
|
845
|
+
{
|
|
846
|
+
"next_action": {
|
|
847
|
+
"type": "click",
|
|
848
|
+
"selector": "button[type='submit']",
|
|
849
|
+
"post_action_delay_ms": 1000
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
```
|
|
853
|
+
|
|
854
|
+
#### Conditional Fields
|
|
855
|
+
```json
|
|
856
|
+
{
|
|
857
|
+
"type": "conditional",
|
|
858
|
+
"condition": {
|
|
859
|
+
"type": "data_equals",
|
|
860
|
+
"data_key": "has_additional_driver",
|
|
861
|
+
"value": true
|
|
862
|
+
},
|
|
863
|
+
"actions": [
|
|
864
|
+
{
|
|
865
|
+
"type": "click",
|
|
866
|
+
"selector": "button:has-text('Add Driver')"
|
|
867
|
+
}
|
|
868
|
+
]
|
|
869
|
+
}
|
|
870
|
+
```
|
|
871
|
+
|
|
872
|
+
## Error Handling and Diagnostics
|
|
873
|
+
|
|
874
|
+
The framework provides detailed error information when failures occur.
|
|
875
|
+
|
|
876
|
+
### Exception Types
|
|
877
|
+
|
|
878
|
+
| Exception | When Raised |
|
|
879
|
+
|-----------|-------------|
|
|
880
|
+
| `FieldNotFoundError` | Selector doesn't match any element |
|
|
881
|
+
| `FieldInteractionError` | Element found but interaction failed |
|
|
882
|
+
| `CrawlerTimeoutError` | Wait condition not met within timeout |
|
|
883
|
+
| `NavigationError` | Navigation action failed |
|
|
884
|
+
| `MissingDataError` | Required data_key not in input_data |
|
|
885
|
+
| `InvalidInstructionError` | Malformed instructions JSON |
|
|
886
|
+
| `UnsupportedFieldTypeError` | Unknown field type specified |
|
|
887
|
+
| `UnsupportedActionTypeError` | Unknown action type specified |
|
|
888
|
+
| `IframeNotFoundError` | Specified iframe not found |
|
|
889
|
+
|
|
890
|
+
### Debug Mode
|
|
891
|
+
|
|
892
|
+
Enable debug mode to capture screenshots at various stages:
|
|
893
|
+
|
|
894
|
+
```python
|
|
895
|
+
from azcrawlerpy import DebugMode
|
|
896
|
+
|
|
897
|
+
result = await crawler.crawl(
|
|
898
|
+
...,
|
|
899
|
+
debug_mode=DebugMode.ALL, # Capture all screenshots
|
|
900
|
+
)
|
|
901
|
+
```
|
|
902
|
+
|
|
903
|
+
| Mode | Description |
|
|
904
|
+
|------|-------------|
|
|
905
|
+
| `NONE` | No debug screenshots |
|
|
906
|
+
| `START` | Screenshot at form start |
|
|
907
|
+
| `END` | Screenshot at form end |
|
|
908
|
+
| `ALL` | Screenshots after every field and action |
|
|
909
|
+
|
|
910
|
+
### AI Diagnostics
|
|
911
|
+
|
|
912
|
+
When an error occurs while debug mode is enabled, the framework captures the following diagnostics:
|
|
913
|
+
|
|
914
|
+
- Current page URL and title
|
|
915
|
+
- Available `data-cy` and `data-testid` selectors
|
|
916
|
+
- Visible buttons and input fields
|
|
917
|
+
- Similar selectors (fuzzy matching suggestions)
|
|
918
|
+
- Console errors and warnings
|
|
919
|
+
- Failed network requests
|
|
920
|
+
- HTML snippet of the form area
|
|
921
|
+
|
|
922
|
+
This information is included in the exception message and saved to `error_diagnostics.json`.
|
|
923
|
+
|
|
924
|
+
## Examples
|
|
925
|
+
|
|
926
|
+
### Insurance Quote Form
|
|
927
|
+
|
|
928
|
+
**instructions.json**:
|
|
929
|
+
```json
|
|
930
|
+
{
|
|
931
|
+
"url": "https://insurance.example.com/quote",
|
|
932
|
+
"browser_config": {
|
|
933
|
+
"viewport_width": 1920,
|
|
934
|
+
"viewport_height": 1080
|
|
935
|
+
},
|
|
936
|
+
"cookie_consent": {
|
|
937
|
+
"banner_selector": "#cookie-banner",
|
|
938
|
+
"accept_selector": "button:has-text('Accept')"
|
|
939
|
+
},
|
|
940
|
+
"steps": [
|
|
941
|
+
{
|
|
942
|
+
"name": "vehicle_info",
|
|
943
|
+
"wait_for": "input[name='hsn']",
|
|
944
|
+
"timeout_ms": 30000,
|
|
945
|
+
"fields": [
|
|
946
|
+
{
|
|
947
|
+
"type": "text",
|
|
948
|
+
"selector": "input[name='hsn']",
|
|
949
|
+
"data_key": "vehicle_hsn"
|
|
950
|
+
},
|
|
951
|
+
{
|
|
952
|
+
"type": "text",
|
|
953
|
+
"selector": "input[name='tsn']",
|
|
954
|
+
"data_key": "vehicle_tsn"
|
|
955
|
+
},
|
|
956
|
+
{
|
|
957
|
+
"type": "date",
|
|
958
|
+
"selector": "input[name='registration_date']",
|
|
959
|
+
"data_key": "first_registration",
|
|
960
|
+
"format": "MM.YYYY"
|
|
961
|
+
}
|
|
962
|
+
],
|
|
963
|
+
"next_action": {
|
|
964
|
+
"type": "click",
|
|
965
|
+
"selector": "button:has-text('Continue')"
|
|
966
|
+
}
|
|
967
|
+
},
|
|
968
|
+
{
|
|
969
|
+
"name": "personal_info",
|
|
970
|
+
"wait_for": "input[name='birthdate']",
|
|
971
|
+
"timeout_ms": 30000,
|
|
972
|
+
"fields": [
|
|
973
|
+
{
|
|
974
|
+
"type": "date",
|
|
975
|
+
"selector": "input[name='birthdate']",
|
|
976
|
+
"data_key": "birthdate",
|
|
977
|
+
"format": "DD.MM.YYYY"
|
|
978
|
+
},
|
|
979
|
+
{
|
|
980
|
+
"type": "text",
|
|
981
|
+
"selector": "input[name='zipcode']",
|
|
982
|
+
"data_key": "postal_code"
|
|
983
|
+
}
|
|
984
|
+
],
|
|
985
|
+
"next_action": {
|
|
986
|
+
"type": "click",
|
|
987
|
+
"selector": "button:has-text('Get Quote')"
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
],
|
|
991
|
+
"final_page": {
|
|
992
|
+
"wait_for": ".quote-result",
|
|
993
|
+
"timeout_ms": 60000,
|
|
994
|
+
"screenshot_selector": ".quote-panel"
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
```
|
|
998
|
+
|
|
999
|
+
**data_row.json**:
|
|
1000
|
+
```json
|
|
1001
|
+
{
|
|
1002
|
+
"vehicle_hsn": "0603",
|
|
1003
|
+
"vehicle_tsn": "AKZ",
|
|
1004
|
+
"first_registration": "2020-03-15",
|
|
1005
|
+
"birthdate": "1985-06-20",
|
|
1006
|
+
"postal_code": "80331"
|
|
1007
|
+
}
|
|
1008
|
+
```
|
|
1009
|
+
|
|
1010
|
+
### Form with Iframes
|
|
1011
|
+
|
|
1012
|
+
```json
|
|
1013
|
+
{
|
|
1014
|
+
"steps": [
|
|
1015
|
+
{
|
|
1016
|
+
"name": "embedded_form",
|
|
1017
|
+
"wait_for": "iframe#form-frame",
|
|
1018
|
+
"timeout_ms": 30000,
|
|
1019
|
+
"fields": [
|
|
1020
|
+
{
|
|
1021
|
+
"type": "text",
|
|
1022
|
+
"selector": "input[name='email']",
|
|
1023
|
+
"iframe_selector": "iframe#form-frame",
|
|
1024
|
+
"data_key": "email"
|
|
1025
|
+
},
|
|
1026
|
+
{
|
|
1027
|
+
"type": "dropdown",
|
|
1028
|
+
"selector": "select[name='plan']",
|
|
1029
|
+
"iframe_selector": "iframe#form-frame",
|
|
1030
|
+
"data_key": "selected_plan",
|
|
1031
|
+
"select_by": "text"
|
|
1032
|
+
}
|
|
1033
|
+
],
|
|
1034
|
+
"next_action": {
|
|
1035
|
+
"type": "click",
|
|
1036
|
+
"selector": "button:has-text('Submit')",
|
|
1037
|
+
"iframe_selector": "iframe#form-frame"
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
1040
|
+
]
|
|
1041
|
+
}
|
|
1042
|
+
```
|
|
1043
|
+
|
|
1044
|
+
## License
|
|
1045
|
+
|
|
1046
|
+
MIT
|