scrape-do-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrape_do/__init__.py +0 -0
- scrape_do/abc.py +0 -0
- scrape_do/async_client.py +0 -0
- scrape_do/client.py +804 -0
- scrape_do/constants.py +84 -0
- scrape_do/exceptions.py +238 -0
- scrape_do/models/__init__.py +79 -0
- scrape_do/models/browser_actions.py +332 -0
- scrape_do/models/enums.py +76 -0
- scrape_do/models/parameters.py +840 -0
- scrape_do/models/request.py +232 -0
- scrape_do/models/response.py +890 -0
- scrape_do/namespaces/__init__.py +0 -0
- scrape_do/namespaces/amazon.py +0 -0
- scrape_do/namespaces/google.py +0 -0
- scrape_do/namespaces/jobs.py +0 -0
- scrape_do_python-0.1.0.dist-info/METADATA +134 -0
- scrape_do_python-0.1.0.dist-info/RECORD +21 -0
- scrape_do_python-0.1.0.dist-info/WHEEL +5 -0
- scrape_do_python-0.1.0.dist-info/licenses/LICENSE +21 -0
- scrape_do_python-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""Pydantic models for headless browser automation.
|
|
2
|
+
|
|
3
|
+
Defines the strongly-typed contracts for the `playWithBrowser`
|
|
4
|
+
feature of the Scrape.do API. It provides models for every supported
|
|
5
|
+
browser interaction, enabling users to chain automation workflows with
|
|
6
|
+
full type safety and IDE support.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
from typing import (
|
|
11
|
+
Literal,
|
|
12
|
+
Optional,
|
|
13
|
+
Self,
|
|
14
|
+
TypeAlias,
|
|
15
|
+
Annotated,
|
|
16
|
+
Union
|
|
17
|
+
)
|
|
18
|
+
from pydantic import (
|
|
19
|
+
BaseModel,
|
|
20
|
+
ConfigDict,
|
|
21
|
+
Field,
|
|
22
|
+
model_validator
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ---------------------
|
|
27
|
+
# Browser Action Models
|
|
28
|
+
# ---------------------
|
|
29
|
+
|
|
30
|
+
class ClickAction(BaseModel):
    """Browser step that clicks the element matched by a CSS selector.

    Attributes:
        action (Literal["Click"]): Discriminator tag, fixed to ``"Click"``;
            aliased as ``Action``.
        selector (str): Non-empty CSS selector of the element to click;
            aliased as ``Selector``.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Click"] = Field(default="Click", alias="Action")
    selector: str = Field(default=..., min_length=1, alias="Selector")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class WaitAction(BaseModel):
    """Browser step that halts execution for a fixed amount of time.

    Attributes:
        action (Literal["Wait"]): Discriminator tag, fixed to ``"Wait"``;
            aliased as ``Action``.
        timeout (int): How long to wait, in milliseconds. Must be zero or
            greater; aliased as ``Timeout``.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Wait"] = Field(default="Wait", alias="Action")
    timeout: int = Field(
        default=...,
        ge=0,
        alias="Timeout",
        description="Number of miliseconds to wait",
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class WaitSelectorAction(BaseModel):
    """Browser step that waits for a given element to show up in the DOM.

    Attributes:
        action (Literal["WaitSelector"]): Discriminator tag, fixed to
            ``"WaitSelector"``; aliased as ``Action``.
        wait_selector (str): Non-empty CSS selector to wait for; aliased
            as ``WaitSelector``.
        timeout (Optional[int]): Upper bound on the wait, in milliseconds.
            Must be zero or greater when given; defaults to None. Aliased
            as ``Timeout``.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["WaitSelector"] = Field(
        default="WaitSelector", alias="Action"
    )
    wait_selector: str = Field(
        default=..., min_length=1, alias="WaitSelector"
    )
    timeout: Optional[int] = Field(
        default=None,
        ge=0,
        alias="Timeout",
        description="Number of miliseconds to wait",
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ScrollXAction(BaseModel):
    """Browser step that scrolls the viewport along the horizontal axis.

    Attributes:
        action (Literal["ScrollX"]): Discriminator tag, fixed to
            ``"ScrollX"``; aliased as ``Action``.
        value (int): Pixel distance to scroll on the X-axis; aliased as
            ``Value``. No range constraint is declared on this field.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScrollX"] = Field(default="ScrollX", alias="Action")
    value: int = Field(
        default=...,
        description="Number of pixels to scroll",
        alias="Value",
    )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class ScrollYAction(BaseModel):
    """Browser step that scrolls the viewport along the vertical axis.

    Attributes:
        action (Literal["ScrollY"]): Discriminator tag, fixed to
            ``"ScrollY"``; aliased as ``Action``.
        value (int): Pixel distance to scroll on the Y-axis; aliased as
            ``Value``. No range constraint is declared on this field.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScrollY"] = Field(default="ScrollY", alias="Action")
    value: int = Field(
        default=...,
        description="Number of pixels to scroll",
        alias="Value",
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class ScrollToAction(BaseModel):
    """Browser step that scrolls until a target element becomes visible.

    Attributes:
        action (Literal["ScrollTo"]): Discriminator tag, fixed to
            ``"ScrollTo"``; aliased as ``Action``.
        selector (str): Non-empty CSS selector of the element to scroll
            to; aliased as ``Selector``.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScrollTo"] = Field(default="ScrollTo", alias="Action")
    selector: str = Field(default=..., min_length=1, alias="Selector")
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class FillAction(BaseModel):
    """Browser step that types a value into an input element.

    Attributes:
        action (Literal["Fill"]): Discriminator tag, fixed to ``"Fill"``;
            aliased as ``Action``.
        selector (str): Non-empty CSS selector of the input element;
            aliased as ``Selector``.
        value (str): Text to type into the element; aliased as ``Value``.
            No length constraint is declared, so an empty string is
            accepted.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Fill"] = Field(default="Fill", alias="Action")
    selector: str = Field(default=..., min_length=1, alias="Selector")
    value: str = Field(default=..., alias="Value")
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class ExecuteAction(BaseModel):
    """Browser step that runs caller-supplied JavaScript in the page.

    Attributes:
        action (Literal["Execute"]): Discriminator tag, fixed to
            ``"Execute"``; aliased as ``Action``.
        execute (str): Non-empty JavaScript source to evaluate; aliased
            as ``Execute``. The model does not inspect or sanitize the
            script.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Execute"] = Field(default="Execute", alias="Action")
    execute: str = Field(
        default=...,
        min_length=1,
        alias="Execute",
        description="Custom JavaScript to run",
    )
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class ScreenShotAction(BaseModel):
    """Browser step that takes a screenshot partway through an action chain.

    Attributes:
        action (Literal["ScreenShot"]): Discriminator tag, fixed to
            ``"ScreenShot"``; aliased as ``Action``.
        full_screenshot (Optional[bool]): When True, capture the entire
            scrollable page. Defaults to None; aliased as
            ``fullScreenShot``.
        particular_screenshot (Optional[str]): Non-empty CSS selector of a
            single element to capture. Defaults to None; aliased as
            ``particularScreenShot``.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScreenShot"] = Field(
        default="ScreenShot", alias="Action"
    )
    full_screenshot: Optional[bool] = Field(
        default=None, alias="fullScreenShot"
    )
    particular_screenshot: Optional[str] = Field(
        default=None,
        min_length=1,
        alias="particularScreenShot",
        description="Selector of the element to take a screenshot of",
    )

    @model_validator(mode="after")
    def validate_screenshot_logic(self) -> Self:
        """Reject a full-page capture combined with an element capture.

        tip: Capturing Full Screenshot And Particular Screenshot
            One screenshot action targets either the whole scrollable page
            or one DOM element, never both. To obtain both images, add two
            separate `ScreenShotAction` objects to the
            `play_with_browser` list.

        Returns:
            This same instance, once it has passed validation.

        Raises:
            ValueError: If `full_screenshot` and `particular_screenshot`
                are both truthy.
        """
        wants_full_page = self.full_screenshot
        wants_element = self.particular_screenshot

        # Guard clause: any combination other than "both set" is valid.
        if not (wants_full_page and wants_element):
            return self

        raise ValueError(
            "Cannot use 'full_screenshot' and 'particular_screenshot' "
            "simultaneously within a single ScreenShotAction."
        )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class WaitForRequestCompletionAction(BaseModel):
    """Browser step that waits for matching network requests to finish.

    Attributes:
        action (Literal["WaitForRequestCompletion"]): Discriminator tag,
            fixed to ``"WaitForRequestCompletion"``; aliased as
            ``Action``.
        url_pattern (str): Non-empty URL pattern identifying the requests
            to wait for; aliased as ``UrlPattern``.
        timeout (int): Upper bound on the wait, in milliseconds. Required
            and must be zero or greater; aliased as ``Timeout``.
    """

    # Accept both the Python attribute names and the API-style aliases.
    model_config = ConfigDict(populate_by_name=True)

    action: Literal["WaitForRequestCompletion"] = Field(
        default="WaitForRequestCompletion", alias="Action"
    )
    url_pattern: str = Field(
        default=...,
        min_length=1,
        alias="UrlPattern",
        description="Wait for requests matching this url pattern to complete",
    )
    timeout: int = Field(
        default=...,
        ge=0,
        alias="Timeout",
        description="Number of miliseconds to wait",
    )
|
|
307
|
+
|
|
308
|
+
# -------------------------
|
|
309
|
+
# Browser Action Type Alias
|
|
310
|
+
# -------------------------
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
# Discriminated union: the `action` field of each member model is the tag
# used to select which concrete model a given payload is validated against.
BrowserAction: TypeAlias = Annotated[
    Union[
        ClickAction, WaitAction, WaitSelectorAction,
        ScrollXAction, ScrollYAction, ScrollToAction,
        FillAction, ExecuteAction, ScreenShotAction,
        WaitForRequestCompletionAction,
    ],
    Field(discriminator="action"),
]
"""
Union of every browser action model accepted by the
`play_with_browser` parameter of the `RequestParameters`
model
"""
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Type aliases, literals, and enumerations
|
|
2
|
+
|
|
3
|
+
Defines the static, permissible values for Scrape.do's various
|
|
4
|
+
configuration parameters. It ensures that IDEs and static analyzers can provide
|
|
5
|
+
strict autocomplete and validation for expected parameter values
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
from typing import (
|
|
10
|
+
TypeAlias,
|
|
11
|
+
Literal
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
RegionCodeType: TypeAlias = Literal[
|
|
15
|
+
'europe',
|
|
16
|
+
'asia'
|
|
17
|
+
'africa'
|
|
18
|
+
'oceania',
|
|
19
|
+
'northamerica',
|
|
20
|
+
'southamerica'
|
|
21
|
+
]
|
|
22
|
+
"""
|
|
23
|
+
Defines the valid strings that can be passed to the
|
|
24
|
+
`regional_geo_code` parameter in the `RequestParameters`
|
|
25
|
+
model
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
WaitUntilType: TypeAlias = Literal[
|
|
29
|
+
'domcontentloaded',
|
|
30
|
+
'networkidle0',
|
|
31
|
+
'networkidle2',
|
|
32
|
+
'load'
|
|
33
|
+
]
|
|
34
|
+
"""
|
|
35
|
+
Defines the valid strings that can be passed to the
|
|
36
|
+
`wait_until` parameter in the `RequestParameters`
|
|
37
|
+
model
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
DeviceType: TypeAlias = Literal[
|
|
41
|
+
'desktop',
|
|
42
|
+
'mobile',
|
|
43
|
+
'tablet'
|
|
44
|
+
]
|
|
45
|
+
"""
|
|
46
|
+
Defines the valid strings that can be passed to the
|
|
47
|
+
`device` parameter in the `RequestParameters`
|
|
48
|
+
model
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
OutputType: TypeAlias = Literal['raw', 'markdown']
|
|
52
|
+
"""
|
|
53
|
+
Defines the valid strings that can be passed to the
|
|
54
|
+
`output` parameter in the `RequestParameters`
|
|
55
|
+
model
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
HttpMethod: TypeAlias = Literal[
|
|
59
|
+
"GET",
|
|
60
|
+
"POST",
|
|
61
|
+
"PUT",
|
|
62
|
+
"PATCH",
|
|
63
|
+
"DELETE",
|
|
64
|
+
"HEAD",
|
|
65
|
+
"OPTIONS"
|
|
66
|
+
]
|
|
67
|
+
"""
|
|
68
|
+
Defines the valid HTTP methods that can be passed to the
|
|
69
|
+
`method` parameter in the `PreparedScrapeDoRequest` model
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
PayloadType: TypeAlias = Literal["json", "form", "raw"]
|
|
73
|
+
"""
|
|
74
|
+
Defines the valid types of payload that can be passed to the
|
|
75
|
+
`payload_type` parameter in the `PreparedScrapeDoRequest` model
|
|
76
|
+
"""
|