scrape-do-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,332 @@
1
+ """Pydantic models for headless browser automation.
2
+
3
+ Defines the strongly-typed contracts for the `playWithBrowser`
4
+ feature of the Scrape.do API. It provides models for every supported
5
+ browser interaction, enabling users to chain automation workflows with
6
+ full type safety and IDE support.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ from typing import (
11
+ Literal,
12
+ Optional,
13
+ Self,
14
+ TypeAlias,
15
+ Annotated,
16
+ Union
17
+ )
18
+ from pydantic import (
19
+ BaseModel,
20
+ ConfigDict,
21
+ Field,
22
+ model_validator
23
+ )
24
+
25
+
26
+ # ---------------------
27
+ # Browser Action Models
28
+ # ---------------------
29
+
30
class ClickAction(BaseModel):
    """Browser step that clicks the element matched by a CSS selector.

    Both fields may be populated either by their Python name or by the
    capitalised alias shown below (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"Click"`` (alias ``Action``).
        selector: Non-empty CSS selector of the element to click
            (alias ``Selector``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Click"] = Field(default="Click", alias="Action")
    selector: str = Field(default=..., alias="Selector", min_length=1)
50
+
51
+
52
class WaitAction(BaseModel):
    """Pauses browser execution for a fixed duration.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"Wait"`` (alias ``Action``).
        timeout: Number of milliseconds to wait; must be ``>= 0``
            (alias ``Timeout``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Wait"] = Field(default="Wait", alias="Action")
    timeout: int = Field(
        default=...,
        alias="Timeout",
        # Spelling fixed ("miliseconds" -> "milliseconds"): this text is
        # user-facing because it is emitted in the generated JSON schema.
        description="Number of milliseconds to wait",
        ge=0,
    )
73
+
74
+
75
class WaitSelectorAction(BaseModel):
    """Pauses browser execution until a specific element appears in the DOM.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"WaitSelector"`` (alias ``Action``).
        wait_selector: Non-empty CSS selector to wait for
            (alias ``WaitSelector``).
        timeout: Optional maximum wait in milliseconds; must be ``>= 0``
            when given. Defaults to ``None`` (alias ``Timeout``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["WaitSelector"] = Field(
        default="WaitSelector",
        alias="Action",
    )
    wait_selector: str = Field(
        default=...,
        alias="WaitSelector",
        min_length=1,
    )
    timeout: Optional[int] = Field(
        default=None,
        alias="Timeout",
        # Spelling fixed ("miliseconds" -> "milliseconds"): this text is
        # user-facing because it is emitted in the generated JSON schema.
        description="Number of milliseconds to wait",
        ge=0,
    )
103
+
104
+
105
class ScrollXAction(BaseModel):
    """Browser step that scrolls the viewport along the horizontal axis.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"ScrollX"`` (alias ``Action``).
        value: Number of pixels to scroll on the X-axis (alias ``Value``).
            No bound is enforced by the model.
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScrollX"] = Field(default="ScrollX", alias="Action")
    value: int = Field(
        default=...,
        alias="Value",
        description="Number of pixels to scroll",
    )
125
+
126
+
127
class ScrollYAction(BaseModel):
    """Browser step that scrolls the viewport along the vertical axis.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"ScrollY"`` (alias ``Action``).
        value: Number of pixels to scroll on the Y-axis (alias ``Value``).
            No bound is enforced by the model.
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScrollY"] = Field(default="ScrollY", alias="Action")
    value: int = Field(
        default=...,
        alias="Value",
        description="Number of pixels to scroll",
    )
147
+
148
+
149
class ScrollToAction(BaseModel):
    """Browser step that scrolls until a given element is visible.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"ScrollTo"`` (alias ``Action``).
        selector: Non-empty CSS selector of the element to scroll to
            (alias ``Selector``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScrollTo"] = Field(default="ScrollTo", alias="Action")
    selector: str = Field(default=..., alias="Selector", min_length=1)
169
+
170
+
171
class FillAction(BaseModel):
    """Browser step that types a value into an input element.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"Fill"`` (alias ``Action``).
        selector: Non-empty CSS selector of the input element
            (alias ``Selector``).
        value: Text to type into the element (alias ``Value``). The model
            places no length constraint on it, so an empty string passes.
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Fill"] = Field(default="Fill", alias="Action")
    selector: str = Field(default=..., alias="Selector", min_length=1)
    value: str = Field(default=..., alias="Value")
196
+
197
+
198
class ExecuteAction(BaseModel):
    """Browser step that runs custom JavaScript in the page context.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"Execute"`` (alias ``Action``).
        execute: Non-empty JavaScript source to evaluate
            (alias ``Execute``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["Execute"] = Field(default="Execute", alias="Action")
    execute: str = Field(
        default=...,
        alias="Execute",
        description="Custom JavaScript to run",
        min_length=1,
    )
219
+
220
+
221
class ScreenShotAction(BaseModel):
    """Browser step that captures a screenshot mid-workflow.

    Fields accept either their Python name or the alias shown below
    (``populate_by_name`` is enabled). The two targeting options are
    mutually exclusive; see ``validate_screenshot_logic``.

    Attributes:
        action: Constant discriminator ``"ScreenShot"`` (alias ``Action``).
        full_screenshot: When true, capture the entire scrollable page.
            Defaults to ``None`` (alias ``fullScreenShot``).
        particular_screenshot: Non-empty CSS selector of a single element
            to capture. Defaults to ``None``
            (alias ``particularScreenShot``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["ScreenShot"] = Field(default="ScreenShot", alias="Action")
    full_screenshot: Optional[bool] = Field(default=None, alias="fullScreenShot")
    particular_screenshot: Optional[str] = Field(
        default=None,
        alias="particularScreenShot",
        description="Selector of the element to take a screenshot of",
        min_length=1,
    )

    @model_validator(mode="after")
    def validate_screenshot_logic(self) -> Self:
        """Reject actions that request both screenshot targets at once.

        One action captures either the whole scrollable page or one DOM
        element. To obtain both images, put two separate
        ``ScreenShotAction`` objects in the ``play_with_browser`` list.

        Returns:
            This same instance, unchanged, when the combination is valid.

        Raises:
            ValueError: If ``full_screenshot`` and ``particular_screenshot``
                are both truthy.
        """
        # Guard clause: anything other than "both set" is acceptable,
        # including neither being set and ``full_screenshot=False``.
        if not (self.full_screenshot and self.particular_screenshot):
            return self

        raise ValueError(
            "Cannot use 'full_screenshot' and 'particular_screenshot' "
            "simultaneously within a single ScreenShotAction."
        )
274
+
275
+
276
class WaitForRequestCompletionAction(BaseModel):
    """Pauses execution until network requests matching a pattern complete.

    Fields accept either their Python name or the capitalised alias
    (``populate_by_name`` is enabled).

    Attributes:
        action: Constant discriminator ``"WaitForRequestCompletion"``
            (alias ``Action``).
        url_pattern: Non-empty regex or string pattern of the URL to wait
            for (alias ``UrlPattern``).
        timeout: Maximum time to wait in milliseconds; must be ``>= 0``
            (alias ``Timeout``).
    """

    model_config = ConfigDict(populate_by_name=True)

    action: Literal["WaitForRequestCompletion"] = Field(
        default="WaitForRequestCompletion",
        alias="Action",
    )
    url_pattern: str = Field(
        default=...,
        alias="UrlPattern",
        description="Wait for requests matching this url pattern to complete",
        min_length=1,
    )
    timeout: int = Field(
        default=...,
        alias="Timeout",
        # Spelling fixed ("miliseconds" -> "milliseconds"): this text is
        # user-facing because it is emitted in the generated JSON schema.
        description="Number of milliseconds to wait",
        ge=0,
    )
307
+
308
+ # -------------------------
309
+ # Browser Action Type Alias
310
+ # -------------------------
311
+
312
+
313
# Tagged union of every action model; the ``action`` field of each member
# carries a distinct Literal value that selects the concrete model.
BrowserAction: TypeAlias = Annotated[
    Union[
        ClickAction, WaitAction, WaitSelectorAction,
        ScrollXAction, ScrollYAction, ScrollToAction,
        FillAction, ExecuteAction, ScreenShotAction,
        WaitForRequestCompletionAction,
    ],
    Field(discriminator="action"),
]
"""
Union of the action models accepted by the `play_with_browser`
parameter of the `RequestParameters` model, discriminated on
the `action` field
"""
@@ -0,0 +1,76 @@
1
+ """Type aliases, literals, and enumerations
2
+
3
+ Defines the static, permissible values for Scrape.do's various
4
+ configuration parameters. It ensures that IDEs and static analyzers can provide
5
+ strict autocomplete and validation for expected parameter values
6
+ """
7
+
8
+ from __future__ import annotations
9
+ from typing import (
10
+ TypeAlias,
11
+ Literal
12
+ )
13
+
14
+ RegionCodeType: TypeAlias = Literal[
15
+ 'europe',
16
+ 'asia'
17
+ 'africa'
18
+ 'oceania',
19
+ 'northamerica',
20
+ 'southamerica'
21
+ ]
22
+ """
23
+ Defines the valid strings that can be passed to the
24
+ `regional_geo_code` parameter in the `RequestParameters`
25
+ model
26
+ """
27
+
28
+ WaitUntilType: TypeAlias = Literal[
29
+ 'domcontentloaded',
30
+ 'networkidle0',
31
+ 'networkidle2',
32
+ 'load'
33
+ ]
34
+ """
35
+ Defines the valid strings that can be passed to the
36
+ `wait_until` parameter in the `RequestParameters`
37
+ model
38
+ """
39
+
40
+ DeviceType: TypeAlias = Literal[
41
+ 'desktop',
42
+ 'mobile',
43
+ 'tablet'
44
+ ]
45
+ """
46
+ Defines the valid strings that can be passed to the
47
+ `device` parameter in the `RequestParameters`
48
+ model
49
+ """
50
+
51
+ OutputType: TypeAlias = Literal['raw', 'markdown']
52
+ """
53
+ Defines the valid strings that can be passed to the
54
+ `output` parameter in the `RequestParameters`
55
+ model
56
+ """
57
+
58
+ HttpMethod: TypeAlias = Literal[
59
+ "GET",
60
+ "POST",
61
+ "PUT",
62
+ "PATCH",
63
+ "DELETE",
64
+ "HEAD",
65
+ "OPTIONS"
66
+ ]
67
+ """
68
+ Defines the valid HTTP methods that can be passed to the
69
+ `method` parameter in the `PreparedScrapeDoRequest` model
70
+ """
71
+
72
+ PayloadType: TypeAlias = Literal["json", "form", "raw"]
73
+ """
74
+ Defines the valid types of payload that can be passed to the
75
+ `payload_type` parameter in the `PreparedScrapeDoRequest` model
76
+ """