scrape-do-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,840 @@
1
"""Core validation engine and configuration contracts.

Validates request data before the network layer, so that invalid
configurations are caught locally — without wasting network requests — by
using Pydantic V2 models to enforce Scrape.do's parameter dependencies and
interactions.
"""
8
+
9
+ from __future__ import annotations
10
+ import json
11
+ import urllib.parse
12
+ from typing import (
13
+ Optional,
14
+ List,
15
+ Self,
16
+ Type,
17
+ Any,
18
+ Dict,
19
+ TypedDict
20
+ )
21
+ from pydantic import (
22
+ BaseModel,
23
+ Field,
24
+ HttpUrl,
25
+ model_validator,
26
+ field_validator,
27
+ ValidationInfo,
28
+ ConfigDict
29
+ )
30
+ from .browser_actions import BrowserAction
31
+ from .enums import (
32
+ OutputType,
33
+ DeviceType,
34
+ WaitUntilType,
35
+ RegionCodeType
36
+ )
37
+ from ..constants import (
38
+ _SUPER_SUPPORTED_COUNTRIES,
39
+ _DATACENTER_SUPPORTED_COUNTRIES,
40
+ _ZIPCODE_FORMATS
41
+ )
42
+
43
+
44
+ # ----------------------------------
45
+ # RequestParameters Kwargs TypedDict
46
+ # ----------------------------------
47
+
48
class RequestParametersDict(TypedDict, total=False):
    """Typed ``**kwargs`` contract for the
    [RequestParameters][scrape_do.models.RequestParameters] model.

    Declaring every optional key here gives IDEs strict autocomplete and
    lets static type checkers validate keyword-argument dictionaries before
    they reach Pydantic validation.
    """

    super: Optional[bool]
    """Activates Residential/Mobile IP proxies."""

    render: Optional[bool]
    """Executes the request using a headless browser."""

    device: Optional[DeviceType]
    """Specify the device type (desktop, mobile, tablet)"""

    session_id: Optional[int]
    """Use the same IP address continuously with a session"""

    geo_code: Optional[str]
    """ISO 3166-1 alpha-2 country code for IP targeting."""

    regional_geo_code: Optional[RegionCodeType]
    """Targets a broader geographical region. Requires super=True."""

    postal_code: Optional[str]
    """Targets a specific zip code. Requires super=True and a supported geo_code."""

    wait_until: Optional[WaitUntilType]
    """Control when the browser considers the page loaded"""

    custom_wait: Optional[int]
    """Set the browser wait time on the target web page after content loaded"""

    wait_selector: Optional[str]
    """CSS selector to wait for in the target web page."""

    width: Optional[int]
    """Custom viewport width."""

    height: Optional[int]
    """Custom viewport height."""

    return_json: Optional[bool]
    """Returns response body as base64-encoded JSON instead of raw HTML."""

    block_resources: Optional[bool]
    """Block CSS, images, and fonts on your target web page"""

    screenshot: Optional[bool]
    """Captures the visible viewport."""

    full_screenshot: Optional[bool]
    """Captures the entire scrollable page."""

    particular_screenshot: Optional[str]
    """Captures a specific DOM element by selector."""

    play_with_browser: Optional[List[BrowserAction]]
    """A sequence of automated interactions to perform."""

    show_frames: Optional[bool]
    """Returns all iframe content from the target webpage. Requires
    render=true and returnJSON=true"""

    show_websocket_requests: Optional[bool]
    """Captures WebSocket network traffic. Requires render=true and
    returnJSON=true."""

    custom_headers: Optional[bool]
    """Replaces Scrape.do's default headers with your provided headers."""

    extra_headers: Optional[bool]
    """Appends your provided headers to Scrape.do's default headers."""

    forward_headers: Optional[bool]
    """Forwards all headers exactly as sent by your client."""

    set_cookies: Optional[str]
    """Injects specific cookies into the request."""

    disable_redirection: Optional[bool]
    """Prevents the proxy from following 3xx HTTP redirects."""

    timeout: Optional[int]
    """Total API connection timeout in milliseconds."""

    retry_timeout: Optional[int]
    """Internal proxy retry duration in milliseconds. Cannot be used with
    render=True."""

    disable_retry: Optional[bool]
    """Fails immediately on target error without rotating IPs."""

    output: Optional[OutputType]
    """Output format parser."""

    transparent_response: Optional[bool]
    """Return pure response from target web page without Scrape.do processing"""

    pure_cookies: Optional[bool]
    """Returns the original Set-Cookie headers from the target website"""
+
182
+
183
+ # --------------------
184
+ # Request Parameters
185
+ # --------------------
186
+
187
class RequestParameters(BaseModel):
    """The strict data contract for the request parameters accepted by
    Scrape.do's API.

    This model enforces all parameter dependencies, mutually exclusive rules,
    and geographical targeting constraints locally before a network request
    is generated.

    Attributes:
        url (HttpUrl): The absolute destination URL you wish to scrape.
        super (Optional[bool]): Activates Residential/Mobile IP proxies.
        render (Optional[bool]): Executes the request using a headless browser.
        device (Optional[DeviceType]): Specify the device type (desktop,
            mobile, tablet)
        session_id (Optional[int]): Use the same IP address continuously with
            a session
        geo_code (Optional[str]): ISO 3166-1 alpha-2 country code for IP
            targeting.
        regional_geo_code (Optional[RegionCodeType]): Targets a broader
            geographical region. Requires super=True.
        postal_code (Optional[str]): Targets a specific zip code. Requires
            super=True and a supported geo_code.
        wait_until (Optional[WaitUntilType]): Control when the browser
            considers the page loaded
        custom_wait (Optional[int]): Set the browser wait time on the target
            web page after content loaded
        wait_selector (Optional[str]): CSS selector to wait for in the target
            web page.
        width (Optional[int]): Custom viewport width.
        height (Optional[int]): Custom viewport height.
        return_json (Optional[bool]): Returns response body as base64-encoded
            JSON instead of raw HTML.
        block_resources (Optional[bool]): Block CSS, images, and fonts on your
            target web page
        screenshot (Optional[bool]): Captures the visible viewport.
        full_screenshot (Optional[bool]): Captures the entire scrollable page.
        particular_screenshot (Optional[str]): Captures a specific DOM element
            by selector.
        play_with_browser (Optional[List[BrowserAction]]): A sequence of
            automated interactions to perform.
        show_frames (Optional[bool]): Returns all iframe content from the
            target webpage. Requires render=true and returnJSON=true
        show_websocket_requests (Optional[bool]): Captures WebSocket network
            traffic. Requires render=true and returnJSON=true.
        custom_headers (Optional[bool]): Replaces Scrape.do's default headers
            with your provided headers.
        extra_headers (Optional[bool]): Appends your provided headers to
            Scrape.do's default headers.
        forward_headers (Optional[bool]): Forwards all headers exactly as sent
            by your client.
        set_cookies (Optional[str]): Injects specific cookies into the request.
        disable_redirection (Optional[bool]): Prevents the proxy from
            following 3xx HTTP redirects.
        timeout (Optional[int]): Total API connection timeout in milliseconds.
        retry_timeout (Optional[int]): Internal proxy retry duration in
            milliseconds. Cannot be used with render=True.
        disable_retry (Optional[bool]): Fails immediately on target error
            without rotating IPs.
        output (Optional[OutputType]): Output format parser.
        transparent_response (Optional[bool]): Return pure response from
            target web page without Scrape.do processing
        pure_cookies (Optional[bool]): Returns the original Set-Cookie headers
            from the target website
    """
    # Accept both field names and aliases at construction time.
    model_config = ConfigDict(populate_by_name=True)

    # --- Required Parameters ---

    url: HttpUrl = Field(..., alias="url")

    # --- Core Routing Parameters ---

    super: Optional[bool] = Field(None, alias="super")
    render: Optional[bool] = Field(None, alias="render")
    device: Optional[DeviceType] = Field(None, alias="device")
    session_id: Optional[int] = Field(
        None, alias="sessionId", ge=0, le=1000000
    )

    # --- Location Parameters ---

    # validate_default=True guarantees the field validators below run even
    # when the parameter is left unset.
    geo_code: Optional[str] = Field(
        None, alias="geoCode", min_length=2, max_length=2,
        validate_default=True
    )
    regional_geo_code: Optional[RegionCodeType] = Field(
        None, alias="regionalGeoCode"
    )
    postal_code: Optional[str] = Field(
        None, alias="postalcode", validate_default=True
    )

    # --- Browser Parameters ---

    wait_until: Optional[WaitUntilType] = Field(None, alias="waitUntil")
    custom_wait: Optional[int] = Field(
        None, alias="customWait", ge=0, le=35000
    )
    wait_selector: Optional[str] = Field(None, alias="waitSelector")
    width: Optional[int] = Field(None, alias="width")
    height: Optional[int] = Field(None, alias="height")
    return_json: Optional[bool] = Field(None, alias="returnJSON")
    block_resources: Optional[bool] = Field(None, alias="blockResources")
    screenshot: Optional[bool] = Field(None, alias="screenShot")
    full_screenshot: Optional[bool] = Field(None, alias="fullScreenShot")
    particular_screenshot: Optional[str] = Field(
        None, alias="particularScreenShot"
    )
    play_with_browser: Optional[List[BrowserAction]] = Field(
        None, alias="playWithBrowser"
    )

    # --- Browser Response Configuration Parameters ---

    show_frames: Optional[bool] = Field(None, alias="showFrames")
    show_websocket_requests: Optional[bool] = Field(
        None, alias="showWebsocketRequests"
    )

    # --- Header + Cookie Control Parameters ---

    custom_headers: Optional[bool] = Field(None, alias="customHeaders")
    extra_headers: Optional[bool] = Field(None, alias="extraHeaders")
    forward_headers: Optional[bool] = Field(None, alias="forwardHeaders")
    set_cookies: Optional[str] = Field(None, alias="setCookies")

    # --- Network Parameters ---

    disable_redirection: Optional[bool] = Field(
        None, alias="disableRedirection"
    )
    timeout: Optional[int] = Field(
        None, alias="timeout", ge=5000, le=120000
    )
    retry_timeout: Optional[int] = Field(
        None, alias="retryTimeout", ge=5000, le=55000
    )
    disable_retry: Optional[bool] = Field(None, alias="disableRetry")

    # --- General Response Configuration Parameters ---

    output: Optional[OutputType] = Field(None, alias="output")
    transparent_response: Optional[bool] = Field(
        None, alias="transparentResponse"
    )
    pure_cookies: Optional[bool] = Field(None, alias="pureCookies")

    @model_validator(mode="after")
    def validate_compatibility(self) -> Self:
        """Cross-validates parameter dependencies to prevent invalid API
        requests locally.

        info: Headless Browser Dependencies (`render=True`)
            - `wait_until`, `custom_wait`, `wait_selector`
            - `width`, `height`
            - `return_json`, `block_resources`
            - `screenshot`, `full_screenshot`, `particular_screenshot`
            - `play_with_browser`
            - `show_frames`, `show_websocket_requests`

        info: ReturnJSON Dependencies (`render=True` + `return_json=True`)
            - `screenshot`, `full_screenshot`, `particular_screenshot`
            - `show_frames`, `show_websocket_requests`

        info: Super Proxy Dependencies (`super=True`)
            - `regional_geo_code`

        info: Screenshot Parameters
            - Only one of the screenshot parameters can be set at a time.

            - In addition to `render=True` and `return_json=True`, all
              screenshot parameters require `blockResources` to be set to
              False.

        info: Header Parameters
            - Only one of the header parameters can be set at a time.

            - None of the header parameters can be set to True when using
              the `setCookies` parameter.

        info: Mutually Exclusive Parameters
            - The `playWithBrowser` and `particular_screenshot` parameters
              cannot be used simultaneously

            - The `retryTimeout` and `render` parameters cannot be used
              simultaneously

            - The `regional_geo_code` and `geo_code` parameters cannot be
              used simultaneously

        Returns:
            The validated instance from which the method was called

        Raises:
            ValueError: If mutually exclusive parameters are combined or if
                dependent parameters are provided without their required
                prerequisites.
        """

        # --- Headless Browser Dependencies ---

        # Render Dependencies. NOTE: any explicitly assigned value counts as
        # "used" here (is not None), even an explicit False.
        render_dependent_fields = {
            "wait_until": self.wait_until,
            "custom_wait": self.custom_wait,
            "wait_selector": self.wait_selector,
            "width": self.width,
            "height": self.height,
            "return_json": self.return_json,
            "block_resources": self.block_resources,
            "screenshot": self.screenshot,
            "full_screenshot": self.full_screenshot,
            "particular_screenshot": self.particular_screenshot,
            "play_with_browser": self.play_with_browser,
            "show_frames": self.show_frames,
            "show_websocket_requests": self.show_websocket_requests
        }

        used_render_fields = [
            name for name, value in render_dependent_fields.items()
            if value is not None
        ]

        if used_render_fields and not self.render:
            raise ValueError(
                f"The following parameters require 'render=true' to be set: "
                f"{', '.join(used_render_fields)}."
            )

        # ReturnJSON Additional Dependencies (truthy values only)
        json_dependent_fields = {
            "screenshot": self.screenshot,
            "full_screenshot": self.full_screenshot,
            "particular_screenshot": self.particular_screenshot,
            "show_frames": self.show_frames,
            "show_websocket_requests": self.show_websocket_requests
        }

        used_json_fields = [
            name for name, value in json_dependent_fields.items()
            if value
        ]

        if used_json_fields and not self.return_json:
            raise ValueError(
                f"The following parameters require both 'render=true' AND"
                f" 'returnJSON=true' to be set:"
                f" {', '.join(used_json_fields)}."
            )

        # Screenshot Additional Dependencies
        screenshot_fields = {
            "screenshot": self.screenshot,
            "full_screenshot": self.full_screenshot,
            "particular_screenshot": self.particular_screenshot
        }

        used_screenshot_fields = [
            name for name, value in screenshot_fields.items()
            if value
        ]

        if used_screenshot_fields and self.block_resources:
            raise ValueError(
                f"Screenshot parameters automatically operate with "
                f"'blockResources=false' to ensure contents are loaded "
                f"correctly. Screenshot Parameters used:"
                f" {', '.join(used_screenshot_fields)}"
            )

        # --- Enforce Mutually Exclusive Parameters ---

        if self.render and self.retry_timeout is not None:
            raise ValueError(
                "The 'retry_timeout' parameter cannot be used concurrently"
                " with 'render=true'"
            )

        if len(used_screenshot_fields) > 1:
            raise ValueError(
                f"Only one screenshot parameter can be used at a time."
                f" Screenshot Parameters used:"
                f" {', '.join(used_screenshot_fields)}"
            )

        if (
            self.particular_screenshot is not None
            and self.play_with_browser is not None
        ):
            raise ValueError(
                "The 'particular_screenshot' parameter cannot be used"
                " concurrently with the 'playWithBrowser' parameter"
            )

        header_fields = {
            "custom_headers": self.custom_headers,
            "extra_headers": self.extra_headers,
            "forward_headers": self.forward_headers
        }

        used_header_fields = [
            name for name, value in header_fields.items()
            if value
        ]

        if len(used_header_fields) > 1:
            raise ValueError(
                f"Only one header parameter can be used at a time."
                f" Header Parameters used: {', '.join(used_header_fields)}"
            )

        if used_header_fields and self.set_cookies:
            raise ValueError(
                f"Header parameters cannot be used concurrently with"
                f" the set_cookies parameter. Header Parameters used:"
                f" {', '.join(used_header_fields)}"
            )

        if self.geo_code is not None and self.regional_geo_code is not None:
            raise ValueError(
                "'geoCode' and 'regionalGeoCode' parameters cannot be used"
                " simultaneously"
            )

        if not self.super and self.regional_geo_code is not None:
            raise ValueError(
                "'super=true' must be set to use the 'regionalGeoCode'"
                " parameter"
            )

        return self

    @field_validator("geo_code")
    @classmethod
    def validate_geo_code(
        cls: Type[Self],
        v: Optional[str],
        info: ValidationInfo
    ) -> Optional[str]:
        """Validates the country code against the allowed proxy pools.

        Args:
            v (Optional[str]): The `geo_code` provided during initialization
            info (ValidationInfo): The data already validated for the model so
                far

        Returns:
            The validated (lowercased) `geo_code` parameter

        Raises:
            ValueError: If the country code is not supported by the selected
                proxy tier.
        """
        if v is None:
            return v

        v = v.lower()
        # `super` is declared before `geo_code`, so it is already present in
        # info.data when this validator runs.
        is_super = info.data.get("super", False)

        if is_super:
            if v not in _SUPER_SUPPORTED_COUNTRIES:
                raise ValueError(
                    f"'{v}' is not a supported country code"
                )
            return v

        if v in _DATACENTER_SUPPORTED_COUNTRIES:
            return v

        # Distinguish "wrong tier" from "never supported" for a clearer error.
        if v in _SUPER_SUPPORTED_COUNTRIES:
            raise ValueError(
                f"'{v}' is not a supported country code when 'super=false'"
            )
        raise ValueError(
            f"'{v}' is not a supported country code"
        )

    @field_validator("postal_code")
    @classmethod
    def validate_postal_code(
        cls: Type[Self],
        v: Optional[str],
        info: ValidationInfo
    ) -> Optional[str]:
        """Validates postal codes based on specific regional formats.

        Args:
            v (Optional[str]): The `postal_code` provided during initialization
            info (ValidationInfo): The data already validated for the model so
                far

        Returns:
            The validated (stripped) `postal_code` parameter

        Raises:
            ValueError: If dependencies are missing or the format does not
                match the regional regex.
        """
        if v is None:
            return v

        v = v.strip()
        is_super = info.data.get("super", False)
        # geo_code has already been lowercased by validate_geo_code.
        geo_code = info.data.get("geo_code")

        if not is_super or not geo_code:
            raise ValueError(
                "The 'postalcode' parameter can only be used when both "
                "'super=true' and a valid 'geoCode' are provided."
            )

        if geo_code not in _ZIPCODE_FORMATS:
            raise ValueError(
                f"Zip code targeting is not supported for country"
                f" '{geo_code}'. Supported countries are:"
                f" {', '.join(_ZIPCODE_FORMATS.keys())}."
            )

        regex = _ZIPCODE_FORMATS[geo_code]
        if not regex.match(v):
            raise ValueError(
                f"Invalid zip code format for {geo_code}. "
                f"Provided '{v}' does not match the required pattern."
            )

        return v

    def to_api_params(self) -> Dict[str, Any]:
        """Serializes the model into a dictionary formatted for httpx
        query parameters.

        This method automatically drops unassigned fields, maps snake_case
        variables to their camelCase API equivalents, stringifies booleans
        as 'true'/'false', and JSON-encodes nested browser actions as
        required by Scrape.do.

        Returns:
            A sanitized dictionary ready to be passed to httpx.
        """

        params = self.model_dump(
            by_alias=True,
            exclude_none=True,
            mode="json"
        )

        # model_dump(mode="json") already reduced each BrowserAction to a
        # plain dict with aliases applied and None values excluded, so the
        # actions can be post-processed directly without a second dump.
        if self.play_with_browser and "playWithBrowser" in params:
            actions = [
                {
                    # Scrape.do's backend expects string booleans
                    key: ("true" if value else "false")
                    if isinstance(value, bool) else value
                    for key, value in action.items()
                }
                for action in params["playWithBrowser"]
            ]
            params["playWithBrowser"] = json.dumps(actions)

        # Stringify top-level booleans as well
        for key, value in params.items():
            if isinstance(value, bool):
                params[key] = "true" if value else "false"

        return params

    @classmethod
    def from_url(cls: Type[Self], api_url: str) -> RequestParameters:
        """Instantiates a `RequestParameters` instance by parsing a raw
        Scrape.do API URL string.

        tip: Accepted URLs
            This method accepts both raw and encoded URLs by using
            the `urllib.parse.parse_qs` and `urllib.parse.unquote_plus`
            functions to normalize encoded URLs.

        warning: Browser Actions (`playWithBrowser`)
            When providing a URL containing the `playWithBrowser` parameter,
            make sure to use the `json.dumps` function to stringify the list
            of dictionaries containing the entries. Both the raw and encoded
            URLs can be passed to this method afterwards.

        warning: API Token
            This method ignores the `&token=` parameter containing the
            Scrape.do API key, since its insertion is meant to be handled by
            the `ScrapeDoClient` using either an initialization parameter, or
            the `SCRAPE_DO_API_KEY` environment variable.

        Args:
            api_url (str): The full Scrape.do endpoint
                (`https://api.scrape.do/?url=...&render=true...`)

        Raises:
            ValueError: If the value found in the `&playWithBrowser=`
                parameter is not a parsable JSON string.

        Returns:
            The `RequestParameters` instance mapping the URL parameters
            (`&render=true&...`) to validated attributes
        """

        parsed = urllib.parse.urlparse(api_url)
        query_params = urllib.parse.parse_qs(parsed.query)

        # parse_qs yields lists; keep the first occurrence of each key and
        # type the result as Dict[str, Any] so Pydantic raises a
        # ValidationError if it can't coerce a specific value.
        flat_params: Dict[str, Any] = {
            k: v[0] for k, v in query_params.items()
        }

        # Reconstruct the nested JSON actions if they exist
        if "playWithBrowser" in flat_params:
            try:
                # Manually convert any remaining '+' to ' ' specifically for
                # this JSON string before parsing.
                decoded = urllib.parse.unquote_plus(
                    flat_params["playWithBrowser"]
                )
                flat_params["playWithBrowser"] = json.loads(decoded)
            except json.JSONDecodeError as e:
                raise ValueError(
                    f"Failed to decode `playWithBrowser` parameter from URL | "
                    f"Parameter Value : {flat_params['playWithBrowser']}"
                ) from e

        # Strip Token: the API key is injected by the client, never stored
        # on the model.
        flat_params.pop("token", None)

        return cls(**flat_params)