scrapling 0.3__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {scrapling-0.3/scrapling.egg-info → scrapling-0.3.1}/PKG-INFO +5 -3
  2. {scrapling-0.3 → scrapling-0.3.1}/README.md +4 -2
  3. {scrapling-0.3 → scrapling-0.3.1}/scrapling/__init__.py +1 -1
  4. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/shell.py +3 -3
  5. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/_browsers/_camoufox.py +14 -0
  6. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/_browsers/_controllers.py +14 -0
  7. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/_browsers/_validators.py +20 -0
  8. {scrapling-0.3 → scrapling-0.3.1}/scrapling/fetchers.py +12 -0
  9. {scrapling-0.3 → scrapling-0.3.1/scrapling.egg-info}/PKG-INFO +5 -3
  10. {scrapling-0.3 → scrapling-0.3.1}/setup.cfg +1 -1
  11. {scrapling-0.3 → scrapling-0.3.1}/LICENSE +0 -0
  12. {scrapling-0.3 → scrapling-0.3.1}/MANIFEST.in +0 -0
  13. {scrapling-0.3 → scrapling-0.3.1}/pyproject.toml +0 -0
  14. {scrapling-0.3 → scrapling-0.3.1}/scrapling/cli.py +0 -0
  15. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/__init__.py +0 -0
  16. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/_html_utils.py +0 -0
  17. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/_types.py +0 -0
  18. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/ai.py +0 -0
  19. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/custom_types.py +0 -0
  20. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/mixins.py +0 -0
  21. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/storage.py +0 -0
  22. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/translator.py +0 -0
  23. {scrapling-0.3 → scrapling-0.3.1}/scrapling/core/utils.py +0 -0
  24. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/__init__.py +0 -0
  25. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/_browsers/__init__.py +0 -0
  26. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/_browsers/_config_tools.py +0 -0
  27. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/_browsers/_page.py +0 -0
  28. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/constants.py +0 -0
  29. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/static.py +0 -0
  30. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/__init__.py +0 -0
  31. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/bypasses/navigator_plugins.js +0 -0
  32. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/bypasses/notification_permission.js +0 -0
  33. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +0 -0
  34. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/bypasses/screen_props.js +0 -0
  35. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/bypasses/webdriver_fully.js +0 -0
  36. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/bypasses/window_chrome.js +0 -0
  37. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/convertor.py +0 -0
  38. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/custom.py +0 -0
  39. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/fingerprints.py +0 -0
  40. {scrapling-0.3 → scrapling-0.3.1}/scrapling/engines/toolbelt/navigation.py +0 -0
  41. {scrapling-0.3 → scrapling-0.3.1}/scrapling/parser.py +0 -0
  42. {scrapling-0.3 → scrapling-0.3.1}/scrapling/py.typed +0 -0
  43. {scrapling-0.3 → scrapling-0.3.1}/scrapling.egg-info/SOURCES.txt +0 -0
  44. {scrapling-0.3 → scrapling-0.3.1}/scrapling.egg-info/dependency_links.txt +0 -0
  45. {scrapling-0.3 → scrapling-0.3.1}/scrapling.egg-info/entry_points.txt +0 -0
  46. {scrapling-0.3 → scrapling-0.3.1}/scrapling.egg-info/not-zip-safe +0 -0
  47. {scrapling-0.3 → scrapling-0.3.1}/scrapling.egg-info/requires.txt +0 -0
  48. {scrapling-0.3 → scrapling-0.3.1}/scrapling.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapling
3
- Version: 0.3
3
+ Version: 0.3.1
4
4
  Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
@@ -155,8 +155,8 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
155
155
  <!-- sponsors -->
156
156
 
157
157
  <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
158
- <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
159
158
  <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
159
+ <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
160
160
  <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
161
161
 
162
162
  <!-- /sponsors -->
@@ -273,7 +273,7 @@ from scrapling.parser import Selector
273
273
 
274
274
  page = Selector("<html>...</html>")
275
275
  ```
276
- And it works exactly the same!
276
+ And it works exactly the same way!
277
277
 
278
278
  ### Async Session Management Examples
279
279
  ```python
@@ -302,6 +302,8 @@ async with AsyncStealthySession(max_pages=2) as session:
302
302
 
303
303
  Scrapling v0.3 includes a powerful command-line interface:
304
304
 
305
+ [![asciicast](https://asciinema.org/a/736339.svg)](https://asciinema.org/a/736339)
306
+
305
307
  ```bash
306
308
  # Launch interactive Web Scraping shell
307
309
  scrapling shell
@@ -68,8 +68,8 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
68
68
  <!-- sponsors -->
69
69
 
70
70
  <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
71
- <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
72
71
  <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
72
+ <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
73
73
  <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
74
74
 
75
75
  <!-- /sponsors -->
@@ -186,7 +186,7 @@ from scrapling.parser import Selector
186
186
 
187
187
  page = Selector("<html>...</html>")
188
188
  ```
189
- And it works exactly the same!
189
+ And it works exactly the same way!
190
190
 
191
191
  ### Async Session Management Examples
192
192
  ```python
@@ -215,6 +215,8 @@ async with AsyncStealthySession(max_pages=2) as session:
215
215
 
216
216
  Scrapling v0.3 includes a powerful command-line interface:
217
217
 
218
+ [![asciicast](https://asciinema.org/a/736339.svg)](https://asciinema.org/a/736339)
219
+
218
220
  ```bash
219
221
  # Launch interactive Web Scraping shell
220
222
  scrapling shell
@@ -1,5 +1,5 @@
1
1
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
2
- __version__ = "0.3"
2
+ __version__ = "0.3.1"
3
3
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
4
4
 
5
5
 
@@ -20,7 +20,6 @@ from logging import (
20
20
  getLevelName,
21
21
  )
22
22
 
23
- from IPython.terminal.embed import InteractiveShellEmbed
24
23
  from orjson import loads as json_loads, JSONDecodeError
25
24
 
26
25
  from scrapling import __version__
@@ -394,8 +393,7 @@ class CurlParser:
394
393
 
395
394
  else: # pragma: no cover
396
395
  log.error("Input must be a valid curl command string or a Request object.")
397
-
398
- return None
396
+ return None
399
397
 
400
398
 
401
399
  def show_page_in_browser(page: Selector): # pragma: no cover
@@ -544,6 +542,8 @@ Type 'exit' or press Ctrl+D to exit.
544
542
 
545
543
  def start(self): # pragma: no cover
546
544
  """Start the interactive shell"""
545
+ from IPython.terminal.embed import InteractiveShellEmbed
546
+
547
547
  # Get our namespace with application objects
548
548
  namespace = self.get_namespace()
549
549
  ipython_shell = InteractiveShellEmbed(
@@ -60,6 +60,7 @@ class StealthySession:
60
60
  "timeout",
61
61
  "page_action",
62
62
  "wait_selector",
63
+ "init_script",
63
64
  "addons",
64
65
  "wait_selector_state",
65
66
  "cookies",
@@ -95,6 +96,7 @@ class StealthySession:
95
96
  timeout: int | float = 30000,
96
97
  page_action: Optional[Callable] = None,
97
98
  wait_selector: Optional[str] = None,
99
+ init_script: Optional[str] = None,
98
100
  addons: Optional[List[str]] = None,
99
101
  wait_selector_state: SelectorWaitStates = "attached",
100
102
  cookies: Optional[List[Dict]] = None,
@@ -128,6 +130,7 @@ class StealthySession:
128
130
  :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
129
131
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
130
132
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
133
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
131
134
  :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
132
135
  It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
133
136
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
@@ -153,6 +156,7 @@ class StealthySession:
153
156
  "timeout": timeout,
154
157
  "page_action": page_action,
155
158
  "wait_selector": wait_selector,
159
+ "init_script": init_script,
156
160
  "addons": addons,
157
161
  "wait_selector_state": wait_selector_state,
158
162
  "cookies": cookies,
@@ -180,6 +184,7 @@ class StealthySession:
180
184
  self.timeout = config.timeout
181
185
  self.page_action = config.page_action
182
186
  self.wait_selector = config.wait_selector
187
+ self.init_script = config.init_script
183
188
  self.addons = config.addons
184
189
  self.wait_selector_state = config.wait_selector_state
185
190
  self.cookies = config.cookies
@@ -234,6 +239,9 @@ class StealthySession:
234
239
  **self.launch_options
235
240
  )
236
241
  )
242
+ if self.init_script: # pragma: no cover
243
+ self.context.add_init_script(path=self.init_script)
244
+
237
245
  if self.cookies: # pragma: no cover
238
246
  self.context.add_cookies(self.cookies)
239
247
 
@@ -474,6 +482,7 @@ class AsyncStealthySession(StealthySession):
474
482
  timeout: int | float = 30000,
475
483
  page_action: Optional[Callable] = None,
476
484
  wait_selector: Optional[str] = None,
485
+ init_script: Optional[str] = None,
477
486
  addons: Optional[List[str]] = None,
478
487
  wait_selector_state: SelectorWaitStates = "attached",
479
488
  cookies: Optional[List[Dict]] = None,
@@ -507,6 +516,7 @@ class AsyncStealthySession(StealthySession):
507
516
  :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
508
517
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
509
518
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
519
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
510
520
  :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
511
521
  It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
512
522
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
@@ -531,6 +541,7 @@ class AsyncStealthySession(StealthySession):
531
541
  timeout,
532
542
  page_action,
533
543
  wait_selector,
544
+ init_script,
534
545
  addons,
535
546
  wait_selector_state,
536
547
  cookies,
@@ -557,6 +568,9 @@ class AsyncStealthySession(StealthySession):
557
568
  **self.launch_options
558
569
  )
559
570
  )
571
+ if self.init_script: # pragma: no cover
572
+ await self.context.add_init_script(path=self.init_script)
573
+
560
574
  if self.cookies:
561
575
  await self.context.add_cookies(self.cookies)
562
576
 
@@ -60,6 +60,7 @@ class DynamicSession:
60
60
  "disable_resources",
61
61
  "network_idle",
62
62
  "wait_selector",
63
+ "init_script",
63
64
  "wait_selector_state",
64
65
  "wait",
65
66
  "playwright",
@@ -94,6 +95,7 @@ class DynamicSession:
94
95
  timeout: int | float = 30000,
95
96
  disable_resources: bool = False,
96
97
  wait_selector: Optional[str] = None,
98
+ init_script: Optional[str] = None,
97
99
  cookies: Optional[List[Dict]] = None,
98
100
  network_idle: bool = False,
99
101
  wait_selector_state: SelectorWaitStates = "attached",
@@ -112,6 +114,7 @@ class DynamicSession:
112
114
  :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
113
115
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
114
116
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
117
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
115
118
  :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
116
119
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
117
120
  :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
@@ -143,6 +146,7 @@ class DynamicSession:
143
146
  "selector_config": selector_config,
144
147
  "disable_resources": disable_resources,
145
148
  "wait_selector": wait_selector,
149
+ "init_script": init_script,
146
150
  "cookies": cookies,
147
151
  "network_idle": network_idle,
148
152
  "wait_selector_state": wait_selector_state,
@@ -168,6 +172,7 @@ class DynamicSession:
168
172
  self.cdp_url = config.cdp_url
169
173
  self.network_idle = config.network_idle
170
174
  self.wait_selector = config.wait_selector
175
+ self.init_script = config.init_script
171
176
  self.wait_selector_state = config.wait_selector_state
172
177
 
173
178
  self.playwright: Optional[Playwright] = None
@@ -243,6 +248,9 @@ class DynamicSession:
243
248
  user_data_dir="", **self.launch_options
244
249
  )
245
250
 
251
+ if self.init_script: # pragma: no cover
252
+ self.context.add_init_script(path=self.init_script)
253
+
246
254
  if self.cookies: # pragma: no cover
247
255
  self.context.add_cookies(self.cookies)
248
256
 
@@ -409,6 +417,7 @@ class AsyncDynamicSession(DynamicSession):
409
417
  timeout: int | float = 30000,
410
418
  disable_resources: bool = False,
411
419
  wait_selector: Optional[str] = None,
420
+ init_script: Optional[str] = None,
412
421
  cookies: Optional[List[Dict]] = None,
413
422
  network_idle: bool = False,
414
423
  wait_selector_state: SelectorWaitStates = "attached",
@@ -427,6 +436,7 @@ class AsyncDynamicSession(DynamicSession):
427
436
  :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
428
437
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
429
438
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
439
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation for all pages in this session.
430
440
  :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
431
441
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
432
442
  :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
@@ -459,6 +469,7 @@ class AsyncDynamicSession(DynamicSession):
459
469
  timeout,
460
470
  disable_resources,
461
471
  wait_selector,
472
+ init_script,
462
473
  cookies,
463
474
  network_idle,
464
475
  wait_selector_state,
@@ -494,6 +505,9 @@ class AsyncDynamicSession(DynamicSession):
494
505
  )
495
506
  )
496
507
 
508
+ if self.init_script: # pragma: no cover
509
+ await self.context.add_init_script(path=self.init_script)
510
+
497
511
  if self.cookies:
498
512
  await self.context.add_cookies(self.cookies)
499
513
 
@@ -32,6 +32,7 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
32
32
  extra_headers: Optional[Dict[str, str]] = None
33
33
  useragent: Optional[str] = None
34
34
  timeout: int | float = 30000
35
+ init_script: Optional[str] = None
35
36
  disable_resources: bool = False
36
37
  wait_selector: Optional[str] = None
37
38
  cookies: Optional[List[Dict]] = None
@@ -58,6 +59,15 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
58
59
  if not self.selector_config:
59
60
  self.selector_config = {}
60
61
 
62
+ if self.init_script is not None:
63
+ script_path = Path(self.init_script)
64
+ if not script_path.exists():
65
+ raise ValueError("Init script path not found")
66
+ elif not script_path.is_file():
67
+ raise ValueError("Init script is not a file")
68
+ elif not script_path.is_absolute():
69
+ raise ValueError("Init script is not a absolute path")
70
+
61
71
  @staticmethod
62
72
  def __validate_cdp(cdp_url):
63
73
  try:
@@ -90,6 +100,7 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
90
100
  solve_cloudflare: bool = False
91
101
  wait: int | float = 0
92
102
  timeout: int | float = 30000
103
+ init_script: Optional[str] = None
93
104
  page_action: Optional[Callable] = None
94
105
  wait_selector: Optional[str] = None
95
106
  addons: Optional[List[str]] = None
@@ -131,6 +142,15 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
131
142
  f"Addon's path is not a folder, you need to pass a folder of the extracted addon: {addon}"
132
143
  )
133
144
 
145
+ if self.init_script is not None:
146
+ script_path = Path(self.init_script)
147
+ if not script_path.exists():
148
+ raise ValueError("Init script path not found")
149
+ elif not script_path.is_file():
150
+ raise ValueError("Init script is not a file")
151
+ elif not script_path.is_absolute():
152
+ raise ValueError("Init script is not a absolute path")
153
+
134
154
  if not self.cookies:
135
155
  self.cookies = []
136
156
  if self.solve_cloudflare and self.timeout < 60_000:
@@ -62,6 +62,7 @@ class StealthyFetcher(BaseFetcher):
62
62
  timeout: int | float = 30000,
63
63
  page_action: Optional[Callable] = None,
64
64
  wait_selector: Optional[str] = None,
65
+ init_script: Optional[str] = None,
65
66
  addons: Optional[List[str]] = None,
66
67
  wait_selector_state: SelectorWaitStates = "attached",
67
68
  cookies: Optional[List[Dict]] = None,
@@ -97,6 +98,7 @@ class StealthyFetcher(BaseFetcher):
97
98
  :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
98
99
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
99
100
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
101
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
100
102
  :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
101
103
  It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
102
104
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
@@ -127,6 +129,7 @@ class StealthyFetcher(BaseFetcher):
127
129
  disable_ads=disable_ads,
128
130
  allow_webgl=allow_webgl,
129
131
  page_action=page_action,
132
+ init_script=init_script,
130
133
  network_idle=network_idle,
131
134
  block_images=block_images,
132
135
  block_webrtc=block_webrtc,
@@ -158,6 +161,7 @@ class StealthyFetcher(BaseFetcher):
158
161
  timeout: int | float = 30000,
159
162
  page_action: Optional[Callable] = None,
160
163
  wait_selector: Optional[str] = None,
164
+ init_script: Optional[str] = None,
161
165
  addons: Optional[List[str]] = None,
162
166
  wait_selector_state: SelectorWaitStates = "attached",
163
167
  cookies: Optional[List[Dict]] = None,
@@ -193,6 +197,7 @@ class StealthyFetcher(BaseFetcher):
193
197
  :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30,000
194
198
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
195
199
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
200
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
196
201
  :param geoip: Recommended to use with proxies; Automatically use IP's longitude, latitude, timezone, country, locale, and spoof the WebRTC IP address.
197
202
  It will also calculate and spoof the browser's language based on the distribution of language speakers in the target region.
198
203
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
@@ -223,6 +228,7 @@ class StealthyFetcher(BaseFetcher):
223
228
  disable_ads=disable_ads,
224
229
  allow_webgl=allow_webgl,
225
230
  page_action=page_action,
231
+ init_script=init_script,
226
232
  network_idle=network_idle,
227
233
  block_images=block_images,
228
234
  block_webrtc=block_webrtc,
@@ -276,6 +282,7 @@ class DynamicFetcher(BaseFetcher):
276
282
  timeout: int | float = 30000,
277
283
  disable_resources: bool = False,
278
284
  wait_selector: Optional[str] = None,
285
+ init_script: Optional[str] = None,
279
286
  cookies: Optional[Iterable[Dict]] = None,
280
287
  network_idle: bool = False,
281
288
  wait_selector_state: SelectorWaitStates = "attached",
@@ -295,6 +302,7 @@ class DynamicFetcher(BaseFetcher):
295
302
  :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
296
303
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
297
304
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
305
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
298
306
  :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
299
307
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
300
308
  :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
@@ -328,6 +336,7 @@ class DynamicFetcher(BaseFetcher):
328
336
  real_chrome=real_chrome,
329
337
  page_action=page_action,
330
338
  hide_canvas=hide_canvas,
339
+ init_script=init_script,
331
340
  network_idle=network_idle,
332
341
  google_search=google_search,
333
342
  extra_headers=extra_headers,
@@ -359,6 +368,7 @@ class DynamicFetcher(BaseFetcher):
359
368
  timeout: int | float = 30000,
360
369
  disable_resources: bool = False,
361
370
  wait_selector: Optional[str] = None,
371
+ init_script: Optional[str] = None,
362
372
  cookies: Optional[Iterable[Dict]] = None,
363
373
  network_idle: bool = False,
364
374
  wait_selector_state: SelectorWaitStates = "attached",
@@ -378,6 +388,7 @@ class DynamicFetcher(BaseFetcher):
378
388
  :param wait: The time (milliseconds) the fetcher will wait after everything finishes before closing the page and returning the ` Response ` object.
379
389
  :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
380
390
  :param wait_selector: Wait for a specific CSS selector to be in a specific state.
391
+ :param init_script: An absolute path to a JavaScript file to be executed on page creation with this request.
381
392
  :param locale: Set the locale for the browser if wanted. The default value is `en-US`.
382
393
  :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
383
394
  :param stealth: Enables stealth mode, check the documentation to see what stealth mode does currently.
@@ -412,6 +423,7 @@ class DynamicFetcher(BaseFetcher):
412
423
  real_chrome=real_chrome,
413
424
  page_action=page_action,
414
425
  hide_canvas=hide_canvas,
426
+ init_script=init_script,
415
427
  network_idle=network_idle,
416
428
  google_search=google_search,
417
429
  extra_headers=extra_headers,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scrapling
3
- Version: 0.3
3
+ Version: 0.3.1
4
4
  Summary: Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
5
5
  Home-page: https://github.com/D4Vinci/Scrapling
6
6
  Author: Karim Shoair
@@ -155,8 +155,8 @@ Built for the modern Web, Scrapling has its own rapid parsing engine and its fet
155
155
  <!-- sponsors -->
156
156
 
157
157
  <a href="https://evomi.com?utm_source=github&utm_medium=banner&utm_campaign=d4vinci-scrapling" target="_blank" title="Evomi is your Swiss Quality Proxy Provider, starting at $0.49/GB"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/evomi.png"></a>
158
- <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
159
158
  <a href="https://petrosky.io/d4vinci" target="_blank" title="PetroSky delivers cutting-edge VPS hosting."><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/petrosky.png"></a>
159
+ <a href="https://www.swiftproxy.net/" target="_blank" title="Unlock Reliable Proxy Services with Swiftproxy!"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/swiftproxy.png"></a>
160
160
  <a href="https://serpapi.com/?utm_source=scrapling" target="_blank" title="Scrape Google and other search engines with SerpApi"><img src="https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/SerpApi.png"></a>
161
161
 
162
162
  <!-- /sponsors -->
@@ -273,7 +273,7 @@ from scrapling.parser import Selector
273
273
 
274
274
  page = Selector("<html>...</html>")
275
275
  ```
276
- And it works exactly the same!
276
+ And it works exactly the same way!
277
277
 
278
278
  ### Async Session Management Examples
279
279
  ```python
@@ -302,6 +302,8 @@ async with AsyncStealthySession(max_pages=2) as session:
302
302
 
303
303
  Scrapling v0.3 includes a powerful command-line interface:
304
304
 
305
+ [![asciicast](https://asciinema.org/a/736339.svg)](https://asciinema.org/a/736339)
306
+
305
307
  ```bash
306
308
  # Launch interactive Web Scraping shell
307
309
  scrapling shell
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = scrapling
3
- version = 0.3
3
+ version = 0.3.1
4
4
  author = Karim Shoair
5
5
  author_email = karim.shoair@pm.me
6
6
  description = Scrapling is an undetectable, powerful, flexible, high-performance Python library that makes Web Scraping easy and effortless as it should be!
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes