scrapling-0.2.9-py3-none-any.whl → scrapling-0.2.91-py3-none-any.whl

scrapling/__init__.py CHANGED
@@ -5,7 +5,7 @@ from scrapling.fetchers import (AsyncFetcher, CustomFetcher, Fetcher,
 from scrapling.parser import Adaptor, Adaptors
 
 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.9"
+__version__ = "0.2.91"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 
 
scrapling/core/_types.py CHANGED
@@ -5,6 +5,8 @@ Type definitions for type checking purposes.
 from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
                     List, Literal, Optional, Pattern, Tuple, Type, Union)
 
+SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]
+
 try:
     from typing import Protocol
 except ImportError:
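
For context, the new alias is a `typing.Literal`, so static checkers can reject invalid wait states instead of leaving them to fail inside Playwright at runtime. A minimal standalone sketch (the `wait_for` function is hypothetical, not Scrapling's code):

from typing import Literal

SelectorWaitStates = Literal["attached", "detached", "hidden", "visible"]

def wait_for(selector: str, state: SelectorWaitStates = "attached") -> None:
    # Placeholder body; a real implementation would poll the DOM.
    print(f"waiting until {selector!r} is {state}")

wait_for("h1", "visible")   # OK
wait_for("h1", "vsible")    # flagged by mypy/pyright: invalid Literal value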
scrapling/engines/camo.py CHANGED
@@ -3,7 +3,7 @@ from camoufox.async_api import AsyncCamoufox
 from camoufox.sync_api import Camoufox
 
 from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
-                                   Union)
+                                   SelectorWaitStates, Union)
 from scrapling.core.utils import log
 from scrapling.engines.toolbelt import (Response, StatusText,
                                         async_intercept_route,
@@ -18,7 +18,7 @@ class CamoufoxEngine:
             self, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
-            wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            wait_selector_state: Optional[SelectorWaitStates] = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
             proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True,
             geoip: Optional[bool] = False,
             adaptor_arguments: Dict = None,
@@ -84,6 +84,14 @@ class CamoufoxEngine:
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        # Store the final response
+        final_response = None
+
+        def handle_response(finished_response):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         with Camoufox(
             geoip=self.geoip,
             proxy=self.proxy,
@@ -100,13 +108,15 @@ class CamoufoxEngine:
             page = browser.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
             if self.disable_resources:
                 page.route("**/*", intercept_route)
 
             if self.extra_headers:
                 page.set_extra_http_headers(self.extra_headers)
 
-            res = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
+            first_response = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
@@ -123,21 +133,24 @@ class CamoufoxEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
+            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding = res.headers.get('content-type', '') or 'utf-8'  # default encoding
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text = res.status_text or StatusText.get(res.status)
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=res.url,
+                url=final_response.url,
                 text=page.content(),
-                body=page.content().encode('utf-8'),
-                status=res.status,
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=res.all_headers(),
-                request_headers=res.request.all_headers(),
+                headers=final_response.all_headers(),
+                request_headers=final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -151,6 +164,14 @@ class CamoufoxEngine:
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
         addons = [] if self.disable_ads else [DefaultAddons.UBO]
+        # Store the final response
+        final_response = None
+
+        async def handle_response(finished_response):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         async with AsyncCamoufox(
             geoip=self.geoip,
             proxy=self.proxy,
@@ -167,13 +188,15 @@ class CamoufoxEngine:
             page = await browser.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
             if self.disable_resources:
                 await page.route("**/*", async_intercept_route)
 
             if self.extra_headers:
                 await page.set_extra_http_headers(self.extra_headers)
 
-            res = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
+            first_response = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             await page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
@@ -190,21 +213,24 @@ class CamoufoxEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
 
+            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding = res.headers.get('content-type', '') or 'utf-8'  # default encoding
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text = res.status_text or StatusText.get(res.status)
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=res.url,
+                url=final_response.url,
                 text=await page.content(),
-                body=(await page.content()).encode('utf-8'),
-                status=res.status,
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await res.all_headers(),
-                request_headers=await res.request.all_headers(),
+                headers=await final_response.all_headers(),
+                request_headers=await final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
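
The core of this change is listening to every response and keeping the last one whose request was a top-level document, so status, headers, and body bytes come from the real final response (after redirects) instead of re-encoding `page.content()`. A minimal standalone sketch of the same pattern with plain Playwright (URL and launch options are illustrative):

from playwright.sync_api import sync_playwright

final_response = None

def handle_response(response):
    global final_response
    # Keep only top-level document responses, mirroring the diff above
    if response.request.resource_type == "document":
        final_response = response

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()
    page.on("response", handle_response)  # fires for every response
    first_response = page.goto("https://example.com")
    page.wait_for_load_state("domcontentloaded")

    # Prefer the captured document response; fall back to the serialized DOM
    body = final_response.body() if final_response else page.content().encode("utf-8")
    final = final_response or first_response
    print(final.status, final.url, len(body))
    browser.close()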
scrapling/engines/pw.py CHANGED
@@ -1,6 +1,7 @@
 import json
 
-from scrapling.core._types import Callable, Dict, Optional, Union
+from scrapling.core._types import (Callable, Dict, Optional,
+                                   SelectorWaitStates, Union)
 from scrapling.core.utils import log, lru_cache
 from scrapling.engines.constants import (DEFAULT_STEALTH_FLAGS,
                                          NSTBROWSER_DEFAULT_QUERY)
@@ -23,7 +24,7 @@ class PlaywrightEngine:
             page_action: Callable = None,
             wait_selector: Optional[str] = None,
             locale: Optional[str] = 'en-US',
-            wait_selector_state: Optional[str] = 'attached',
+            wait_selector_state: SelectorWaitStates = 'attached',
             stealth: Optional[bool] = False,
             real_chrome: Optional[bool] = False,
             hide_canvas: Optional[bool] = False,
@@ -193,12 +194,21 @@ class PlaywrightEngine:
         :param url: Target url.
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
+        from playwright.sync_api import Response as PlaywrightResponse
         if not self.stealth or self.real_chrome:
             # Because rebrowser_playwright doesn't play well with real browsers
             from playwright.sync_api import sync_playwright
         else:
             from rebrowser_playwright.sync_api import sync_playwright
 
+        # Store the final response
+        final_response = None
+
+        def handle_response(finished_response: PlaywrightResponse):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         with sync_playwright() as p:
             # Creating the browser
             if self.cdp_url:
@@ -212,6 +222,8 @@ class PlaywrightEngine:
             page = context.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
 
             if self.extra_headers:
                 page.set_extra_http_headers(self.extra_headers)
@@ -223,7 +235,7 @@ class PlaywrightEngine:
                 for script in self.__stealth_scripts():
                     page.add_init_script(path=script)
 
-            res = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
+            first_response = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
@@ -240,21 +252,24 @@ class PlaywrightEngine:
             if self.network_idle:
                 page.wait_for_load_state('networkidle')
 
+            response_bytes = final_response.body() if final_response else page.content().encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding = res.headers.get('content-type', '') or 'utf-8'  # default encoding
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text = res.status_text or StatusText.get(res.status)
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=res.url,
+                url=final_response.url,
                 text=page.content(),
-                body=page.content().encode('utf-8'),
-                status=res.status,
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
-                headers=res.all_headers(),
-                request_headers=res.request.all_headers(),
+                headers=final_response.all_headers(),
+                request_headers=final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             page.close()
@@ -266,12 +281,21 @@ class PlaywrightEngine:
         :param url: Target url.
         :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
         """
+        from playwright.async_api import Response as PlaywrightResponse
         if not self.stealth or self.real_chrome:
             # Because rebrowser_playwright doesn't play well with real browsers
             from playwright.async_api import async_playwright
         else:
             from rebrowser_playwright.async_api import async_playwright
 
+        # Store the final response
+        final_response = None
+
+        async def handle_response(finished_response: PlaywrightResponse):
+            nonlocal final_response
+            if finished_response.request.resource_type == "document":
+                final_response = finished_response
+
         async with async_playwright() as p:
             # Creating the browser
             if self.cdp_url:
@@ -285,6 +309,8 @@ class PlaywrightEngine:
             page = await context.new_page()
             page.set_default_navigation_timeout(self.timeout)
             page.set_default_timeout(self.timeout)
+            # Listen for all responses
+            page.on("response", handle_response)
 
             if self.extra_headers:
                 await page.set_extra_http_headers(self.extra_headers)
@@ -296,7 +322,7 @@ class PlaywrightEngine:
                 for script in self.__stealth_scripts():
                     await page.add_init_script(path=script)
 
-            res = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
+            first_response = await page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
             await page.wait_for_load_state(state="domcontentloaded")
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
@@ -313,21 +339,24 @@ class PlaywrightEngine:
             if self.network_idle:
                 await page.wait_for_load_state('networkidle')
 
+            response_bytes = await final_response.body() if final_response else (await page.content()).encode('utf-8')
+            # In case we didn't catch a document type somehow
+            final_response = final_response if final_response else first_response
             # This will be parsed inside `Response`
-            encoding = res.headers.get('content-type', '') or 'utf-8'  # default encoding
+            encoding = final_response.headers.get('content-type', '') or 'utf-8'  # default encoding
             # PlayWright API sometimes give empty status text for some reason!
-            status_text = res.status_text or StatusText.get(res.status)
+            status_text = final_response.status_text or StatusText.get(final_response.status)
 
             response = Response(
-                url=res.url,
+                url=final_response.url,
                 text=await page.content(),
-                body=(await page.content()).encode('utf-8'),
-                status=res.status,
+                body=response_bytes,
+                status=final_response.status,
                 reason=status_text,
                 encoding=encoding,
                 cookies={cookie['name']: cookie['value'] for cookie in await page.context.cookies()},
-                headers=await res.all_headers(),
-                request_headers=await res.request.all_headers(),
+                headers=await final_response.all_headers(),
+                request_headers=await final_response.request.all_headers(),
                 **self.adaptor_arguments
             )
             await page.close()
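
pw.py wires the same listener into the async API; `page.on("response", ...)` accepts a coroutine handler there, and `body()` must be awaited. A standalone async sketch under the same assumptions as the sync one above:

import asyncio
from playwright.async_api import async_playwright

async def main():
    final_response = None

    async def handle_response(response):
        nonlocal final_response
        if response.request.resource_type == "document":
            final_response = response

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        page.on("response", handle_response)
        first_response = await page.goto("https://example.com")
        await page.wait_for_load_state("domcontentloaded")

        body = await final_response.body() if final_response else (await page.content()).encode("utf-8")
        final = final_response or first_response
        print(final.status, final.url, len(body))
        await browser.close()

asyncio.run(main())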
scrapling/engines/toolbelt/custom.py CHANGED
@@ -84,8 +84,6 @@ class ResponseEncoding:
 class Response(Adaptor):
     """This class is returned by all engines as a way to unify response type between different libraries."""
 
-    _is_response_result_logged = False  # Class-level flag, initialized to False
-
     def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
                  encoding: str = 'utf-8', method: str = 'GET', **adaptor_arguments: Dict):
         automatch_domain = adaptor_arguments.pop('automatch_domain', None)
@@ -99,9 +97,7 @@ class Response(Adaptor):
         # For back-ward compatibility
         self.adaptor = self
         # For easier debugging while working from a Python shell
-        if not Response._is_response_result_logged:
-            log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
-            Response._is_response_result_logged = True
+        log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
 
     # def __repr__(self):
     #     return f'<{self.__class__.__name__} [{self.status} {self.reason}]>'
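
Dropping the class-level flag reverses a 0.2.9 behavior where only the first `Response` ever built was logged; in 0.2.91 every fetch logs again. A small standalone sketch of the difference (class names are illustrative):

import logging

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("sketch")

class OnceLogged:
    _logged = False  # class-level flag shared by every instance (the 0.2.9 pattern)

    def __init__(self, url):
        if not OnceLogged._logged:
            log.info(f"Fetched {url}")  # only the first construction logs
            OnceLogged._logged = True

class AlwaysLogged:
    def __init__(self, url):
        log.info(f"Fetched {url}")  # every construction logs (the 0.2.91 pattern)

OnceLogged("https://a.example")
OnceLogged("https://b.example")    # silent
AlwaysLogged("https://a.example")
AlwaysLogged("https://b.example")  # logged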
scrapling/fetchers.py CHANGED
@@ -1,5 +1,5 @@
 from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
-                                   Union)
+                                   SelectorWaitStates, Union)
 from scrapling.engines import (CamoufoxEngine, PlaywrightEngine, StaticEngine,
                                check_if_engine_usable)
 from scrapling.engines.toolbelt import BaseFetcher, Response
@@ -176,8 +176,8 @@ class StealthyFetcher(BaseFetcher):
             self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
-            wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None,
-            os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -234,8 +234,8 @@ class StealthyFetcher(BaseFetcher):
             self, url: str, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
             block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = True, network_idle: Optional[bool] = False, addons: Optional[List[str]] = None,
             timeout: Optional[float] = 30000, page_action: Callable = None, wait_selector: Optional[str] = None, humanize: Optional[Union[bool, float]] = True,
-            wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None, proxy: Optional[Union[str, Dict[str, str]]] = None,
-            os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
+            wait_selector_state: SelectorWaitStates = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
+            proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, disable_ads: Optional[bool] = True, geoip: Optional[bool] = False,
     ) -> Response:
         """
         Opens up a browser and do your request based on your chosen options below.
@@ -308,7 +308,7 @@ class PlayWrightFetcher(BaseFetcher):
     def fetch(
             self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
             useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
-            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: Optional[str] = 'attached',
+            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: SelectorWaitStates = 'attached',
             hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
             proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
             stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
@@ -368,7 +368,7 @@ class PlayWrightFetcher(BaseFetcher):
     async def async_fetch(
             self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
             useragent: Optional[str] = None, network_idle: Optional[bool] = False, timeout: Optional[float] = 30000,
-            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: Optional[str] = 'attached',
+            page_action: Optional[Callable] = None, wait_selector: Optional[str] = None, wait_selector_state: SelectorWaitStates = 'attached',
             hide_canvas: Optional[bool] = False, disable_webgl: Optional[bool] = False, extra_headers: Optional[Dict[str, str]] = None, google_search: Optional[bool] = True,
             proxy: Optional[Union[str, Dict[str, str]]] = None, locale: Optional[str] = 'en-US',
             stealth: Optional[bool] = False, real_chrome: Optional[bool] = False,
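
A hedged usage sketch of the newly annotated parameter, assuming the 0.2.x instance-based fetcher API; the target URL and selector are illustrative:

from scrapling.fetchers import StealthyFetcher

# 'visible' is one of the four SelectorWaitStates values; a typo such as
# 'visable' would now be flagged by a type checker instead of failing at runtime
page = StealthyFetcher().fetch(
    'https://example.com',
    wait_selector='h1',
    wait_selector_state='visible',
)
print(page.status)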
scrapling/parser.py CHANGED
@@ -155,7 +155,7 @@ class Adaptor(SelectorsGeneration):
         else:
             if issubclass(type(element), html.HtmlMixin):
 
-                return self.__class__(
+                return Adaptor(
                     root=element,
                     text='', body=b'',  # Since root argument is provided, both `text` and `body` will be ignored so this is just a filler
                     url=self.url, encoding=self.encoding, auto_match=self.__auto_match_enabled,
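
The switch from `self.__class__` to `Adaptor` matters because `Response` subclasses `Adaptor` with a stricter constructor: `self.__class__(root=...)` called on a `Response` would invoke `Response.__init__` without its required `status`, `reason`, and friends. A generic illustration of the pitfall (these classes are hypothetical, not Scrapling's):

class Base:
    def __init__(self, root):
        self.root = root

    def child_dynamic(self):
        return self.__class__(root="child")  # breaks for subclasses like Sub

    def child_pinned(self):
        return Base(root="child")            # always safe

class Sub(Base):
    def __init__(self, root, status):        # extra required argument
        super().__init__(root)
        self.status = status

s = Sub(root="doc", status=200)
s.child_pinned()        # fine
# s.child_dynamic()     # TypeError: __init__() missing required argument 'status'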
scrapling-0.2.9.dist-info/METADATA → scrapling-0.2.91.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.9
+Version: 0.2.91
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -21,7 +21,6 @@ Classifier: Topic :: Text Processing :: Markup :: HTML
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -38,7 +37,7 @@ Requires-Dist: cssselect>=1.2
 Requires-Dist: w3lib
 Requires-Dist: orjson>=3
 Requires-Dist: tldextract
-Requires-Dist: httpx[brotli,zstd]
+Requires-Dist: httpx[brotli,socks,zstd]
 Requires-Dist: playwright>=1.49.1
 Requires-Dist: rebrowser-playwright>=1.49.1
 Requires-Dist: camoufox[geoip]>=0.4.9
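
The new `socks` extra pulls in httpx's SOCKS support, presumably so proxy arguments can accept socks5:// URLs. A minimal sketch of what the extra enables, with an illustrative proxy address (the keyword is `proxies=` on httpx versions before 0.26):

import httpx

# Requires the httpx[socks] extra; without it, socks5:// proxy URLs raise an error
client = httpx.Client(proxy="socks5://127.0.0.1:9050")
response = client.get("https://example.com")
print(response.status_code)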
scrapling-0.2.9.dist-info/RECORD → scrapling-0.2.91.dist-info/RECORD CHANGED
@@ -1,22 +1,22 @@
-scrapling/__init__.py,sha256=4adit4xM1Io6mBz-VnnSHcPCQxIYhvDmDVMhbXu8VF4,499
+scrapling/__init__.py,sha256=pfbhEm1kcriA9pFR3JUUFEE3v4_ykB35SYbeHKzFxHw,500
 scrapling/defaults.py,sha256=tJAOMB-PMd3aLZz3j_yr6haBxxaklAvWdS_hP-GFFdU,331
-scrapling/fetchers.py,sha256=I_N32DMjCzNCMmrkGYoX480x1Eh5Lka6cMJ-EcSfszk,35342
-scrapling/parser.py,sha256=NKwOsGR6TB7XC9lMkA418_DRWE6pyUqK0XtmTAA51ic,55215
+scrapling/fetchers.py,sha256=K3MKBqKDOXItJNwxFY2fe1C21Vz6QSd91fFtN98Mpg4,35402
+scrapling/parser.py,sha256=Fl9cdbR58GuoPbWN5hZI6ToPSl0_rQFXMskTdzpoxWs,55208
 scrapling/py.typed,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
 scrapling/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scrapling/core/_types.py,sha256=__HJ2JTk5vx5eg_7HAJmDjaHrMDIaoxNG8fadLLyKV8,566
+scrapling/core/_types.py,sha256=OcsP1WeQEOlEVo9OzTrLQfgZZfXuJ0civVs31SynwGA,641
 scrapling/core/custom_types.py,sha256=ZRzpoT6qQ4vU_ejhLXa7WYuYLGl5HwAjLPe01xdhuvM,10808
 scrapling/core/mixins.py,sha256=sozbpaGL1_O_x3U-ABM5aYWpnxpCLfdbcA9SG3P7weY,3532
 scrapling/core/storage_adaptors.py,sha256=l_ZYcdn1y69AcoPuRrPoaxqKysN62pMExrwJWYdu5MA,6220
 scrapling/core/translator.py,sha256=ojDmNi5pFZE6Ke-AiSsTilXiPRdR8yhX3o-uVGMkap8,5236
 scrapling/core/utils.py,sha256=03LzCDzmeK1TXPjIKVzHSUgSfhpe36XE8AwxlgxzJoU,3705
 scrapling/engines/__init__.py,sha256=zA7tzqcDXP0hllwmjVewNHWipIA4JSU9mRG4J-cud0c,267
-scrapling/engines/camo.py,sha256=L5jRNUgJSAY5hE8KCD-tz4SFrx7ZjowJoWpHrl7havI,12359
+scrapling/engines/camo.py,sha256=g12IVIPy4Uyp_jngtu8Qcvy7PSMHjURAHUGXdM58Kks,13778
 scrapling/engines/constants.py,sha256=Gb_nXFoBB4ujJkd05SKkenMe1UDiRYQA3dkmA3DunLg,3723
-scrapling/engines/pw.py,sha256=0vCDaodve_WcOdbGqBdyRwMECPZmQ0eGLQikh4WHKFc,17011
+scrapling/engines/pw.py,sha256=Eq4_oQA5eX666chiNpXsBqhWONzleniyXjKdmCpXj_Y,18630
 scrapling/engines/static.py,sha256=7SVEfeigCPfwC1ukx0zIFFe96Bo5fox6qOq2IWrP6P8,10319
 scrapling/engines/toolbelt/__init__.py,sha256=VQDdYm1zY9Apno6d8UrULk29vUjllZrQqD8mXL1E2Fc,402
-scrapling/engines/toolbelt/custom.py,sha256=FbWTUC0Z8NTmTLFDiiCchs4W0_Q40lz2ONnhInRNuvA,12947
+scrapling/engines/toolbelt/custom.py,sha256=d3qyeCg_qHm1RRE7yv5hyU9b17Y7YDPGBOVhEH1CAT0,12754
 scrapling/engines/toolbelt/fingerprints.py,sha256=ajEHdXHr7W4hw9KcNS7XlyxNBZu37p1bRj18TiICLzU,2929
 scrapling/engines/toolbelt/navigation.py,sha256=xEfZRJefuxOCGxQOSI2llS0du0Y2XmoIPdVGUSHOd7k,4567
 scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
@@ -40,8 +40,8 @@ tests/fetchers/sync/test_playwright.py,sha256=5eZdPwk3JGeaO7GuExv_QsByLyWDE9joxn
 tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/parser/test_automatch.py,sha256=SxsNdExE8zz8AcPRQFBUjZ3Q_1-tPOd9dzVvMSZpOYQ,4908
 tests/parser/test_general.py,sha256=dyfOsc8lleoY4AxcfDUBUaD1i95xecfYuTUhKBsYjwo,12100
-scrapling-0.2.9.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
-scrapling-0.2.9.dist-info/METADATA,sha256=Wg6lcRo_5LcyotrB1ZXagT5-gToAyRmtNKsq6TJoNk4,68382
-scrapling-0.2.9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-scrapling-0.2.9.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
-scrapling-0.2.9.dist-info/RECORD,,
+scrapling-0.2.91.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.2.91.dist-info/METADATA,sha256=ajc8n5Hjl--ZdGXwHxmfMEWyCMgbw1waZNovoPFxrUc,68339
+scrapling-0.2.91.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+scrapling-0.2.91.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
+scrapling-0.2.91.dist-info/RECORD,,