crawlee 1.0.3b6__py3-none-any.whl → 1.0.5b6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlee/_service_locator.py +4 -4
- crawlee/_utils/recoverable_state.py +32 -8
- crawlee/_utils/recurring_task.py +15 -0
- crawlee/_utils/robots.py +17 -5
- crawlee/_utils/sitemap.py +1 -1
- crawlee/_utils/urls.py +9 -2
- crawlee/browsers/_browser_pool.py +4 -1
- crawlee/browsers/_playwright_browser_controller.py +1 -1
- crawlee/browsers/_playwright_browser_plugin.py +17 -3
- crawlee/browsers/_types.py +1 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -12
- crawlee/crawlers/_basic/_basic_crawler.py +23 -12
- crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
- crawlee/fingerprint_suite/_header_generator.py +2 -2
- crawlee/request_loaders/_sitemap_request_loader.py +5 -0
- crawlee/statistics/_error_snapshotter.py +1 -1
- crawlee/statistics/_statistics.py +15 -6
- crawlee/storage_clients/_file_system/_request_queue_client.py +24 -6
- crawlee/storage_clients/_sql/_db_models.py +1 -2
- crawlee/storages/_key_value_store.py +5 -2
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/METADATA +1 -1
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/RECORD +26 -26
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/WHEEL +0 -0
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/licenses/LICENSE +0 -0
crawlee/_service_locator.py
CHANGED
@@ -38,7 +38,7 @@ class ServiceLocator:
     def get_configuration(self) -> Configuration:
         """Get the configuration."""
         if self._configuration is None:
-            logger.
+            logger.debug('No configuration set, implicitly creating and using default Configuration.')
             self._configuration = Configuration()

         return self._configuration
@@ -63,9 +63,9 @@ class ServiceLocator:
     def get_event_manager(self) -> EventManager:
         """Get the event manager."""
         if self._event_manager is None:
-            logger.
+            logger.debug('No event manager set, implicitly creating and using default LocalEventManager.')
             if self._configuration is None:
-                logger.
+                logger.debug(
                     'Implicit creation of event manager will implicitly set configuration as side effect. '
                     'It is advised to explicitly first set the configuration instead.'
                 )
@@ -93,7 +93,7 @@ class ServiceLocator:
     def get_storage_client(self) -> StorageClient:
         """Get the storage client."""
         if self._storage_client is None:
-            logger.
+            logger.debug('No storage client set, implicitly creating and using default FileSystemStorageClient.')
             if self._configuration is None:
                 logger.warning(
                     'Implicit creation of storage client will implicitly set configuration as side effect. '
crawlee/_utils/recoverable_state.py
CHANGED

@@ -4,12 +4,14 @@ from typing import TYPE_CHECKING, Generic, Literal, TypeVar

 from pydantic import BaseModel

+from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
 from crawlee.events._types import Event, EventPersistStateData

 if TYPE_CHECKING:
     import logging
+    from collections.abc import Callable, Coroutine

-    from crawlee.storages
+    from crawlee.storages import KeyValueStore

 TStateModel = TypeVar('TStateModel', bound=BaseModel)

@@ -37,6 +39,7 @@ class RecoverableState(Generic[TStateModel]):
         persistence_enabled: Literal[True, False, 'explicit_only'] = False,
         persist_state_kvs_name: str | None = None,
         persist_state_kvs_id: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         logger: logging.Logger,
     ) -> None:
         """Initialize a new recoverable state object.
@@ -51,16 +54,40 @@ class RecoverableState(Generic[TStateModel]):
                 If neither a name nor and id are supplied, the default store will be used.
             persist_state_kvs_id: The identifier of the KeyValueStore to use for persistence.
                 If neither a name nor and id are supplied, the default store will be used.
+            persist_state_kvs_factory: Factory that can be awaited to create KeyValueStore to use for persistence. If
+                not provided, a system-wide KeyValueStore will be used, based on service locator configuration.
             logger: A logger instance for logging operations related to state persistence
         """
+        raise_if_too_many_kwargs(
+            persist_state_kvs_name=persist_state_kvs_name,
+            persist_state_kvs_id=persist_state_kvs_id,
+            persist_state_kvs_factory=persist_state_kvs_factory,
+        )
+        if not persist_state_kvs_factory:
+            logger.debug(
+                'No explicit key_value_store set for recoverable state. Recovery will use a system-wide KeyValueStore '
+                'based on service_locator configuration, potentially calling service_locator.set_storage_client in the '
+                'process. It is recommended to initialize RecoverableState with explicit key_value_store to avoid '
+                'global side effects.'
+            )
+
         self._default_state = default_state
         self._state_type: type[TStateModel] = self._default_state.__class__
         self._state: TStateModel | None = None
         self._persistence_enabled = persistence_enabled
         self._persist_state_key = persist_state_key
-
-
-
+        if persist_state_kvs_factory is None:
+
+            async def kvs_factory() -> KeyValueStore:
+                from crawlee.storages import KeyValueStore  # noqa: PLC0415 avoid circular import
+
+                return await KeyValueStore.open(name=persist_state_kvs_name, id=persist_state_kvs_id)
+
+            self._persist_state_kvs_factory = kvs_factory
+        else:
+            self._persist_state_kvs_factory = persist_state_kvs_factory
+
+        self._key_value_store: KeyValueStore | None = None
         self._log = logger

@@ -77,11 +104,8 @@ class RecoverableState(Generic[TStateModel]):
             return self.current_value

         # Import here to avoid circular imports.
-        from crawlee.storages._key_value_store import KeyValueStore  # noqa: PLC0415

-        self._key_value_store = await
-            name=self._persist_state_kvs_name, id=self._persist_state_kvs_id
-        )
+        self._key_value_store = await self._persist_state_kvs_factory()

         await self._load_saved_state()
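The new `persist_state_kvs_factory` parameter lets callers inject the exact store used for persistence instead of relying on the service locator. A minimal sketch, assuming a hypothetical Pydantic state model of your own (all `RecoverableState` arguments below appear in the diff):

import logging

from pydantic import BaseModel

from crawlee._utils.recoverable_state import RecoverableState
from crawlee.storages import KeyValueStore


class CrawlProgress(BaseModel):  # hypothetical state model for illustration
    pages_done: int = 0


async def kvs_factory() -> KeyValueStore:
    # Explicit factory: persistence goes to a named store, with no
    # service-locator side effects.
    return await KeyValueStore.open(name='crawl-progress')


state = RecoverableState(
    default_state=CrawlProgress(),
    persist_state_key='progress',
    persistence_enabled=True,
    persist_state_kvs_factory=kvs_factory,
    logger=logging.getLogger(__name__),
)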
crawlee/_utils/recurring_task.py
CHANGED
@@ -7,6 +7,9 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from collections.abc import Callable
     from datetime import timedelta
+    from types import TracebackType
+
+    from typing_extensions import Self

 logger = getLogger(__name__)

@@ -26,6 +29,18 @@ class RecurringTask:
         self.delay = delay
         self.task: asyncio.Task | None = None

+    async def __aenter__(self) -> Self:
+        self.start()
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        exc_traceback: TracebackType | None,
+    ) -> None:
+        await self.stop()
+
     async def _wrapper(self) -> None:
         """Continuously execute the provided function with the specified delay.
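With `__aenter__`/`__aexit__` in place, a `RecurringTask` can be scoped with `async with`: entry calls `start()` and exit awaits `stop()`. A sketch, assuming the constructor takes the callable and the delay as the attribute assignments above suggest:

import asyncio
from datetime import timedelta

from crawlee._utils.recurring_task import RecurringTask


async def heartbeat() -> None:
    print('tick')


async def main() -> None:
    # start() on entry, await stop() on exit -- no manual lifecycle calls.
    async with RecurringTask(heartbeat, delay=timedelta(seconds=1)):
        await asyncio.sleep(3.5)  # roughly three ticks


asyncio.run(main())

This is also why `BasicCrawler` (below) can drop its explicit `start()`/`stop()` calls and simply list `self._crawler_state_rec_task` among the context managers it enters.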
crawlee/_utils/robots.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from logging import getLogger
 from typing import TYPE_CHECKING

 from protego import Protego
@@ -15,6 +16,9 @@ if TYPE_CHECKING:
     from crawlee.proxy_configuration import ProxyInfo


+logger = getLogger(__name__)
+
+
 class RobotsTxtFile:
     def __init__(
         self, url: str, robots: Protego, http_client: HttpClient | None = None, proxy_info: ProxyInfo | None = None
@@ -56,12 +60,20 @@ class RobotsTxtFile:
             http_client: The `HttpClient` instance used to perform the network request for fetching the robots.txt file.
             proxy_info: Optional `ProxyInfo` to be used when fetching the robots.txt file. If None, no proxy is used.
         """
-
-
-
-
+        try:
+            response = await http_client.send_request(url, proxy_info=proxy_info)
+
+            body = (
+                b'User-agent: *\nAllow: /'
+                if is_status_code_client_error(response.status_code)
+                else await response.read()
+            )
+            robots = Protego.parse(body.decode('utf-8'))
+
+        except Exception as e:
+            logger.warning(f'Failed to fetch from robots.txt from "{url}" with error: "{e}"')

-
+            robots = Protego.parse('User-agent: *\nAllow: /')

         return cls(url, robots, http_client=http_client, proxy_info=proxy_info)
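The net effect of the new error handling: an unreachable robots.txt, or one that returns a 4xx status, now degrades to an allow-everything policy instead of raising. The fallback document parses like this with Protego:

from protego import Protego

# This is the permissive fallback used when fetching robots.txt fails or
# returns a client error: every path is allowed for every agent.
robots = Protego.parse('User-agent: *\nAllow: /')
print(robots.can_fetch('https://example.com/private/page', 'my-bot'))  # True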
crawlee/_utils/sitemap.py
CHANGED
@@ -335,7 +335,7 @@ async def _fetch_and_process_sitemap(
         # Check if the first chunk is a valid gzip header
         if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
             decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
-
+        first_chunk = False

         chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
         text_chunk = decoder.decode(chunk)
crawlee/_utils/urls.py
CHANGED
@@ -7,6 +7,7 @@ from yarl import URL

 if TYPE_CHECKING:
     from collections.abc import Iterator
+    from logging import Logger


 def is_url_absolute(url: str) -> bool:
@@ -22,13 +23,19 @@ def convert_to_absolute_url(base_url: str, relative_url: str) -> str:
     return str(URL(base_url).join(URL(relative_url)))


-def to_absolute_url_iterator(base_url: str, urls: Iterator[str]) -> Iterator[str]:
+def to_absolute_url_iterator(base_url: str, urls: Iterator[str], logger: Logger | None = None) -> Iterator[str]:
     """Convert an iterator of relative URLs to absolute URLs using a base URL."""
     for url in urls:
         if is_url_absolute(url):
             yield url
         else:
-
+            converted_url = convert_to_absolute_url(base_url, url)
+            # Skip the URL if conversion fails, probably due to an incorrect format, such as 'mailto:'.
+            if not is_url_absolute(converted_url):
+                if logger:
+                    logger.debug(f'Could not convert URL "{url}" to absolute using base URL "{base_url}". Skipping it.')
+                continue
+            yield converted_url


 _http_url_adapter = TypeAdapter(AnyHttpUrl)
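A quick sketch of the new skipping behavior, assuming `is_url_absolute` requires both a scheme and a host (so a joined `mailto:` link still fails the check):

import logging

from crawlee._utils.urls import to_absolute_url_iterator

logging.basicConfig(level=logging.DEBUG)
links = iter(['/docs', 'mailto:team@example.com'])
# The unconvertible 'mailto:' entry is now dropped with a debug log instead
# of being yielded as a broken "absolute" URL.
result = list(to_absolute_url_iterator('https://example.com', links, logger=logging.getLogger(__name__)))
print(result)  # expected: ['https://example.com/docs']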
crawlee/browsers/_browser_pool.py
CHANGED

@@ -118,7 +118,10 @@ class BrowserPool:
         """Initialize a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.

         Args:
-            browser_type: The type of browser to launch
+            browser_type: The type of browser to launch:
+                - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
+                - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
+                    the system.
             user_data_dir: Path to a user data directory, which stores browser session data like cookies
                 and local storage.
             browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
crawlee/browsers/_playwright_browser_controller.py
CHANGED

@@ -216,7 +216,7 @@ class PlaywrightBrowserController(BrowserController):
         browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
         if proxy_info:
             if browser_new_context_options.get('proxy'):
-                logger.warning("browser_new_context_options['proxy']
+                logger.warning("browser_new_context_options['proxy'] overridden by explicit `proxy_info` argument.")

             browser_new_context_options['proxy'] = ProxySettings(
                 server=f'{proxy_info.scheme}://{proxy_info.hostname}:{proxy_info.port}',
crawlee/browsers/_playwright_browser_plugin.py
CHANGED

@@ -34,8 +34,8 @@ class PlaywrightBrowserPlugin(BrowserPlugin):

     It is a plugin designed to manage browser instances using the Playwright automation library. It acts as a factory
     for creating new browser instances and provides a unified interface for interacting with different browser types
-    (chromium, firefox, and
-    executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
+    (chromium, firefox, webkit and chrome). This class integrates configuration options for browser launches (headless
+    mode, executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
     browser instance, ensuring that resource limits are respected.
     """

@@ -55,7 +55,10 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
         """Initialize a new instance.

         Args:
-            browser_type: The type of browser to launch
+            browser_type: The type of browser to launch:
+                - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
+                - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
+                    the system.
             user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local
                 storage.
             browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
@@ -80,6 +83,17 @@ class PlaywrightBrowserPlugin(BrowserPlugin):
             'chromium_sandbox': not config.disable_browser_sandbox,
         }

+        if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
+            raise ValueError(
+                'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
+            )
+
+        # Map 'chrome' to 'chromium' with the 'chrome' channel.
+        if browser_type == 'chrome':
+            browser_type = 'chromium'
+            # Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
+            default_launch_browser_options['channel'] = 'chrome'
+
         self._browser_type: BrowserType = browser_type
         self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
         self._browser_new_context_options = browser_new_context_options or {}
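In practice the new `'chrome'` value is sugar for Chromium with the `chrome` channel. A sketch of what the mapping amounts to in plain Playwright terms (the `channel` parameter is Playwright's documented way to target an installed Google Chrome):

from playwright.async_api import async_playwright


async def launch_installed_chrome():
    playwright = await async_playwright().start()
    # Equivalent of browser_type='chrome' in PlaywrightBrowserPlugin:
    # chromium engine, channel='chrome' selects the locally installed browser.
    return await playwright.chromium.launch(channel='chrome', headless=True)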
crawlee/browsers/_types.py
CHANGED

crawlee/crawlers/_abstract_http/_abstract_http_crawler.py
CHANGED

@@ -167,7 +167,9 @@ class AbstractHttpCrawler(
         kwargs.setdefault('strategy', 'same-hostname')

         links_iterator: Iterator[str] = iter(self._parser.find_links(parsed_content, selector=selector))
-        links_iterator = to_absolute_url_iterator(
+        links_iterator = to_absolute_url_iterator(
+            context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+        )

         if robots_txt_file:
             skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py
CHANGED

@@ -149,10 +149,6 @@ class AdaptivePlaywrightCrawler(
                 non-default configuration.
             kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
         """
-        # Some sub crawler kwargs are internally modified. Prepare copies.
-        basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
-        basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
-
         # Adaptive crawling related.
         self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
         self.result_checker = result_checker or (lambda _: True)
@@ -170,11 +166,11 @@ class AdaptivePlaywrightCrawler(
         # Each sub crawler will use custom logger .
         static_logger = getLogger('Subcrawler_static')
         static_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_static_crawler
+        basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}

         pw_logger = getLogger('Subcrawler_playwright')
         pw_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_pw_crawler
+        basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}

         # Initialize sub crawlers to create their pipelines.
         static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
@@ -319,7 +315,7 @@ class AdaptivePlaywrightCrawler(
                 ),
                 logger=self._logger,
             )
-            return SubCrawlerRun(result=result)
+            return SubCrawlerRun(result=result, run_context=context_linked_to_result)
         except Exception as e:
             return SubCrawlerRun(exception=e)

@@ -375,7 +371,8 @@ class AdaptivePlaywrightCrawler(
             self.track_http_only_request_handler_runs()

             static_run = await self._crawl_one(rendering_type='static', context=context)
-            if static_run.result and self.result_checker(static_run.result):
+            if static_run.result and static_run.run_context and self.result_checker(static_run.result):
+                self._update_context_from_copy(context, static_run.run_context)
                 self._context_result_map[context] = static_run.result
                 return
             if static_run.exception:
@@ -406,13 +403,10 @@ class AdaptivePlaywrightCrawler(
         if pw_run.exception is not None:
             raise pw_run.exception

-        if pw_run.result:
-            self._context_result_map[context] = pw_run.result
-
+        if pw_run.result and pw_run.run_context:
             if should_detect_rendering_type:
                 detection_result: RenderingType
                 static_run = await self._crawl_one('static', context=context, state=old_state_copy)
-
                 if static_run.result and self.result_comparator(static_run.result, pw_run.result):
                     detection_result = 'static'
                 else:
@@ -421,6 +415,9 @@ class AdaptivePlaywrightCrawler(
                 context.log.debug(f'Detected rendering type {detection_result} for {context.request.url}')
                 self.rendering_type_predictor.store_result(context.request, detection_result)

+            self._update_context_from_copy(context, pw_run.run_context)
+            self._context_result_map[context] = pw_run.result
+
     def pre_navigation_hook(
         self,
         hook: Callable[[AdaptivePlaywrightPreNavCrawlingContext], Awaitable[None]] | None = None,
@@ -455,8 +452,32 @@ class AdaptivePlaywrightCrawler(
     def track_rendering_type_mispredictions(self) -> None:
         self.statistics.state.rendering_type_mispredictions += 1

+    def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
+        """Update mutable fields of `context` from `context_copy`.
+
+        Uses object.__setattr__ to bypass frozen dataclass restrictions,
+        allowing state synchronization after isolated crawler execution.
+        """
+        updating_attributes = {
+            'request': ('headers', 'user_data'),
+            'session': ('_user_data', '_usage_count', '_error_score', '_cookies'),
+        }
+
+        for attr, sub_attrs in updating_attributes.items():
+            original_sub_obj = getattr(context, attr)
+            copy_sub_obj = getattr(context_copy, attr)
+
+            # Check that both sub objects are not None
+            if original_sub_obj is None or copy_sub_obj is None:
+                continue
+
+            for sub_attr in sub_attrs:
+                new_value = getattr(copy_sub_obj, sub_attr)
+                object.__setattr__(original_sub_obj, sub_attr, new_value)
+

 @dataclass(frozen=True)
 class SubCrawlerRun:
     result: RequestHandlerRunResult | None = None
     exception: Exception | None = None
+    run_context: BasicCrawlingContext | None = None
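The `_update_context_from_copy` helper relies on a standard Python escape hatch: `object.__setattr__` bypasses the `__setattr__` that a frozen dataclass generates. A self-contained illustration of the technique:

from dataclasses import dataclass


@dataclass(frozen=True)
class Point:
    x: int
    y: int


p = Point(1, 2)
# p.x = 10 would raise FrozenInstanceError; going through object.__setattr__
# skips the dataclass-generated guard, which is exactly how the helper above
# synchronizes the context's mutable sub-objects after an isolated sub-crawler run.
object.__setattr__(p, 'x', 10)
print(p)  # Point(x=10, y=2)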
crawlee/crawlers/_basic/_basic_crawler.py
CHANGED

@@ -56,7 +56,7 @@ from crawlee.errors import (
     SessionError,
     UserDefinedErrorHandlerError,
 )
-from crawlee.events._types import Event, EventCrawlerStatusData
+from crawlee.events._types import Event, EventCrawlerStatusData, EventPersistStateData
 from crawlee.http_clients import ImpitHttpClient
 from crawlee.router import Router
 from crawlee.sessions import SessionPool
@@ -437,14 +437,23 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         self._statistics_log_format = statistics_log_format

         # Statistics
-
-
-
-
-
-
-
-
+        if statistics:
+            self._statistics = statistics
+        else:
+
+            async def persist_state_factory() -> KeyValueStore:
+                return await self.get_key_value_store()
+
+            self._statistics = cast(
+                'Statistics[TStatisticsState]',
+                Statistics.with_default_state(
+                    persistence_enabled=True,
+                    periodic_message_logger=self._logger,
+                    statistics_log_format=self._statistics_log_format,
+                    log_message='Current request statistics:',
+                    persist_state_kvs_factory=persist_state_factory,
+                ),
+            )

         # Additional context managers to enter and exit
         self._additional_context_managers = _additional_context_managers or []
@@ -689,7 +698,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
         except CancelledError:
             pass
         finally:
-            await self._crawler_state_rec_task.stop()
             if threading.current_thread() is threading.main_thread():
                 with suppress(NotImplementedError):
                     asyncio.get_running_loop().remove_signal_handler(signal.SIGINT)
@@ -721,8 +729,6 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
     async def _run_crawler(self) -> None:
         event_manager = self._service_locator.get_event_manager()

-        self._crawler_state_rec_task.start()
-
         # Collect the context managers to be entered. Context managers that are already active are excluded,
         # as they were likely entered by the caller, who will also be responsible for exiting them.
         contexts_to_enter = [
@@ -733,6 +739,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
             self._statistics,
             self._session_pool if self._use_session_pool else None,
             self._http_client,
+            self._crawler_state_rec_task,
             *self._additional_context_managers,
         )
         if cm and getattr(cm, 'active', False) is False
@@ -744,6 +751,9 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):

         await self._autoscaled_pool.run()

+        # Emit PERSIST_STATE event when crawler is finishing to allow listeners to persist their state if needed
+        event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=False))
+
     async def add_requests(
         self,
         requests: Sequence[str | Request],
@@ -972,6 +982,7 @@ class BasicCrawler(Generic[TCrawlingContext, TStatisticsState]):
                 label=label,
                 user_data=user_data,
                 transform_request_function=transform_request_function,
+                **kwargs,
             ),
             rq_id=rq_id,
             rq_name=rq_name,
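Because the crawler now emits one final PERSIST_STATE when it finishes, listeners registered for that event get a last chance to save even without a migration. A hedged sketch (the keyword-argument shape of the `on` registration is assumed from crawlee's event manager API):

from crawlee import service_locator
from crawlee.events._types import Event, EventPersistStateData


async def save_my_state(event_data: EventPersistStateData) -> None:
    # Runs on every periodic PERSIST_STATE and, after this change, once more
    # when the crawler finishes (with is_migrating=False).
    print(f'persisting, migrating={event_data.is_migrating}')


event_manager = service_locator.get_event_manager()
event_manager.on(event=Event.PERSIST_STATE, listener=save_my_state)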
crawlee/crawlers/_playwright/_playwright_crawler.py
CHANGED

@@ -114,7 +114,10 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
             browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
             user_data_dir: Path to a user data directory, which stores browser session data like cookies
                 and local storage.
-            browser_type: The type of browser to launch
+            browser_type: The type of browser to launch:
+                - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
+                - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
+                    the system.
                 This option should not be used if `browser_pool` is provided.
             browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
                 directly to Playwright's `browser_type.launch` method. For more details, refer to the
@@ -153,7 +156,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         ):
             raise ValueError(
                 'You cannot provide `headless`, `browser_type`, `browser_launch_options`, '
-                '`browser_new_context_options`, `use_incognito_pages`, `user_data_dir`
+                '`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or '
                 '`fingerprint_generator` arguments when `browser_pool` is provided.'
             )

@@ -366,7 +369,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         links_iterator: Iterator[str] = iter(
             [url for element in elements if (url := await element.get_attribute('href')) is not None]
         )
-        links_iterator = to_absolute_url_iterator(
+        links_iterator = to_absolute_url_iterator(
+            context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+        )

         if robots_txt_file:
             skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
@@ -494,7 +499,9 @@ class _PlaywrightCrawlerAdditionalOptions(TypedDict):
     """A `BrowserPool` instance to be used for launching the browsers and getting pages."""

     browser_type: NotRequired[BrowserType]
-    """The type of browser to launch
+    """The type of browser to launch:
+    - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
+    - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on the system.
     This option should not be used if `browser_pool` is provided."""

     browser_launch_options: NotRequired[Mapping[str, Any]]
crawlee/fingerprint_suite/_header_generator.py
CHANGED

@@ -11,9 +11,9 @@ if TYPE_CHECKING:


 def fingerprint_browser_type_from_playwright_browser_type(
-    playwright_browser_type: Literal['chromium', 'firefox', 'webkit'],
+    playwright_browser_type: Literal['chromium', 'firefox', 'webkit', 'chrome'],
 ) -> SupportedBrowserType:
-    if playwright_browser_type
+    if playwright_browser_type in {'chromium', 'chrome'}:
        return 'chrome'
     if playwright_browser_type == 'firefox':
         return 'firefox'
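The mapping now folds the new Playwright-level `'chrome'` value into the same fingerprint family as Chromium:

from crawlee.fingerprint_suite._header_generator import (
    fingerprint_browser_type_from_playwright_browser_type,
)

# Both engine spellings produce Chrome-style headers and fingerprints.
assert fingerprint_browser_type_from_playwright_browser_type('chromium') == 'chrome'
assert fingerprint_browser_type_from_playwright_browser_type('chrome') == 'chrome'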
crawlee/request_loaders/_sitemap_request_loader.py
CHANGED

@@ -90,6 +90,11 @@ class SitemapRequestLoaderState(BaseModel):
 class SitemapRequestLoader(RequestLoader):
     """A request loader that reads URLs from sitemap(s).

+    The loader is designed to handle sitemaps that follow the format described in the Sitemaps protocol
+    (https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats.
+    Note that HTML pages containing links are not supported - those should be handled by regular crawlers
+    and the `enqueue_links` functionality.
+
     The loader fetches and parses sitemaps in the background, allowing crawling to start
     before all URLs are loaded. It supports filtering URLs using glob and regex patterns.
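For the protocol-compliant formats the docstring now calls out, usage stays the same. A hedged sketch (the constructor parameters below are assumptions based on crawlee's documented request-loader API, not shown in this diff):

from crawlee.http_clients import ImpitHttpClient
from crawlee.request_loaders import SitemapRequestLoader

# XML or plain-text sitemaps per https://www.sitemaps.org/protocol.html;
# an HTML page of links would instead need a regular crawler with enqueue_links.
loader = SitemapRequestLoader(
    sitemap_urls=['https://example.com/sitemap.xml'],
    http_client=ImpitHttpClient(),
)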
crawlee/statistics/_error_snapshotter.py
CHANGED

@@ -32,7 +32,7 @@ class ErrorSnapshotter:
         """Capture error snapshot and save it to key value store.

         It saves the error snapshot directly to a key value store. It can't use `context.get_key_value_store` because
-        it returns `KeyValueStoreChangeRecords` which is
+        it returns `KeyValueStoreChangeRecords` which is committed to the key value store only if the `RequestHandler`
         returned without an exception. ErrorSnapshotter is on the contrary active only when `RequestHandler` fails with
         an exception.
crawlee/statistics/_statistics.py
CHANGED

@@ -17,8 +17,11 @@ from crawlee.statistics import FinalStatistics, StatisticsState
 from crawlee.statistics._error_tracker import ErrorTracker

 if TYPE_CHECKING:
+    from collections.abc import Callable, Coroutine
     from types import TracebackType

+    from crawlee.storages import KeyValueStore
+
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
 TNewStatisticsState = TypeVar('TNewStatisticsState', bound=StatisticsState, default=StatisticsState)
 logger = getLogger(__name__)
@@ -70,6 +73,7 @@ class Statistics(Generic[TStatisticsState]):
         persistence_enabled: bool | Literal['explicit_only'] = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
         periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),
@@ -92,9 +96,10 @@ class Statistics(Generic[TStatisticsState]):

         self._state = RecoverableState(
             default_state=state_model(stats_id=self._id),
-            persist_state_key=persist_state_key or f'
+            persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             logger=logger,
         )

@@ -110,8 +115,8 @@ class Statistics(Generic[TStatisticsState]):
         """Create near copy of the `Statistics` with replaced `state_model`."""
         new_statistics: Statistics[TNewStatisticsState] = Statistics(
             persistence_enabled=self._state._persistence_enabled,  # noqa: SLF001
-            persist_state_kvs_name=self._state._persist_state_kvs_name,  # noqa: SLF001
             persist_state_key=self._state._persist_state_key,  # noqa: SLF001
+            persist_state_kvs_factory=self._state._persist_state_kvs_factory,  # noqa: SLF001
             log_message=self._log_message,
             periodic_message_logger=self._periodic_message_logger,
             state_model=state_model,
@@ -125,6 +130,7 @@ class Statistics(Generic[TStatisticsState]):
         persistence_enabled: bool = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
         periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),
@@ -136,6 +142,7 @@ class Statistics(Generic[TStatisticsState]):
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
             persist_state_key=persist_state_key,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             log_message=log_message,
             periodic_message_logger=periodic_message_logger,
             log_interval=log_interval,
@@ -182,7 +189,10 @@ class Statistics(Generic[TStatisticsState]):
         if not self._active:
             raise RuntimeError(f'The {self.__class__.__name__} is not active.')

-
+        if not self.state.crawler_last_started_at:
+            raise RuntimeError('Statistics.state.crawler_last_started_at not set.')
+        self.state.crawler_finished_at = datetime.now(timezone.utc)
+        self.state.crawler_runtime += self.state.crawler_finished_at - self.state.crawler_last_started_at

         await self._state.teardown()

@@ -250,8 +260,7 @@ class Statistics(Generic[TStatisticsState]):
         if self._instance_start is None:
             raise RuntimeError('The Statistics object is not initialized')

-
-        total_minutes = crawler_runtime.total_seconds() / 60
+        total_minutes = self.state.crawler_runtime.total_seconds() / 60
         state = self._state.current_value
         serialized_state = state.model_dump(by_alias=False)

@@ -262,7 +271,7 @@ class Statistics(Generic[TStatisticsState]):
             requests_failed_per_minute=math.floor(state.requests_failed / total_minutes) if total_minutes else 0,
             request_total_duration=state.request_total_finished_duration + state.request_total_failed_duration,
             requests_total=state.requests_failed + state.requests_finished,
-            crawler_runtime=crawler_runtime,
+            crawler_runtime=state.crawler_runtime,
             requests_finished=state.requests_finished,
             requests_failed=state.requests_failed,
             retry_histogram=serialized_state['request_retry_histogram'],
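Putting the new parameter together: a `Statistics` object can now be pointed at a caller-chosen store, which is exactly how `BasicCrawler` wires its default statistics to its own key-value store. All arguments below appear in the hunks above:

from crawlee.statistics import Statistics
from crawlee.storages import KeyValueStore


async def kvs_factory() -> KeyValueStore:
    return await KeyValueStore.open(name='stats-store')


statistics = Statistics.with_default_state(
    persistence_enabled=True,
    persist_state_kvs_factory=kvs_factory,
)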
crawlee/storage_clients/_file_system/_request_queue_client.py
CHANGED

@@ -31,6 +31,7 @@ if TYPE_CHECKING:
     from collections.abc import Sequence

     from crawlee.configuration import Configuration
+    from crawlee.storages import KeyValueStore

 logger = getLogger(__name__)

@@ -92,6 +93,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         metadata: RequestQueueMetadata,
         path_to_rq: Path,
         lock: asyncio.Lock,
+        recoverable_state: RecoverableState[RequestQueueState],
     ) -> None:
         """Initialize a new instance.

@@ -114,12 +116,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         self._is_empty_cache: bool | None = None
         """Cache for is_empty result: None means unknown, True/False is cached state."""

-        self._state =
-            default_state=RequestQueueState(),
-            persist_state_key=f'__RQ_STATE_{self._metadata.id}',
-            persistence_enabled=True,
-            logger=logger,
-        )
+        self._state = recoverable_state
         """Recoverable state to maintain request ordering, in-progress status, and handled status."""

     @override
@@ -136,6 +133,22 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         """The full path to the request queue metadata file."""
         return self.path_to_rq / METADATA_FILENAME

+    @classmethod
+    async def _create_recoverable_state(cls, id: str, configuration: Configuration) -> RecoverableState:
+        async def kvs_factory() -> KeyValueStore:
+            from crawlee.storage_clients import FileSystemStorageClient  # noqa: PLC0415 avoid circular import
+            from crawlee.storages import KeyValueStore  # noqa: PLC0415 avoid circular import
+
+            return await KeyValueStore.open(storage_client=FileSystemStorageClient(), configuration=configuration)
+
+        return RecoverableState[RequestQueueState](
+            default_state=RequestQueueState(),
+            persist_state_key=f'__RQ_STATE_{id}',
+            persist_state_kvs_factory=kvs_factory,
+            persistence_enabled=True,
+            logger=logger,
+        )
+
     @classmethod
     async def open(
         cls,
@@ -194,6 +207,9 @@ class FileSystemRequestQueueClient(RequestQueueClient):
             metadata=metadata,
             path_to_rq=rq_base_path / rq_dir,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(
+                id=id, configuration=configuration
+            ),
         )
         await client._state.initialize()
         await client._discover_existing_requests()
@@ -230,6 +246,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
             metadata=metadata,
             path_to_rq=path_to_rq,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
         )

         await client._state.initialize()
@@ -254,6 +271,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
             metadata=metadata,
             path_to_rq=path_to_rq,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
         )
         await client._state.initialize()
         await client._update_metadata()
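The factory built in `_create_recoverable_state` pins the state's backing store to the file-system client regardless of how the service locator is configured. The same calls, outside the class, look like this (a sketch using only the calls shown in the hunks above):

from crawlee.configuration import Configuration
from crawlee.storage_clients import FileSystemStorageClient
from crawlee.storages import KeyValueStore


async def open_fs_backed_kvs(configuration: Configuration) -> KeyValueStore:
    # Explicit storage client: no service_locator lookup, no global side effects.
    return await KeyValueStore.open(
        storage_client=FileSystemStorageClient(), configuration=configuration
    )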
crawlee/storage_clients/_sql/_db_models.py
CHANGED

@@ -205,9 +205,8 @@ class RequestDb(Base):
         'idx_fetch_available',
         'request_queue_id',
         'is_handled',
-        'time_blocked_until',
         'sequence_number',
-        postgresql_where=text('is_handled
+        postgresql_where=text('is_handled is false'),
     ),
 )
|
@@ -281,11 +281,14 @@ class KeyValueStore(Storage):
|
|
|
281
281
|
if key in cache:
|
|
282
282
|
return cache[key].current_value.root
|
|
283
283
|
|
|
284
|
+
async def kvs_factory() -> KeyValueStore:
|
|
285
|
+
return self
|
|
286
|
+
|
|
284
287
|
cache[key] = recoverable_state = RecoverableState(
|
|
285
288
|
default_state=AutosavedValue(default_value),
|
|
286
|
-
persistence_enabled=True,
|
|
287
|
-
persist_state_kvs_id=self.id,
|
|
288
289
|
persist_state_key=key,
|
|
290
|
+
persistence_enabled=True,
|
|
291
|
+
persist_state_kvs_factory=kvs_factory,
|
|
289
292
|
logger=logger,
|
|
290
293
|
)
|
|
291
294
|
|
|
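Here the factory simply returns the store itself, so an auto-saved value persists into the very store it belongs to without a second `open` call. Assuming the surrounding method is crawlee's auto-saved-value accessor (the method name is not shown in this hunk), usage is unchanged:

from crawlee.storages import KeyValueStore


async def main() -> None:
    kvs = await KeyValueStore.open()
    # get_auto_saved_value is assumed to be the public accessor backed by the
    # RecoverableState cache in the hunk above.
    counters = await kvs.get_auto_saved_value('counters', default_value={'runs': 0})
    counters['runs'] += 1  # persisted automatically on PERSIST_STATE / teardown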
{crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/RECORD
CHANGED

@@ -4,7 +4,7 @@ crawlee/_cli.py,sha256=czuEsGD8QYEiq5gtMcBxrL08hQ5OJQQkMVhAr1pvDaQ,10353
 crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
 crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
 crawlee/_request.py,sha256=XliqiaL5Gp3fIDqHVVw0GF35VydXOtg6wJIkeaLcAwk,16458
-crawlee/_service_locator.py,sha256=
+crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
 crawlee/_types.py,sha256=DAmfSv5W1dt3nJhJ8z-02gDaE06fdEizNKUlHpsd2_A,29129
 crawlee/configuration.py,sha256=KG_XDkPe1VaYfaIu41nICvMjfHbDKM0h4-YTi3DkyRY,7917
 crawlee/errors.py,sha256=RhFNA_uT615nVBHf9TylpX5YWwtDuHUUEV8LPT4CYa4,3878
@@ -29,43 +29,43 @@ crawlee/_utils/globs.py,sha256=SGX2J35Kqw7yZnSS5c4mLz9UD8c77PF0IoCgXQM5uiw,5310
 crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZkY,902
 crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
 crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
-crawlee/_utils/recoverable_state.py,sha256=
-crawlee/_utils/recurring_task.py,sha256=
+crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
+crawlee/_utils/recurring_task.py,sha256=sQMiURuDXbwwfAcIXK8V4NXncSxIBxsqN1cZWX7DLyg,2128
 crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
-crawlee/_utils/robots.py,sha256=
-crawlee/_utils/sitemap.py,sha256=
+crawlee/_utils/robots.py,sha256=DBU5ni4Y-p7bIKMbLd_ws8wgHSFc4K8zPVF3JvH_pkw,4661
+crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
 crawlee/_utils/system.py,sha256=tA8AP__9vsJ9OTLTnAYAKkxc8U5-IEna0N_hqYBybUo,4294
 crawlee/_utils/time.py,sha256=WK17P939r65dLz2rWvL59OEJoxgzdinw-ND9WuG4DuU,2353
 crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
-crawlee/_utils/urls.py,sha256=
+crawlee/_utils/urls.py,sha256=fEYXJxBT02f-DIYKF_h7PdaKAShfXBs99-dHDjDX03A,1725
 crawlee/_utils/wait.py,sha256=RfiXhp5VUBxOEtEMtru7_jNfKDr2BJCcFge5qGg2gxk,2848
 crawlee/_utils/web.py,sha256=nnKhg8pUSWz0RY64Qd-_GPNBX1fWI2hXS-gzcfQ-rig,364
 crawlee/browsers/__init__.py,sha256=TghkrNSbI_k87UgVBlgNNcEm8Ot05pSLEAPRSv6YsUs,1064
 crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6kcV-g3AXE0I,3068
 crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
-crawlee/browsers/_browser_pool.py,sha256=
+crawlee/browsers/_browser_pool.py,sha256=n1GTVS220yxo-aMaKDVfQO571_AqEV5pMawWbr0zUHk,15832
 crawlee/browsers/_playwright_browser.py,sha256=1yXD6cXuLefZZGUG1m0CT38xXYSwIC7n95bJBdMOxbo,3820
-crawlee/browsers/_playwright_browser_controller.py,sha256=
-crawlee/browsers/_playwright_browser_plugin.py,sha256=
-crawlee/browsers/_types.py,sha256=
+crawlee/browsers/_playwright_browser_controller.py,sha256=W6G5MjZpg9IcZoHts6lTML5VxSEpBTgzx5qeQ8XDigY,10216
+crawlee/browsers/_playwright_browser_plugin.py,sha256=A1qa1nJhTSKNP9uOiO-oGzR7VGlnOMo0A0YNedccb2A,8869
+crawlee/browsers/_types.py,sha256=ZnDgJHeQNSd_s_mXhgQnAf09c2smuiXC31VbawHHXUM,436
 crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/__init__.py,sha256=9VmFahav3rjE-2Bxa5PAhBgkYXP0k5SSAEpdG2xMZ7c,2340
 crawlee/crawlers/_types.py,sha256=xbGTJQirgz5wUbfr12afMR4q-_5AWP7ngF2e8K5P8l0,355
 crawlee/crawlers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_abstract_http/__init__.py,sha256=QCjn8x7jpo8FwEeSRw10TVj_0La2v9mLEiQWdk2RoTw,273
-crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=
+crawlee/crawlers/_abstract_http/_abstract_http_crawler.py,sha256=DEiErZi7j2FHMgyVELPy09GyHo5Gx4UDpuiN6D3sGNk,11553
 crawlee/crawlers/_abstract_http/_abstract_http_parser.py,sha256=Y5o_hiW_0mQAte5GFqkUxscwKEFpWrBYRsLKP1cfBwE,3521
 crawlee/crawlers/_abstract_http/_http_crawling_context.py,sha256=Rno_uJ8ivmyRxFQv2MyY_z9B5WPHSEd5MAPz31_1ZIo,2179
 crawlee/crawlers/_abstract_http/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/crawlers/_adaptive_playwright/__init__.py,sha256=LREq9WR9BKsE8S8lSsEhlCoNjQaLhlJ9yo8y_6a8o4c,1072
-crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=
+crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py,sha256=ME90JLkScWj_ynUymA59f832vEvvVpkP01cYfEc8m-Y,22895
 crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py,sha256=_At8T8S3JLGPA-1AeCFGrpE-FuCDW9sazrXt9U0tK6U,1048
 crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py,sha256=9FlHIUC05IzUhJsVldQvpnDnj1jk8GJpqC98mPLN_fw,10431
 crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py,sha256=TM4mkbIN_059jUyCG8Z6XAb_FBLClIKw7z-aDvjon2I,10834
 crawlee/crawlers/_adaptive_playwright/_result_comparator.py,sha256=NAfw5VKzTnkvARtLr_zrZj6UGeMp05Voc6Oi8oPxU3w,1747
 crawlee/crawlers/_adaptive_playwright/_utils.py,sha256=EUYVz5i2YkLpL_gbVRp9BAD5u6w1xJ_AFzc_qB9bdDQ,1102
 crawlee/crawlers/_basic/__init__.py,sha256=LPln8SiBBXSMqrApiFUfpqz3hvqxN5HUa1cHQXMVKgU,280
-crawlee/crawlers/_basic/_basic_crawler.py,sha256=
+crawlee/crawlers/_basic/_basic_crawler.py,sha256=yZ_A_l9Dux9Y2eYa9XbN3c7h-3YO7MgGmJbzCMbCplg,73257
 crawlee/crawlers/_basic/_basic_crawling_context.py,sha256=fjxm2RQXMDkDlWu38dQ3xn5rrGUOhJXkXiqkgbFJFk4,155
 crawlee/crawlers/_basic/_context_pipeline.py,sha256=vM8EEvnCoguERjRV3oyrxUq2Ln2F9DzY7P5dAEiuMHo,5869
 crawlee/crawlers/_basic/_logging_utils.py,sha256=jp5mEwSq5a_BgzUhNPJ9WrIDcoIeYGbeHstcRqCcP0s,3093
@@ -85,7 +85,7 @@ crawlee/crawlers/_parsel/_parsel_crawling_context.py,sha256=sZB26RcRLjSoD15myEOM
 crawlee/crawlers/_parsel/_parsel_parser.py,sha256=yWBfuXUHMriK4DRnyrXTQoGeqX5WV9bOEkBp_g0YCvQ,1540
 crawlee/crawlers/_parsel/_utils.py,sha256=MbRwx-cdjlq1zLzFYf64M3spOGQ6yxum4FvP0sdqA_Q,2693
 crawlee/crawlers/_playwright/__init__.py,sha256=6Cahe6VEF82o8CYiP8Cmp58Cmb6Rb8uMeyy7wnwe5ms,837
-crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=
+crawlee/crawlers/_playwright/_playwright_crawler.py,sha256=53iytj5LZHw19QOYqYNlZL4ApPlhbWn9Ds-DPTKANhQ,24158
 crawlee/crawlers/_playwright/_playwright_crawling_context.py,sha256=Oi0tMBXHaEDlFjqG01DzgB7Ck52bjVjz-X__eMioxas,1249
 crawlee/crawlers/_playwright/_playwright_http_client.py,sha256=Nfm69dqX85k68jN1p3ljZWbn8egqDWPIPRykXyXsoQs,3977
 crawlee/crawlers/_playwright/_playwright_pre_nav_crawling_context.py,sha256=fEI2laWhmJdWiGoMF5JBLBsim9NtENfagZt6FFd2Rgo,1387
@@ -100,7 +100,7 @@ crawlee/fingerprint_suite/__init__.py,sha256=noY9qw80B0seZGj_B3bBvCDIDk2YWOSN-ll
 crawlee/fingerprint_suite/_browserforge_adapter.py,sha256=bsGebBjjHawM-FiINgqkZW5I9a9Fnv3SGwdKgaVWiRI,11934
 crawlee/fingerprint_suite/_consts.py,sha256=SgykWfxD-pYvOpRp_ooQ4ZTPS0sQ2b3wDyyCjwU_8-w,258
 crawlee/fingerprint_suite/_fingerprint_generator.py,sha256=Di4sDk1qioiFGx4ZcoVyHhtFHF8JXDhxQt8ZPug99k8,730
-crawlee/fingerprint_suite/_header_generator.py,sha256=
+crawlee/fingerprint_suite/_header_generator.py,sha256=9X9FbStehXdw-FZc_D0y-nLk1BUHXVYFxs7fv4dl9zU,3513
 crawlee/fingerprint_suite/_types.py,sha256=7n2LJTiL2XvL-H4G-Y26Uoq5-ZXzH07Dq4o50uhMa-w,2423
 crawlee/fingerprint_suite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/http_clients/__init__.py,sha256=OQFhR9F8BrdlIaS5aRS7hvgQ0tKJPQ8FiyYPualyQcU,890
@@ -137,7 +137,7 @@ crawlee/request_loaders/_request_list.py,sha256=SIalHBMuFanE5GLnFocI0QCppWUiJQjr
 crawlee/request_loaders/_request_loader.py,sha256=2Bg-AWWkIV1W-Dwjqo91dPY8nmc7H3teQy7d6OSgliQ,3620
 crawlee/request_loaders/_request_manager.py,sha256=qFizyJuV2meIb9iiPfuii7ciuERMrp4SldAufiH46dc,3000
 crawlee/request_loaders/_request_manager_tandem.py,sha256=lv-s94KPsoQAqx1KaXFch96ejhO147uOflF3UK5ORTk,4058
-crawlee/request_loaders/_sitemap_request_loader.py,sha256=
+crawlee/request_loaders/_sitemap_request_loader.py,sha256=s65D_N0mZxeIrGJEjqUYfu1uYj2AXSOkmErSnfAHv2A,15554
 crawlee/sessions/__init__.py,sha256=dJdelbL-6MK5sW4SMU4QrjFbb9kRZ9uRnN-VS3R5-8Y,190
 crawlee/sessions/_cookies.py,sha256=ihYbmpXfCzClzXDT7M2wefB_3KVzcMUdIzTZo6uGk6Y,9356
 crawlee/sessions/_models.py,sha256=JMRQgDUP30XUdZ32isncHowOsXvK9jC_m9QYegbBI1E,2916
@@ -145,10 +145,10 @@ crawlee/sessions/_session.py,sha256=cMXVf6QjfGJDgdLUB4MhUP-zTm3pEDHRs-W5SBA4JFI,
 crawlee/sessions/_session_pool.py,sha256=-cMODmTRAXuoYa9DoW0cSTxbPuauF5qXY9QJcX6hXTg,9630
 crawlee/sessions/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/statistics/__init__.py,sha256=lXAsHNkeRZQBffW1B7rERarivXIUJveNlcKTGOXQZY0,154
-crawlee/statistics/_error_snapshotter.py,sha256=
+crawlee/statistics/_error_snapshotter.py,sha256=g-roZgkJ-glyStZL7gXrOhrpdZvZ686W9lR43uZjPao,3279
 crawlee/statistics/_error_tracker.py,sha256=x9Yw1TuyEptjwgPPJ4gIom-0oVjawcNReQDsHH2nZ3w,8553
 crawlee/statistics/_models.py,sha256=SFWYpT3r1c4XugU8nrm0epTpcM5_0fS1mXi9fnbhGJ8,5237
-crawlee/statistics/_statistics.py,sha256=
+crawlee/statistics/_statistics.py,sha256=d6z5XxXm-an4M_8TierOPpSB78vxqxwvUFCewIEmiK4,12786
 crawlee/storage_clients/__init__.py,sha256=RCnutWMOqs_kUQpzfLVT5jgpHGWakLv557c6UIYFQsA,754
 crawlee/storage_clients/models.py,sha256=gfW_kpSCOBuoTBIW0N7tb3FUv7BgD3keZADS7pyT4_I,6586
 crawlee/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -161,7 +161,7 @@ crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
 crawlee/storage_clients/_file_system/_dataset_client.py,sha256=1Z8VCDx8ueh0FQQXUr8tJlOtKw8ggkaFjuz3-T_GJDY,17735
 crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=qNa3RRJQ8Omy2AteQvYh1Td04PsP5AhUFyTpL6KQbSg,18676
-crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=
+crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=ETwy6eODf3dlBqy2RPM3nr2_oEm2ht37WpoTlFxn4A8,33970
 crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
 crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -174,7 +174,7 @@ crawlee/storage_clients/_memory/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 crawlee/storage_clients/_sql/__init__.py,sha256=X_fDMc6jn50gEBZ9QyUw54sjovYfFvE-dgXAdci6Y2M,312
 crawlee/storage_clients/_sql/_client_mixin.py,sha256=U9ThDUuRbT5JDtCFlBurhZIs1Ay5t9fTfPXXI_4dwHY,15988
 crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
-crawlee/storage_clients/_sql/_db_models.py,sha256=
+crawlee/storage_clients/_sql/_db_models.py,sha256=KzA-R_L6zv9gqQg7B27mF-fERNJuMUEnewV9iofmTnI,9812
 crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=LnVLWhOjo4LdvtCac4fwuf__DgEQjlqSxz8KkjY3Qx4,11311
 crawlee/storage_clients/_sql/_request_queue_client.py,sha256=OlvAOwEoYY5f4NO7BdhLFRT_i_E3YzJDb_ptKKK2huY,29478
 crawlee/storage_clients/_sql/_storage_client.py,sha256=ITtMpwfotIW4SZjO4rycB5wfMKaqTAJgMvzcUZxckrk,10905
@@ -182,13 +182,13 @@ crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
 crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
 crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
 crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
-crawlee/storages/_key_value_store.py,sha256=
+crawlee/storages/_key_value_store.py,sha256=xdkMJYdH3zXzwB3jtkijq-YkMlwBtfXxDFIUlpvpXAE,10298
 crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
 crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
 crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
 crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-crawlee-1.0.
-crawlee-1.0.
-crawlee-1.0.
-crawlee-1.0.
-crawlee-1.0.
+crawlee-1.0.5b6.dist-info/METADATA,sha256=VRTs6oXZKJWwfcqFD0n5jzVg9Y2K5kZnryeQWs0eXto,29314
+crawlee-1.0.5b6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+crawlee-1.0.5b6.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
+crawlee-1.0.5b6.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+crawlee-1.0.5b6.dist-info/RECORD,,

{crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/WHEEL
File without changes
{crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/entry_points.txt
File without changes
{crawlee-1.0.3b6.dist-info → crawlee-1.0.5b6.dist-info}/licenses/LICENSE
File without changes