crawlee 1.0.1b8__py3-none-any.whl → 1.0.5b18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crawlee/_request.py +31 -20
- crawlee/_service_locator.py +4 -4
- crawlee/_types.py +10 -16
- crawlee/_utils/recoverable_state.py +32 -8
- crawlee/_utils/recurring_task.py +15 -0
- crawlee/_utils/robots.py +17 -5
- crawlee/_utils/sitemap.py +1 -1
- crawlee/_utils/urls.py +9 -2
- crawlee/browsers/_browser_pool.py +4 -1
- crawlee/browsers/_playwright_browser_controller.py +1 -1
- crawlee/browsers/_playwright_browser_plugin.py +17 -3
- crawlee/browsers/_types.py +1 -1
- crawlee/configuration.py +3 -1
- crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
- crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -13
- crawlee/crawlers/_basic/_basic_crawler.py +23 -12
- crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
- crawlee/fingerprint_suite/_header_generator.py +2 -2
- crawlee/otel/crawler_instrumentor.py +3 -3
- crawlee/request_loaders/_sitemap_request_loader.py +5 -0
- crawlee/sessions/_session_pool.py +1 -1
- crawlee/statistics/_error_snapshotter.py +1 -1
- crawlee/statistics/_statistics.py +41 -31
- crawlee/storage_clients/__init__.py +4 -0
- crawlee/storage_clients/_file_system/_dataset_client.py +2 -2
- crawlee/storage_clients/_file_system/_key_value_store_client.py +2 -2
- crawlee/storage_clients/_file_system/_request_queue_client.py +26 -8
- crawlee/storage_clients/_memory/_dataset_client.py +2 -2
- crawlee/storage_clients/_memory/_key_value_store_client.py +2 -2
- crawlee/storage_clients/_memory/_request_queue_client.py +2 -2
- crawlee/storage_clients/_redis/__init__.py +6 -0
- crawlee/storage_clients/_redis/_client_mixin.py +295 -0
- crawlee/storage_clients/_redis/_dataset_client.py +325 -0
- crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
- crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
- crawlee/storage_clients/_redis/_storage_client.py +146 -0
- crawlee/storage_clients/_redis/_utils.py +23 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
- crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
- crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
- crawlee/storage_clients/_redis/py.typed +0 -0
- crawlee/storage_clients/_sql/_dataset_client.py +2 -2
- crawlee/storage_clients/_sql/_db_models.py +1 -2
- crawlee/storage_clients/_sql/_key_value_store_client.py +5 -4
- crawlee/storage_clients/_sql/_request_queue_client.py +20 -6
- crawlee/storage_clients/_sql/_storage_client.py +10 -1
- crawlee/storages/_base.py +3 -1
- crawlee/storages/_dataset.py +3 -0
- crawlee/storages/_key_value_store.py +8 -2
- crawlee/storages/_request_queue.py +3 -0
- crawlee/storages/_storage_instance_manager.py +9 -1
- crawlee/storages/_utils.py +11 -0
- {crawlee-1.0.1b8.dist-info → crawlee-1.0.5b18.dist-info}/METADATA +9 -5
- {crawlee-1.0.1b8.dist-info → crawlee-1.0.5b18.dist-info}/RECORD +58 -45
- {crawlee-1.0.1b8.dist-info → crawlee-1.0.5b18.dist-info}/WHEEL +0 -0
- {crawlee-1.0.1b8.dist-info → crawlee-1.0.5b18.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.1b8.dist-info → crawlee-1.0.5b18.dist-info}/licenses/LICENSE +0 -0
crawlee/crawlers/_playwright/_playwright_crawler.py

@@ -114,7 +114,10 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
         user_data_dir: Path to a user data directory, which stores browser session data like cookies
             and local storage.
-        browser_type: The type of browser to launch
+        browser_type: The type of browser to launch:
+            - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
+            - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on
+                the system.
             This option should not be used if `browser_pool` is provided.
         browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
             directly to Playwright's `browser_type.launch` method. For more details, refer to the
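The new 'chrome' value is the user-visible change here: it drives a locally installed Google Chrome rather than a Playwright-managed build. A minimal usage sketch (assuming Chrome is installed on the machine):

import asyncio

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext


async def main() -> None:
    # 'chrome' launches the locally installed Google Chrome; 'chromium',
    # 'firefox' and 'webkit' use browsers managed by Playwright.
    crawler = PlaywrightCrawler(browser_type='chrome')

    @crawler.router.default_handler
    async def handler(context: PlaywrightCrawlingContext) -> None:
        context.log.info(f'Visited {context.request.url}')

    await crawler.run(['https://example.com'])


asyncio.run(main())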
@@ -153,7 +156,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         ):
             raise ValueError(
                 'You cannot provide `headless`, `browser_type`, `browser_launch_options`, '
-                '`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` '
+                '`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or '
                 '`fingerprint_generator` arguments when `browser_pool` is provided.'
             )

@@ -366,7 +369,9 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
         links_iterator: Iterator[str] = iter(
             [url for element in elements if (url := await element.get_attribute('href')) is not None]
         )
-        links_iterator = to_absolute_url_iterator(context.request.loaded_url or context.request.url, links_iterator)
+        links_iterator = to_absolute_url_iterator(
+            context.request.loaded_url or context.request.url, links_iterator, logger=context.log
+        )

         if robots_txt_file:
             skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
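For context, the enqueue logic resolves extracted hrefs against `context.request.loaded_url or context.request.url`, i.e. the URL the page was actually served from. The base matters whenever a redirect occurred, as the standard library shows:

from urllib.parse import urljoin

requested_url = 'http://example.com/start'          # the URL that was enqueued
loaded_url = 'https://example.com/articles/index'   # the URL after a redirect

# The same relative href resolves to different absolute URLs.
print(urljoin(requested_url, 'page-2'))  # http://example.com/page-2
print(urljoin(loaded_url, 'page-2'))     # https://example.com/articles/page-2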
@@ -494,7 +499,9 @@ class _PlaywrightCrawlerAdditionalOptions(TypedDict):
     """A `BrowserPool` instance to be used for launching the browsers and getting pages."""

     browser_type: NotRequired[BrowserType]
-    """The type of browser to launch
+    """The type of browser to launch:
+    - 'chromium', 'firefox', 'webkit': Use Playwright-managed browsers
+    - 'chrome': Use your locally installed Google Chrome browser. Requires Google Chrome to be installed on the system.
     This option should not be used if `browser_pool` is provided."""

     browser_launch_options: NotRequired[Mapping[str, Any]]
crawlee/fingerprint_suite/_header_generator.py

@@ -11,9 +11,9 @@ if TYPE_CHECKING:


 def fingerprint_browser_type_from_playwright_browser_type(
-    playwright_browser_type: Literal['chromium', 'firefox', 'webkit'],
+    playwright_browser_type: Literal['chromium', 'firefox', 'webkit', 'chrome'],
 ) -> SupportedBrowserType:
-    if playwright_browser_type == 'chromium':
+    if playwright_browser_type in {'chromium', 'chrome'}:
         return 'chrome'
     if playwright_browser_type == 'firefox':
         return 'firefox'
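The effect of the widened mapping, stated as assertions:

# Both Chromium-based Playwright browser types share one fingerprint profile.
assert fingerprint_browser_type_from_playwright_browser_type('chromium') == 'chrome'
assert fingerprint_browser_type_from_playwright_browser_type('chrome') == 'chrome'
assert fingerprint_browser_type_from_playwright_browser_type('firefox') == 'firefox'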
crawlee/otel/crawler_instrumentor.py

@@ -69,7 +69,7 @@ class CrawlerInstrumentor(BaseInstrumentor):

         if request_handling_instrumentation:

-            async def
+            async def middleware_wrapper(wrapped: Any, instance: _Middleware, args: Any, kwargs: Any) -> Any:
                 with self._tracer.start_as_current_span(
                     name=f'{instance.generator.__name__}, {wrapped.__name__}',  # type:ignore[attr-defined]  # valid in our context
                     attributes={

@@ -111,8 +111,8 @@ class CrawlerInstrumentor(BaseInstrumentor):
         # Handpicked interesting methods to instrument
         self._instrumented.extend(
             [
-                (_Middleware, 'action',
-                (_Middleware, 'cleanup',
+                (_Middleware, 'action', middleware_wrapper),
+                (_Middleware, 'cleanup', middleware_wrapper),
                 (ContextPipeline, '__call__', context_pipeline_wrapper),
                 (BasicCrawler, '_BasicCrawler__run_task_function', self._simple_async_wrapper),
                 (BasicCrawler, '_commit_request_handler_result', _commit_request_handler_result_wrapper),
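For reference, a sketch of switching the instrumentation on. `instrument()` comes from OpenTelemetry's `BaseInstrumentor`; passing `request_handling_instrumentation` through the constructor is an assumption based on the flag checked above:

from crawlee.otel.crawler_instrumentor import CrawlerInstrumentor

# Registers the wrappers above, including `middleware_wrapper` around
# `_Middleware.action` and `_Middleware.cleanup`.
CrawlerInstrumentor(request_handling_instrumentation=True).instrument()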
crawlee/request_loaders/_sitemap_request_loader.py

@@ -90,6 +90,11 @@ class SitemapRequestLoaderState(BaseModel):
 class SitemapRequestLoader(RequestLoader):
     """A request loader that reads URLs from sitemap(s).

+    The loader is designed to handle sitemaps that follow the format described in the Sitemaps protocol
+    (https://www.sitemaps.org/protocol.html). It supports both XML and plain text sitemap formats.
+    Note that HTML pages containing links are not supported - those should be handled by regular crawlers
+    and the `enqueue_links` functionality.
+
     The loader fetches and parses sitemaps in the background, allowing crawling to start
     before all URLs are loaded. It supports filtering URLs using glob and regex patterns.

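A short usage sketch consistent with the clarified docstring; the constructor arguments shown (`sitemap_urls`, `http_client`) are assumptions, so check the class signature:

import asyncio

from crawlee.http_clients import HttpxHttpClient
from crawlee.request_loaders import SitemapRequestLoader


async def main() -> None:
    loader = SitemapRequestLoader(
        sitemap_urls=['https://example.com/sitemap.xml'],
        http_client=HttpxHttpClient(),
    )
    # URLs become available while sitemaps are still being parsed in the background.
    while request := await loader.fetch_next_request():
        print(request.url)
        await loader.mark_request_as_handled(request)


asyncio.run(main())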
crawlee/sessions/_session_pool.py

@@ -163,7 +163,7 @@ class SessionPool:
     def add_session(self, session: Session) -> None:
         """Add an externally created session to the pool.

-        This is
+        This is intended only for the cases when you want to add a session that was created outside of the pool.
         Otherwise, the pool will create new sessions automatically.

         Args:
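In practice the method covers cases like injecting a session that already carries cookies from a separate login flow; a minimal sketch:

import asyncio

from crawlee.sessions import Session, SessionPool


async def main() -> None:
    async with SessionPool() as pool:
        # A session created outside the pool, e.g. after a manual login.
        session = Session()
        pool.add_session(session)


asyncio.run(main())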
crawlee/statistics/_error_snapshotter.py

@@ -32,7 +32,7 @@ class ErrorSnapshotter:
         """Capture error snapshot and save it to key value store.

         It saves the error snapshot directly to a key value store. It can't use `context.get_key_value_store` because
-        it returns `KeyValueStoreChangeRecords` which is
+        it returns `KeyValueStoreChangeRecords` which is committed to the key value store only if the `RequestHandler`
         returned without an exception. ErrorSnapshotter is on the contrary active only when `RequestHandler` fails with
         an exception.

crawlee/statistics/_statistics.py

@@ -1,6 +1,7 @@
 # Inspiration: https://github.com/apify/crawlee/blob/v3.9.2/packages/core/src/crawlers/statistics.ts
 from __future__ import annotations

+import asyncio
 import math
 import time
 from datetime import datetime, timedelta, timezone

@@ -17,8 +18,11 @@ from crawlee.statistics import FinalStatistics, StatisticsState
 from crawlee.statistics._error_tracker import ErrorTracker

 if TYPE_CHECKING:
+    from collections.abc import Callable, Coroutine
     from types import TracebackType

+    from crawlee.storages import KeyValueStore
+
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
 TNewStatisticsState = TypeVar('TNewStatisticsState', bound=StatisticsState, default=StatisticsState)
 logger = getLogger(__name__)
@@ -70,6 +74,7 @@ class Statistics(Generic[TStatisticsState]):
         persistence_enabled: bool | Literal['explicit_only'] = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
         periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),
@@ -80,8 +85,6 @@ class Statistics(Generic[TStatisticsState]):
         self._id = Statistics.__next_id
         Statistics.__next_id += 1

-        self._instance_start: datetime | None = None
-
         self.error_tracker = ErrorTracker(
             save_error_snapshots=save_error_snapshots,
             snapshot_kvs_name=persist_state_kvs_name,

@@ -92,9 +95,10 @@ class Statistics(Generic[TStatisticsState]):

         self._state = RecoverableState(
             default_state=state_model(stats_id=self._id),
-            persist_state_key=persist_state_key or f'
+            persist_state_key=persist_state_key or f'__CRAWLER_STATISTICS_{self._id}',
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             logger=logger,
         )

@@ -106,12 +110,15 @@ class Statistics(Generic[TStatisticsState]):
         # Flag to indicate the context state.
         self._active = False

+        # Pre-existing runtime offset, that can be non-zero when restoring serialized state from KVS.
+        self._runtime_offset = timedelta(seconds=0)
+
     def replace_state_model(self, state_model: type[TNewStatisticsState]) -> Statistics[TNewStatisticsState]:
         """Create near copy of the `Statistics` with replaced `state_model`."""
         new_statistics: Statistics[TNewStatisticsState] = Statistics(
             persistence_enabled=self._state._persistence_enabled,  # noqa: SLF001
-            persist_state_kvs_name=self._state._persist_state_kvs_name,  # noqa: SLF001
             persist_state_key=self._state._persist_state_key,  # noqa: SLF001
+            persist_state_kvs_factory=self._state._persist_state_kvs_factory,  # noqa: SLF001
             log_message=self._log_message,
             periodic_message_logger=self._periodic_message_logger,
             state_model=state_model,

@@ -125,6 +132,7 @@ class Statistics(Generic[TStatisticsState]):
         persistence_enabled: bool = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
         periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),

@@ -136,6 +144,7 @@ class Statistics(Generic[TStatisticsState]):
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
             persist_state_key=persist_state_key,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             log_message=log_message,
             periodic_message_logger=periodic_message_logger,
             log_interval=log_interval,
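The new `persist_state_kvs_factory` parameter, threaded through the constructor and the factory method above, lets the caller decide which key-value store backs persistence. A sketch (the named store and the `with_default_state` entry point are illustrative):

from crawlee.statistics import Statistics
from crawlee.storages import KeyValueStore


async def kvs_factory() -> KeyValueStore:
    # Any coroutine returning a KeyValueStore works; this one opens a
    # dedicated named store instead of the default one.
    return await KeyValueStore.open(name='crawler-stats')


statistics = Statistics.with_default_state(
    persistence_enabled=True,
    persist_state_kvs_factory=kvs_factory,
)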
@@ -158,14 +167,17 @@ class Statistics(Generic[TStatisticsState]):
         if self._active:
             raise RuntimeError(f'The {self.__class__.__name__} is already active.')

-        self._active = True
-        self._instance_start = datetime.now(timezone.utc)
-
         await self._state.initialize()
-        self._after_initialize()

+        self._runtime_offset = self.state.crawler_runtime
+
+        # Start periodic logging and let it print initial state before activation.
         self._periodic_logger.start()
+        await asyncio.sleep(0.01)
+        self._active = True

+        self.state.crawler_last_started_at = datetime.now(timezone.utc)
+        self.state.crawler_started_at = self.state.crawler_started_at or self.state.crawler_last_started_at
         return self

     async def __aexit__(

@@ -182,13 +194,18 @@ class Statistics(Generic[TStatisticsState]):
         if not self._active:
             raise RuntimeError(f'The {self.__class__.__name__} is not active.')

-
-
-        await self._state.teardown()
+        if not self.state.crawler_last_started_at:
+            raise RuntimeError('Statistics.state.crawler_last_started_at not set.')

+        # Stop logging and deactivate the statistics to prevent further changes to crawler_runtime
         await self._periodic_logger.stop()
+        self.state.crawler_finished_at = datetime.now(timezone.utc)
+        self.state.crawler_runtime = (
+            self._runtime_offset + self.state.crawler_finished_at - self.state.crawler_last_started_at
+        )

         self._active = False
+        await self._state.teardown()

     @property
     def state(self) -> TStatisticsState:
@@ -245,13 +262,21 @@ class Statistics(Generic[TStatisticsState]):

         del self._requests_in_progress[request_id_or_key]

+    def _update_crawler_runtime(self) -> None:
+        current_run_duration = (
+            (datetime.now(timezone.utc) - self.state.crawler_last_started_at)
+            if self.state.crawler_last_started_at
+            else timedelta()
+        )
+        self.state.crawler_runtime = current_run_duration + self._runtime_offset
+
     def calculate(self) -> FinalStatistics:
         """Calculate the current statistics."""
-        if self.
-
+        if self._active:
+            # Only update state when active. If not, just report the last known runtime.
+            self._update_crawler_runtime()

-
-        total_minutes = crawler_runtime.total_seconds() / 60
+        total_minutes = self.state.crawler_runtime.total_seconds() / 60
         state = self._state.current_value
         serialized_state = state.model_dump(by_alias=False)

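The bookkeeping in `_update_crawler_runtime` is plain interval arithmetic: total runtime equals the offset persisted by previous runs plus the duration of the current run. For example:

from datetime import datetime, timedelta, timezone

runtime_offset = timedelta(seconds=90)  # persisted runtime from earlier runs
last_started_at = datetime.now(timezone.utc) - timedelta(seconds=30)

# Mirrors _update_crawler_runtime: current run duration plus the offset.
crawler_runtime = (datetime.now(timezone.utc) - last_started_at) + runtime_offset
print(crawler_runtime)  # approximately 0:02:00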
@@ -262,7 +287,7 @@ class Statistics(Generic[TStatisticsState]):
             requests_failed_per_minute=math.floor(state.requests_failed / total_minutes) if total_minutes else 0,
             request_total_duration=state.request_total_finished_duration + state.request_total_failed_duration,
             requests_total=state.requests_failed + state.requests_finished,
-            crawler_runtime=crawler_runtime,
+            crawler_runtime=state.crawler_runtime,
             requests_finished=state.requests_finished,
             requests_failed=state.requests_failed,
             retry_histogram=serialized_state['request_retry_histogram'],

@@ -282,21 +307,6 @@ class Statistics(Generic[TStatisticsState]):
         else:
             self._periodic_message_logger.info(self._log_message, extra=stats.to_dict())

-    def _after_initialize(self) -> None:
-        state = self._state.current_value
-
-        if state.crawler_started_at is None:
-            state.crawler_started_at = datetime.now(timezone.utc)
-
-        if state.stats_persisted_at is not None and state.crawler_last_started_at:
-            self._instance_start = datetime.now(timezone.utc) - (
-                state.stats_persisted_at - state.crawler_last_started_at
-            )
-        elif state.crawler_last_started_at:
-            self._instance_start = state.crawler_last_started_at
-
-        state.crawler_last_started_at = self._instance_start
-
     def _save_retry_count_for_request(self, record: RequestProcessingRecord) -> None:
         retry_count = record.retry_count
         state = self._state.current_value
crawlee/storage_clients/__init__.py

@@ -13,9 +13,13 @@ _install_import_hook(__name__)
 with _try_import(__name__, 'SqlStorageClient'):
     from ._sql import SqlStorageClient

+with _try_import(__name__, 'RedisStorageClient'):
+    from ._redis import RedisStorageClient
+
 __all__ = [
     'FileSystemStorageClient',
     'MemoryStorageClient',
+    'RedisStorageClient',
     'SqlStorageClient',
     'StorageClient',
 ]
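With the optional import in place, a Redis-backed crawl might look like the sketch below. The `connection_string` parameter name is an assumption (see `_storage_client.py`), and the import only succeeds when the Redis extra is installed:

import asyncio

from crawlee import service_locator
from crawlee.crawlers import HttpCrawler
from crawlee.storage_clients import RedisStorageClient

# Route all storages (datasets, key-value stores, request queues) to Redis.
service_locator.set_storage_client(RedisStorageClient(connection_string='redis://localhost:6379'))


async def main() -> None:
    crawler = HttpCrawler()
    await crawler.run(['https://example.com'])


asyncio.run(main())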
crawlee/storage_clients/_file_system/_dataset_client.py

@@ -9,7 +9,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any

 from pydantic import ValidationError
-from typing_extensions import override
+from typing_extensions import Self, override

 from crawlee._consts import METADATA_FILENAME
 from crawlee._utils.crypto import crypto_random_object_id

@@ -94,7 +94,7 @@ class FileSystemDatasetClient(DatasetClient):
         name: str | None,
         alias: str | None,
         configuration: Configuration,
-    ) -> FileSystemDatasetClient:
+    ) -> Self:
         """Open or create a file system dataset client.

         This method attempts to open an existing dataset from the file system. If a dataset with the specified ID
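This file and the clients below all change `open()` to return `Self`, which stays precise under subclassing. A simplified sketch (the `BaseClient` class is hypothetical) of why the annotation is preferable:

from typing_extensions import Self


class BaseClient:
    @classmethod
    async def open(cls) -> Self:
        # `Self` binds to the subclass `open` is called on, so
        # `await FileSystemDatasetClient.open(...)` type-checks as
        # `FileSystemDatasetClient`, not as the base class.
        return cls()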
crawlee/storage_clients/_file_system/_key_value_store_client.py

@@ -10,7 +10,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any

 from pydantic import ValidationError
-from typing_extensions import override
+from typing_extensions import Self, override

 from crawlee._consts import METADATA_FILENAME
 from crawlee._utils.crypto import crypto_random_object_id

@@ -93,7 +93,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
         name: str | None,
         alias: str | None,
         configuration: Configuration,
-    ) -> FileSystemKeyValueStoreClient:
+    ) -> Self:
         """Open or create a file system key-value store client.

         This method attempts to open an existing key-value store from the file system. If a KVS with the specified
crawlee/storage_clients/_file_system/_request_queue_client.py

@@ -11,7 +11,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING

 from pydantic import BaseModel, ValidationError
-from typing_extensions import override
+from typing_extensions import Self, override

 from crawlee import Request
 from crawlee._consts import METADATA_FILENAME

@@ -31,6 +31,7 @@ if TYPE_CHECKING:
     from collections.abc import Sequence

     from crawlee.configuration import Configuration
+    from crawlee.storages import KeyValueStore

 logger = getLogger(__name__)

@@ -92,6 +93,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         metadata: RequestQueueMetadata,
         path_to_rq: Path,
         lock: asyncio.Lock,
+        recoverable_state: RecoverableState[RequestQueueState],
     ) -> None:
         """Initialize a new instance.

@@ -114,12 +116,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         self._is_empty_cache: bool | None = None
         """Cache for is_empty result: None means unknown, True/False is cached state."""

-        self._state = RecoverableState[RequestQueueState](
-            default_state=RequestQueueState(),
-            persist_state_key=f'__RQ_STATE_{self._metadata.id}',
-            persistence_enabled=True,
-            logger=logger,
-        )
+        self._state = recoverable_state
         """Recoverable state to maintain request ordering, in-progress status, and handled status."""

     @override

@@ -136,6 +133,22 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         """The full path to the request queue metadata file."""
         return self.path_to_rq / METADATA_FILENAME

+    @classmethod
+    async def _create_recoverable_state(cls, id: str, configuration: Configuration) -> RecoverableState:
+        async def kvs_factory() -> KeyValueStore:
+            from crawlee.storage_clients import FileSystemStorageClient  # noqa: PLC0415  avoid circular import
+            from crawlee.storages import KeyValueStore  # noqa: PLC0415  avoid circular import
+
+            return await KeyValueStore.open(storage_client=FileSystemStorageClient(), configuration=configuration)
+
+        return RecoverableState[RequestQueueState](
+            default_state=RequestQueueState(),
+            persist_state_key=f'__RQ_STATE_{id}',
+            persist_state_kvs_factory=kvs_factory,
+            persistence_enabled=True,
+            logger=logger,
+        )
+
     @classmethod
     async def open(
         cls,
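Note the imports deferred into `kvs_factory`: the storages module and the storage clients import each other at module level, so the factory resolves its imports at call time instead. The shape of the trick, with hypothetical modules `store` and `clients`:

# store.py imports clients.py at module level, so clients.py must not
# import store.py at its own top level. Deferring the import into the
# coroutine body delays it until both modules are fully initialized.
def make_kvs_factory():
    async def kvs_factory():
        from store import KeyValueStore  # resolved at call time, no cycle

        return await KeyValueStore.open()

    return kvs_factory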
@@ -144,7 +157,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         name: str | None,
         alias: str | None,
         configuration: Configuration,
-    ) -> FileSystemRequestQueueClient:
+    ) -> Self:
         """Open or create a file system request queue client.

         This method attempts to open an existing request queue from the file system. If a queue with the specified

@@ -194,6 +207,9 @@ class FileSystemRequestQueueClient(RequestQueueClient):
             metadata=metadata,
             path_to_rq=rq_base_path / rq_dir,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(
+                id=id, configuration=configuration
+            ),
         )
         await client._state.initialize()
         await client._discover_existing_requests()

@@ -230,6 +246,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
             metadata=metadata,
             path_to_rq=path_to_rq,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
         )

         await client._state.initialize()

@@ -254,6 +271,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
             metadata=metadata,
             path_to_rq=path_to_rq,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
         )
         await client._state.initialize()
         await client._update_metadata()
crawlee/storage_clients/_memory/_dataset_client.py

@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from logging import getLogger
 from typing import TYPE_CHECKING, Any

-from typing_extensions import override
+from typing_extensions import Self, override

 from crawlee._utils.crypto import crypto_random_object_id
 from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs

@@ -55,7 +55,7 @@ class MemoryDatasetClient(DatasetClient):
         id: str | None,
         name: str | None,
         alias: str | None,
-    ) -> MemoryDatasetClient:
+    ) -> Self:
         """Open or create a new memory dataset client.

         This method creates a new in-memory dataset instance. Unlike persistent storage implementations, memory

crawlee/storage_clients/_memory/_key_value_store_client.py

@@ -4,7 +4,7 @@ import sys
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any

-from typing_extensions import override
+from typing_extensions import Self, override

 from crawlee._utils.crypto import crypto_random_object_id
 from crawlee._utils.file import infer_mime_type

@@ -53,7 +53,7 @@ class MemoryKeyValueStoreClient(KeyValueStoreClient):
         id: str | None,
         name: str | None,
         alias: str | None,
-    ) -> MemoryKeyValueStoreClient:
+    ) -> Self:
         """Open or create a new memory key-value store client.

         This method creates a new in-memory key-value store instance. Unlike persistent storage implementations,

crawlee/storage_clients/_memory/_request_queue_client.py

@@ -6,7 +6,7 @@ from datetime import datetime, timezone
 from logging import getLogger
 from typing import TYPE_CHECKING

-from typing_extensions import override
+from typing_extensions import Self, override

 from crawlee import Request
 from crawlee._utils.crypto import crypto_random_object_id

@@ -65,7 +65,7 @@ class MemoryRequestQueueClient(RequestQueueClient):
         id: str | None,
         name: str | None,
         alias: str | None,
-    ) -> MemoryRequestQueueClient:
+    ) -> Self:
         """Open or create a new memory request queue client.

         This method creates a new in-memory request queue instance. Unlike persistent storage implementations,
crawlee/storage_clients/_redis/__init__.py

@@ -0,0 +1,6 @@
+from ._dataset_client import RedisDatasetClient
+from ._key_value_store_client import RedisKeyValueStoreClient
+from ._request_queue_client import RedisRequestQueueClient
+from ._storage_client import RedisStorageClient
+
+__all__ = ['RedisDatasetClient', 'RedisKeyValueStoreClient', 'RedisRequestQueueClient', 'RedisStorageClient']