crawlee 1.0.2b6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

crawlee/_request.py CHANGED
@@ -185,9 +185,6 @@ class Request(BaseModel):
     method: HttpMethod = 'GET'
     """HTTP request method."""
 
-    headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders()
-    """HTTP request headers."""
-
     payload: Annotated[
         HttpPayload | None,
         BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
@@ -195,23 +192,37 @@ class Request(BaseModel):
     ] = None
     """HTTP request payload."""
 
-    user_data: Annotated[
-        dict[str, JsonSerializable],  # Internally, the model contains `UserData`, this is just for convenience
-        Field(alias='userData', default_factory=lambda: UserData()),
-        PlainValidator(user_data_adapter.validate_python),
-        PlainSerializer(
-            lambda instance: user_data_adapter.dump_python(
-                instance,
-                by_alias=True,
-                exclude_none=True,
-                exclude_unset=True,
-                exclude_defaults=True,
-            )
-        ),
-    ] = {}
-    """Custom user data assigned to the request. Use this to save any request related data to the
-    request's scope, keeping them accessible on retries, failures etc.
-    """
+    # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
+    if TYPE_CHECKING:
+        headers: HttpHeaders = HttpHeaders()
+        """HTTP request headers."""
+
+        user_data: dict[str, JsonSerializable] = {}
+        """Custom user data assigned to the request. Use this to save any request related data to the
+        request's scope, keeping them accessible on retries, failures etc.
+        """
+
+    else:
+        headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)]
+        """HTTP request headers."""
+
+        user_data: Annotated[
+            dict[str, JsonSerializable],  # Internally, the model contains `UserData`, this is just for convenience
+            Field(alias='userData', default_factory=lambda: UserData()),
+            PlainValidator(user_data_adapter.validate_python),
+            PlainSerializer(
+                lambda instance: user_data_adapter.dump_python(
+                    instance,
+                    by_alias=True,
+                    exclude_none=True,
+                    exclude_unset=True,
+                    exclude_defaults=True,
+                )
+            ),
+        ]
+        """Custom user data assigned to the request. Use this to save any request related data to the
+        request's scope, keeping them accessible on retries, failures etc.
+        """
 
     retry_count: Annotated[int, Field(alias='retryCount')] = 0
     """Number of times the request has been retried."""
crawlee/_service_locator.py CHANGED
@@ -38,7 +38,7 @@ class ServiceLocator:
     def get_configuration(self) -> Configuration:
        """Get the configuration."""
         if self._configuration is None:
-            logger.warning('No configuration set, implicitly creating and using default Configuration.')
+            logger.debug('No configuration set, implicitly creating and using default Configuration.')
             self._configuration = Configuration()
 
         return self._configuration
@@ -63,9 +63,9 @@ class ServiceLocator:
     def get_event_manager(self) -> EventManager:
         """Get the event manager."""
         if self._event_manager is None:
-            logger.warning('No event manager set, implicitly creating and using default LocalEventManager.')
+            logger.debug('No event manager set, implicitly creating and using default LocalEventManager.')
             if self._configuration is None:
-                logger.warning(
+                logger.debug(
                     'Implicit creation of event manager will implicitly set configuration as side effect. '
                     'It is advised to explicitly first set the configuration instead.'
                 )
@@ -93,7 +93,7 @@ class ServiceLocator:
     def get_storage_client(self) -> StorageClient:
         """Get the storage client."""
         if self._storage_client is None:
-            logger.warning('No storage client set, implicitly creating and using default FileSystemStorageClient.')
+            logger.debug('No storage client set, implicitly creating and using default FileSystemStorageClient.')
             if self._configuration is None:
                 logger.warning(
                     'Implicit creation of storage client will implicitly set configuration as side effect. '
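
These implicit-fallback messages are now logged at debug rather than warning level. To avoid the fallbacks altogether, services can be registered explicitly before first use; a sketch using the `service_locator` singleton and the `FileSystemStorageClient` referenced in this diff:

    from crawlee import service_locator
    from crawlee.configuration import Configuration
    from crawlee.storage_clients import FileSystemStorageClient

    # Register services up front so the getters never create defaults as a side effect.
    service_locator.set_configuration(Configuration())
    service_locator.set_storage_client(FileSystemStorageClient())
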
crawlee/_types.py CHANGED
@@ -3,17 +3,7 @@ from __future__ import annotations
 import dataclasses
 from collections.abc import Callable, Iterator, Mapping
 from dataclasses import dataclass
-from typing import (
-    TYPE_CHECKING,
-    Annotated,
-    Any,
-    Literal,
-    Protocol,
-    TypedDict,
-    TypeVar,
-    cast,
-    overload,
-)
+from typing import TYPE_CHECKING, Annotated, Any, Literal, Protocol, TypedDict, TypeVar, cast, overload
 
 from pydantic import ConfigDict, Field, PlainValidator, RootModel
 
@@ -71,11 +61,15 @@ class HttpHeaders(RootModel, Mapping[str, str]):
 
     model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
 
-    root: Annotated[
-        dict[str, str],
-        PlainValidator(lambda value: _normalize_headers(value)),
-        Field(default_factory=dict),
-    ] = {}
+    # Workaround for pydantic 2.12 and mypy type checking issue for Annotated with default_factory
+    if TYPE_CHECKING:
+        root: dict[str, str] = {}
+    else:
+        root: Annotated[
+            dict[str, str],
+            PlainValidator(lambda value: _normalize_headers(value)),
+            Field(default_factory=dict),
+        ]
 
     def __getitem__(self, key: str) -> str:
         return self.root[key.lower()]
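
`Request` and `HttpHeaders` use the same split: under `TYPE_CHECKING`, mypy sees a plain annotation with a default value, while at runtime pydantic sees the `Annotated` form with `default_factory`. A self-contained sketch of the pattern (illustrative model, not from the codebase):

    from typing import TYPE_CHECKING, Annotated

    from pydantic import BaseModel, Field

    class Model(BaseModel):
        # mypy reads the first branch; pydantic executes the second.
        if TYPE_CHECKING:
            items: dict[str, str] = {}
        else:
            items: Annotated[dict[str, str], Field(default_factory=dict)]

    # The runtime default_factory still yields a fresh dict per instance.
    assert Model().items == {}
    assert Model().items is not Model().items
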
crawlee/_utils/recoverable_state.py CHANGED
@@ -4,12 +4,14 @@ from typing import TYPE_CHECKING, Generic, Literal, TypeVar
 
 from pydantic import BaseModel
 
+from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
 from crawlee.events._types import Event, EventPersistStateData
 
 if TYPE_CHECKING:
     import logging
+    from collections.abc import Callable, Coroutine
 
-    from crawlee.storages._key_value_store import KeyValueStore
+    from crawlee.storages import KeyValueStore
 
 TStateModel = TypeVar('TStateModel', bound=BaseModel)
 
@@ -37,6 +39,7 @@ class RecoverableState(Generic[TStateModel]):
         persistence_enabled: Literal[True, False, 'explicit_only'] = False,
         persist_state_kvs_name: str | None = None,
         persist_state_kvs_id: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         logger: logging.Logger,
     ) -> None:
         """Initialize a new recoverable state object.
@@ -51,16 +54,40 @@
                 If neither a name nor and id are supplied, the default store will be used.
             persist_state_kvs_id: The identifier of the KeyValueStore to use for persistence.
                 If neither a name nor and id are supplied, the default store will be used.
+            persist_state_kvs_factory: Factory that can be awaited to create KeyValueStore to use for persistence. If
+                not provided, a system-wide KeyValueStore will be used, based on service locator configuration.
             logger: A logger instance for logging operations related to state persistence
         """
+        raise_if_too_many_kwargs(
+            persist_state_kvs_name=persist_state_kvs_name,
+            persist_state_kvs_id=persist_state_kvs_id,
+            persist_state_kvs_factory=persist_state_kvs_factory,
+        )
+        if not persist_state_kvs_factory:
+            logger.debug(
+                'No explicit key_value_store set for recoverable state. Recovery will use a system-wide KeyValueStore '
+                'based on service_locator configuration, potentially calling service_locator.set_storage_client in the '
+                'process. It is recommended to initialize RecoverableState with explicit key_value_store to avoid '
+                'global side effects.'
+            )
+
         self._default_state = default_state
         self._state_type: type[TStateModel] = self._default_state.__class__
         self._state: TStateModel | None = None
         self._persistence_enabled = persistence_enabled
         self._persist_state_key = persist_state_key
-        self._persist_state_kvs_name = persist_state_kvs_name
-        self._persist_state_kvs_id = persist_state_kvs_id
-        self._key_value_store: 'KeyValueStore | None' = None  # noqa: UP037
+        if persist_state_kvs_factory is None:
+
+            async def kvs_factory() -> KeyValueStore:
+                from crawlee.storages import KeyValueStore  # noqa: PLC0415 avoid circular import
+
+                return await KeyValueStore.open(name=persist_state_kvs_name, id=persist_state_kvs_id)
+
+            self._persist_state_kvs_factory = kvs_factory
+        else:
+            self._persist_state_kvs_factory = persist_state_kvs_factory
+
+        self._key_value_store: KeyValueStore | None = None
         self._log = logger
 
     async def initialize(self) -> TStateModel:
@@ -77,11 +104,8 @@
             return self.current_value
 
         # Import here to avoid circular imports.
-        from crawlee.storages._key_value_store import KeyValueStore  # noqa: PLC0415
 
-        self._key_value_store = await KeyValueStore.open(
-            name=self._persist_state_kvs_name, id=self._persist_state_kvs_id
-        )
+        self._key_value_store = await self._persist_state_kvs_factory()
 
         await self._load_saved_state()
 
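
A hedged usage sketch of the new `persist_state_kvs_factory` parameter (the state model and store name are illustrative; the constructor arguments are those shown above):

    import logging

    from pydantic import BaseModel

    from crawlee._utils.recoverable_state import RecoverableState
    from crawlee.storages import KeyValueStore

    class CrawlState(BaseModel):  # illustrative state model
        pages_visited: int = 0

    async def kvs_factory() -> KeyValueStore:
        # Explicit store: sidesteps the system-wide fallback and the
        # service_locator side effects described in the debug message above.
        return await KeyValueStore.open(name='my-crawl-state')

    state = RecoverableState(
        default_state=CrawlState(),
        persist_state_key='CRAWL_STATE',
        persistence_enabled=True,
        persist_state_kvs_factory=kvs_factory,
        logger=logging.getLogger(__name__),
    )
    # `await state.initialize()` then loads any previously persisted snapshot.
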
crawlee/_utils/sitemap.py CHANGED
@@ -335,7 +335,7 @@ async def _fetch_and_process_sitemap(
         # Check if the first chunk is a valid gzip header
         if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
             decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
-            first_chunk = False
+        first_chunk = False
 
         chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
         text_chunk = decoder.decode(chunk)
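
The change is a dedent of `first_chunk = False` out of the gzip branch: previously the flag was cleared only for gzip responses, so an uncompressed sitemap kept re-testing every chunk for the gzip magic bytes. A simplified sketch of the corrected flow (the chunk iterator is assumed scaffolding, not the real function):

    import zlib
    from collections.abc import AsyncIterator

    async def iter_decompressed(chunks: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
        first_chunk = True
        decompressor = None
        async for raw_chunk in chunks:
            # Only the very first chunk can carry the gzip magic number.
            if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
                decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
            first_chunk = False  # now cleared for gzip and plain streams alike
            yield decompressor.decompress(raw_chunk) if decompressor else raw_chunk
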
crawlee/statistics/_statistics.py CHANGED
@@ -17,8 +17,11 @@ from crawlee.statistics import FinalStatistics, StatisticsState
 from crawlee.statistics._error_tracker import ErrorTracker
 
 if TYPE_CHECKING:
+    from collections.abc import Callable, Coroutine
     from types import TracebackType
 
+    from crawlee.storages import KeyValueStore
+
 TStatisticsState = TypeVar('TStatisticsState', bound=StatisticsState, default=StatisticsState)
 TNewStatisticsState = TypeVar('TNewStatisticsState', bound=StatisticsState, default=StatisticsState)
 logger = getLogger(__name__)
@@ -70,6 +73,7 @@ class Statistics(Generic[TStatisticsState]):
         persistence_enabled: bool | Literal['explicit_only'] = False,
         persist_state_kvs_name: str | None = None,
         persist_state_key: str | None = None,
+        persist_state_kvs_factory: Callable[[], Coroutine[None, None, KeyValueStore]] | None = None,
         log_message: str = 'Statistics',
         periodic_message_logger: Logger | None = None,
         log_interval: timedelta = timedelta(minutes=1),
@@ -95,6 +99,7 @@
             persist_state_key=persist_state_key or f'SDK_CRAWLER_STATISTICS_{self._id}',
             persistence_enabled=persistence_enabled,
             persist_state_kvs_name=persist_state_kvs_name,
+            persist_state_kvs_factory=persist_state_kvs_factory,
             logger=logger,
         )
 
@@ -110,8 +115,8 @@
         """Create near copy of the `Statistics` with replaced `state_model`."""
         new_statistics: Statistics[TNewStatisticsState] = Statistics(
             persistence_enabled=self._state._persistence_enabled,  # noqa: SLF001
-            persist_state_kvs_name=self._state._persist_state_kvs_name,  # noqa: SLF001
             persist_state_key=self._state._persist_state_key,  # noqa: SLF001
+            persist_state_kvs_factory=self._state._persist_state_kvs_factory,  # noqa: SLF001
             log_message=self._log_message,
             periodic_message_logger=self._periodic_message_logger,
             state_model=state_model,
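
`Statistics` forwards the new factory straight into its internal `RecoverableState`. A hedged construction sketch mirroring the keyword arguments visible above (`StatisticsState` is the default state model, and the store name is illustrative):

    from crawlee.statistics import Statistics, StatisticsState
    from crawlee.storages import KeyValueStore

    async def kvs_factory() -> KeyValueStore:
        return await KeyValueStore.open(name='stats-store')  # illustrative name

    statistics = Statistics(
        persistence_enabled=True,
        persist_state_kvs_factory=kvs_factory,
        state_model=StatisticsState,
    )
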
crawlee/storage_clients/_file_system/_request_queue_client.py CHANGED
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
     from collections.abc import Sequence
 
     from crawlee.configuration import Configuration
+    from crawlee.storages import KeyValueStore
 
 logger = getLogger(__name__)
 
@@ -92,6 +93,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
         metadata: RequestQueueMetadata,
         path_to_rq: Path,
         lock: asyncio.Lock,
+        recoverable_state: RecoverableState[RequestQueueState],
     ) -> None:
         """Initialize a new instance.
 
@@ -114,12 +116,7 @@
         self._is_empty_cache: bool | None = None
         """Cache for is_empty result: None means unknown, True/False is cached state."""
 
-        self._state = RecoverableState[RequestQueueState](
-            default_state=RequestQueueState(),
-            persist_state_key=f'__RQ_STATE_{self._metadata.id}',
-            persistence_enabled=True,
-            logger=logger,
-        )
+        self._state = recoverable_state
         """Recoverable state to maintain request ordering, in-progress status, and handled status."""
 
     @override
@@ -136,6 +133,22 @@
         """The full path to the request queue metadata file."""
         return self.path_to_rq / METADATA_FILENAME
 
+    @classmethod
+    async def _create_recoverable_state(cls, id: str, configuration: Configuration) -> RecoverableState:
+        async def kvs_factory() -> KeyValueStore:
+            from crawlee.storage_clients import FileSystemStorageClient  # noqa: PLC0415 avoid circular import
+            from crawlee.storages import KeyValueStore  # noqa: PLC0415 avoid circular import
+
+            return await KeyValueStore.open(storage_client=FileSystemStorageClient(), configuration=configuration)
+
+        return RecoverableState[RequestQueueState](
+            default_state=RequestQueueState(),
+            persist_state_key=f'__RQ_STATE_{id}',
+            persist_state_kvs_factory=kvs_factory,
+            persistence_enabled=True,
+            logger=logger,
+        )
+
     @classmethod
     async def open(
         cls,
@@ -194,6 +207,9 @@
             metadata=metadata,
             path_to_rq=rq_base_path / rq_dir,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(
+                id=id, configuration=configuration
+            ),
         )
         await client._state.initialize()
         await client._discover_existing_requests()
@@ -230,6 +246,7 @@
             metadata=metadata,
             path_to_rq=path_to_rq,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
         )
 
         await client._state.initialize()
@@ -254,6 +271,7 @@
             metadata=metadata,
             path_to_rq=path_to_rq,
             lock=asyncio.Lock(),
+            recoverable_state=await cls._create_recoverable_state(id=metadata.id, configuration=configuration),
         )
         await client._state.initialize()
         await client._update_metadata()
crawlee/storage_clients/_sql/_key_value_store_client.py CHANGED
@@ -2,9 +2,9 @@ from __future__ import annotations
 
 import json
 from logging import getLogger
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
-from sqlalchemy import delete, select
+from sqlalchemy import CursorResult, delete, select
 from typing_extensions import Self, override
 
 from crawlee._utils.file import infer_mime_type
@@ -227,6 +227,7 @@ class SqlKeyValueStoreClient(KeyValueStoreClient, SqlClientMixin):
         async with self.get_session(with_simple_commit=True) as session:
             # Delete the record if it exists
             result = await session.execute(stmt)
+            result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
 
             # Update metadata if we actually deleted something
             if result.rowcount > 0:
crawlee/storage_clients/_sql/_request_queue_client.py CHANGED
@@ -5,9 +5,9 @@ from datetime import datetime, timedelta, timezone
 from functools import lru_cache
 from hashlib import sha256
 from logging import getLogger
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
-from sqlalchemy import func, or_, select, update
+from sqlalchemy import CursorResult, func, or_, select, update
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.orm import load_only
 from typing_extensions import NotRequired, Self, override
@@ -231,6 +231,7 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
 
         async with self.get_session() as session:
             result = await session.execute(stmt)
+            result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
             existing_requests = {req.request_id: req for req in result.scalars()}
             state = await self._get_state(session)
             insert_values: list[dict] = []
@@ -498,9 +499,12 @@
         )
         async with self.get_session() as session:
             result = await session.execute(stmt)
+            result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
+
             if result.rowcount == 0:
                 logger.warning(f'Request {request.unique_key} not found in database.')
                 return None
+
             await self._update_metadata(
                 session,
                 **_QueueMetadataUpdateParams(
@@ -542,14 +546,24 @@
             block_until = now + timedelta(seconds=self._BLOCK_REQUEST_TIME)
             # Extend blocking for forefront request, it is considered blocked by the current client.
             stmt = stmt.values(
-                sequence_number=new_sequence, time_blocked_until=block_until, client_key=self.client_key
+                sequence_number=new_sequence,
+                time_blocked_until=block_until,
+                client_key=self.client_key,
+                data=request.model_dump_json(),
             )
         else:
             new_sequence = state.sequence_counter
             state.sequence_counter += 1
-            stmt = stmt.values(sequence_number=new_sequence, time_blocked_until=None, client_key=None)
+            stmt = stmt.values(
+                sequence_number=new_sequence,
+                time_blocked_until=None,
+                client_key=None,
+                data=request.model_dump_json(),
+            )
 
         result = await session.execute(stmt)
+        result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
+
         if result.rowcount == 0:
             logger.warning(f'Request {request.unique_key} not found in database.')
             return None
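
The recurring `cast('CursorResult', ...)` line exists because `session.execute()` is typed as returning a generic `Result`, while `rowcount` is only reliably typed on `CursorResult` (the result type of UPDATE/DELETE statements); the `isinstance` guard makes it a no-op at runtime. A condensed sketch of the pattern (the `RequestRow` ORM model is hypothetical):

    from typing import cast

    from sqlalchemy import CursorResult, update
    from sqlalchemy.ext.asyncio import AsyncSession

    async def mark_handled(session: AsyncSession, request_id: str) -> bool:
        # RequestRow is a hypothetical declarative model, stand-in for the real table.
        stmt = update(RequestRow).where(RequestRow.id == request_id).values(handled=True)
        result = await session.execute(stmt)
        # Narrow Result -> CursorResult so `.rowcount` type-checks; at runtime the
        # driver already returns a CursorResult for UPDATE, so this changes nothing.
        result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result
        return result.rowcount > 0
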
crawlee/storage_clients/_sql/_storage_client.py CHANGED
@@ -149,7 +149,7 @@ class SqlStorageClient(StorageClient):
         # Raise an error if the new version creates breaking changes in the database schema.
         if db_version and db_version != __version__:
             warnings.warn(
-                f'Database version {db_version.version} does not match library version {__version__}. '
+                f'Database version {db_version} does not match library version {__version__}. '
                 'This may lead to unexpected behavior. Drop the db if you want to make sure that '
                 'everything will work fine.',
                 category=UserWarning,
crawlee/storages/_key_value_store.py CHANGED
@@ -281,11 +281,14 @@ class KeyValueStore(Storage):
         if key in cache:
             return cache[key].current_value.root
 
+        async def kvs_factory() -> KeyValueStore:
+            return self
+
         cache[key] = recoverable_state = RecoverableState(
             default_state=AutosavedValue(default_value),
-            persistence_enabled=True,
-            persist_state_kvs_id=self.id,
             persist_state_key=key,
+            persistence_enabled=True,
+            persist_state_kvs_factory=kvs_factory,
             logger=logger,
         )
 
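
With this change an auto-saved value persists back into the same `KeyValueStore` instance that produced it, rather than reopening a store by id through the service locator. A usage sketch (assuming `get_auto_saved_value`, the public API this cache backs):

    from crawlee.storages import KeyValueStore

    async def main() -> None:
        kvs = await KeyValueStore.open()
        # The returned dict is autosaved back into `kvs` itself via the factory above.
        progress = await kvs.get_auto_saved_value('crawl-progress', {'pages_done': 0})
        progress['pages_done'] += 1
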
crawlee-1.0.2b6.dist-info/METADATA → crawlee-1.0.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: crawlee
-Version: 1.0.2b6
+Version: 1.0.3
 Summary: Crawlee for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -232,7 +232,7 @@ Requires-Dist: more-itertools>=10.2.0
 Requires-Dist: protego>=0.5.0
 Requires-Dist: psutil>=6.0.0
 Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
-Requires-Dist: pydantic<2.12.0,>=2.11.0
+Requires-Dist: pydantic>=2.11.0
 Requires-Dist: pyee>=9.0.0
 Requires-Dist: tldextract>=5.1.0
 Requires-Dist: typing-extensions>=4.1.0
crawlee-1.0.2b6.dist-info/RECORD → crawlee-1.0.3.dist-info/RECORD CHANGED
@@ -3,9 +3,9 @@ crawlee/_browserforge_workaround.py,sha256=FYQaqpqfZGYkx-A8evF9nsHnj4KK4IMtjNq3L
 crawlee/_cli.py,sha256=czuEsGD8QYEiq5gtMcBxrL08hQ5OJQQkMVhAr1pvDaQ,10353
 crawlee/_consts.py,sha256=RQ96gx7V-WPH91cVsMUz76X5UZUNDNhCudtlyGkxFVk,133
 crawlee/_log_config.py,sha256=VyxoEfWCq_9fyicmmJbjiZ5KC91onMcAtX2L4oKX4m4,5999
-crawlee/_request.py,sha256=8bAbftf4loxnVPY3TVXZIUyRuwrGaoibqdwQULNHMzs,15948
-crawlee/_service_locator.py,sha256=uO1eml4YeMs0xqlFN2d1LvOAroAdNJYbb2Mt15V-qzs,5066
-crawlee/_types.py,sha256=qi2E-gWXINMFn_3nhRMxuB3QfndRpFf2urIpz6Hab9g,28990
+crawlee/_request.py,sha256=XliqiaL5Gp3fIDqHVVw0GF35VydXOtg6wJIkeaLcAwk,16458
+crawlee/_service_locator.py,sha256=SJ8ABYtclBl7rz8kfZ2jZkIgKq5oNIoGT7WmN8ApTzo,5058
+crawlee/_types.py,sha256=DAmfSv5W1dt3nJhJ8z-02gDaE06fdEizNKUlHpsd2_A,29129
 crawlee/configuration.py,sha256=KG_XDkPe1VaYfaIu41nICvMjfHbDKM0h4-YTi3DkyRY,7917
 crawlee/errors.py,sha256=RhFNA_uT615nVBHf9TylpX5YWwtDuHUUEV8LPT4CYa4,3878
 crawlee/proxy_configuration.py,sha256=rqf67yerXvLvraBaAHW04nvf5ECze3wMQbK7LlqXucM,10386
@@ -29,11 +29,11 @@ crawlee/_utils/globs.py,sha256=SGX2J35Kqw7yZnSS5c4mLz9UD8c77PF0IoCgXQM5uiw,5310
 crawlee/_utils/html_to_text.py,sha256=1iykT-OXd2xXNy7isHVWHqPxe23X82CGQBHIfbZbZkY,902
 crawlee/_utils/models.py,sha256=EqM50Uc-xvxKlLCLA2lPpRduzfKvT0z_-Q-UWG8aTRQ,1955
 crawlee/_utils/raise_if_too_many_kwargs.py,sha256=J2gaUJmsmNwexohuehXw_mdYKv-eWiui6WUHFsQ3qTQ,597
-crawlee/_utils/recoverable_state.py,sha256=_88kOEDDRg1lr6RWs7NNDku6NNRlg7zuzUOoUxwMwGk,7734
+crawlee/_utils/recoverable_state.py,sha256=c1D2ZecxEliGZzhqYz9_oU5CF2Hm0UKvpOHqO6CDJRE,9032
 crawlee/_utils/recurring_task.py,sha256=sA0n4Cf9pYLQyBD9PZ7QbR6m6KphlbkACaT2GdbLfs4,1757
 crawlee/_utils/requests.py,sha256=yOjai7bHR9_duPJ0ck-L76y9AnKZr49JBfSOQv9kvJc,5048
 crawlee/_utils/robots.py,sha256=k3Yi2OfKT0H04MPkP-OBGGV7fEePgOqb60awltjMYWY,4346
-crawlee/_utils/sitemap.py,sha256=9FtZRG87i5YZJhJa2m5CexSpm7RpvsSAgG60ih4BQc0,16636
+crawlee/_utils/sitemap.py,sha256=UI9EJiFiyFvV5_flVUtdsEVz8ZsJeRERPtcx8ZsqjTU,16632
 crawlee/_utils/system.py,sha256=tA8AP__9vsJ9OTLTnAYAKkxc8U5-IEna0N_hqYBybUo,4294
 crawlee/_utils/time.py,sha256=WK17P939r65dLz2rWvL59OEJoxgzdinw-ND9WuG4DuU,2353
 crawlee/_utils/try_import.py,sha256=QI_58ifc2l0Rxehzu6xcofQrRAVeLzZuBTTTHttLl8s,1310
@@ -148,7 +148,7 @@ crawlee/statistics/__init__.py,sha256=lXAsHNkeRZQBffW1B7rERarivXIUJveNlcKTGOXQZY
 crawlee/statistics/_error_snapshotter.py,sha256=ChBBG0gIMWcSeyEzs3jQf3mSnHLZUHcD284wEDan1Js,3278
 crawlee/statistics/_error_tracker.py,sha256=x9Yw1TuyEptjwgPPJ4gIom-0oVjawcNReQDsHH2nZ3w,8553
 crawlee/statistics/_models.py,sha256=SFWYpT3r1c4XugU8nrm0epTpcM5_0fS1mXi9fnbhGJ8,5237
-crawlee/statistics/_statistics.py,sha256=fJr_du4CkVTz4_UgVToivAJKgA88PThE6IDBCW8RSTQ,12183
+crawlee/statistics/_statistics.py,sha256=vp8swl1yt4lBi2W0YyaI_xKCrRku0remI4BLx90q7-Y,12455
 crawlee/storage_clients/__init__.py,sha256=RCnutWMOqs_kUQpzfLVT5jgpHGWakLv557c6UIYFQsA,754
 crawlee/storage_clients/models.py,sha256=gfW_kpSCOBuoTBIW0N7tb3FUv7BgD3keZADS7pyT4_I,6586
 crawlee/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -161,7 +161,7 @@ crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
 crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
 crawlee/storage_clients/_file_system/_dataset_client.py,sha256=1Z8VCDx8ueh0FQQXUr8tJlOtKw8ggkaFjuz3-T_GJDY,17735
 crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=qNa3RRJQ8Omy2AteQvYh1Td04PsP5AhUFyTpL6KQbSg,18676
-crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=0TM4BFcz2knQiWyF6dH62WTTj11wN9dNtpYOiBDqkpY,32862
+crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=ETwy6eODf3dlBqy2RPM3nr2_oEm2ht37WpoTlFxn4A8,33970
 crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
 crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -175,20 +175,20 @@ crawlee/storage_clients/_sql/__init__.py,sha256=X_fDMc6jn50gEBZ9QyUw54sjovYfFvE-
 crawlee/storage_clients/_sql/_client_mixin.py,sha256=U9ThDUuRbT5JDtCFlBurhZIs1Ay5t9fTfPXXI_4dwHY,15988
 crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
 crawlee/storage_clients/_sql/_db_models.py,sha256=Gs4MS1YL0gWaUfNReVKJUXsqbU_d5jxiyvZ0sFxAV2A,9845
-crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=D0nQoStq9PR0RTn9ZORKuTcRP7X-_2aDrLKgb2hKWM0,11189
-crawlee/storage_clients/_sql/_request_queue_client.py,sha256=iavp-G62ApPtPmKePYviaNOFDXDg7QN9ozPHau6C1TY,28925
-crawlee/storage_clients/_sql/_storage_client.py,sha256=3xfgUcdW7Pu_j3SDYFzAdnU81jl1CmZ9Z5_NLvNi4P8,10913
+crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=LnVLWhOjo4LdvtCac4fwuf__DgEQjlqSxz8KkjY3Qx4,11311
+crawlee/storage_clients/_sql/_request_queue_client.py,sha256=OlvAOwEoYY5f4NO7BdhLFRT_i_E3YzJDb_ptKKK2huY,29478
+crawlee/storage_clients/_sql/_storage_client.py,sha256=ITtMpwfotIW4SZjO4rycB5wfMKaqTAJgMvzcUZxckrk,10905
 crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
 crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
 crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
-crawlee/storages/_key_value_store.py,sha256=ik--ZPCzOiG5hmm6k5LNH_FO9P3MoW0UvTKiGVf1RIY,10206
+crawlee/storages/_key_value_store.py,sha256=xdkMJYdH3zXzwB3jtkijq-YkMlwBtfXxDFIUlpvpXAE,10298
 crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
 crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
 crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
 crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-crawlee-1.0.2b6.dist-info/METADATA,sha256=c6XbvL3HIwDkINDlVWZh1efOgt4ttxY5o0ycNu-qlwM,29322
-crawlee-1.0.2b6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-crawlee-1.0.2b6.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
-crawlee-1.0.2b6.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
-crawlee-1.0.2b6.dist-info/RECORD,,
+crawlee-1.0.3.dist-info/METADATA,sha256=HxPqenbeq5JL9rpS1yZe52XjrjYbVqjaBdm8xZ05dPw,29312
+crawlee-1.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+crawlee-1.0.3.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
+crawlee-1.0.3.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+crawlee-1.0.3.dist-info/RECORD,,