crawlee 1.0.3b6__py3-none-any.whl → 1.0.5b18__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (43)
  1. crawlee/_service_locator.py +4 -4
  2. crawlee/_utils/recoverable_state.py +32 -8
  3. crawlee/_utils/recurring_task.py +15 -0
  4. crawlee/_utils/robots.py +17 -5
  5. crawlee/_utils/sitemap.py +1 -1
  6. crawlee/_utils/urls.py +9 -2
  7. crawlee/browsers/_browser_pool.py +4 -1
  8. crawlee/browsers/_playwright_browser_controller.py +1 -1
  9. crawlee/browsers/_playwright_browser_plugin.py +17 -3
  10. crawlee/browsers/_types.py +1 -1
  11. crawlee/configuration.py +3 -1
  12. crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
  13. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +33 -13
  14. crawlee/crawlers/_basic/_basic_crawler.py +23 -12
  15. crawlee/crawlers/_playwright/_playwright_crawler.py +11 -4
  16. crawlee/fingerprint_suite/_header_generator.py +2 -2
  17. crawlee/otel/crawler_instrumentor.py +3 -3
  18. crawlee/request_loaders/_sitemap_request_loader.py +5 -0
  19. crawlee/sessions/_session_pool.py +1 -1
  20. crawlee/statistics/_error_snapshotter.py +1 -1
  21. crawlee/statistics/_statistics.py +41 -31
  22. crawlee/storage_clients/__init__.py +4 -0
  23. crawlee/storage_clients/_file_system/_request_queue_client.py +24 -6
  24. crawlee/storage_clients/_redis/__init__.py +6 -0
  25. crawlee/storage_clients/_redis/_client_mixin.py +295 -0
  26. crawlee/storage_clients/_redis/_dataset_client.py +325 -0
  27. crawlee/storage_clients/_redis/_key_value_store_client.py +264 -0
  28. crawlee/storage_clients/_redis/_request_queue_client.py +586 -0
  29. crawlee/storage_clients/_redis/_storage_client.py +146 -0
  30. crawlee/storage_clients/_redis/_utils.py +23 -0
  31. crawlee/storage_clients/_redis/lua_scripts/atomic_bloom_add_requests.lua +36 -0
  32. crawlee/storage_clients/_redis/lua_scripts/atomic_fetch_request.lua +49 -0
  33. crawlee/storage_clients/_redis/lua_scripts/atomic_set_add_requests.lua +37 -0
  34. crawlee/storage_clients/_redis/lua_scripts/reclaim_stale_requests.lua +34 -0
  35. crawlee/storage_clients/_redis/py.typed +0 -0
  36. crawlee/storage_clients/_sql/_db_models.py +1 -2
  37. crawlee/storage_clients/_sql/_storage_client.py +9 -0
  38. crawlee/storages/_key_value_store.py +5 -2
  39. {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/METADATA +9 -5
  40. {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/RECORD +43 -31
  41. {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/WHEEL +0 -0
  42. {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/entry_points.txt +0 -0
  43. {crawlee-1.0.3b6.dist-info → crawlee-1.0.5b18.dist-info}/licenses/LICENSE +0 -0
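
The headline addition in this release is a Redis-backed storage layer under `crawlee/storage_clients/_redis/`; its two largest modules are reproduced below. For orientation, a minimal wiring sketch follows. It assumes that `crawlee/storage_clients/__init__.py` (which gains four lines in this diff but is not shown) re-exports a `RedisStorageClient`, and that this class accepts a `redis.asyncio.Redis` instance the way the per-storage clients below do; neither detail is visible in this diff.

import asyncio

from redis.asyncio import Redis

from crawlee import service_locator
from crawlee.storage_clients import RedisStorageClient  # assumed export; __init__.py contents not shown


async def main() -> None:
    # Placeholder URL; the Redis server must have the RedisJSON module loaded.
    redis = Redis.from_url('redis://localhost:6379')

    # Register the Redis backend for all storages opened later in the run.
    # The `redis=` keyword is an assumption mirroring the clients shown below.
    service_locator.set_storage_client(RedisStorageClient(redis=redis))


asyncio.run(main())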
crawlee/storage_clients/_redis/_dataset_client.py
@@ -0,0 +1,325 @@
+from __future__ import annotations
+
+from logging import getLogger
+from typing import TYPE_CHECKING, Any, cast
+
+from typing_extensions import NotRequired, override
+
+from crawlee.storage_clients._base import DatasetClient
+from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
+
+from ._client_mixin import MetadataUpdateParams, RedisClientMixin
+from ._utils import await_redis_response
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+    from redis.asyncio import Redis
+    from redis.asyncio.client import Pipeline
+
+logger = getLogger(__name__)
+
+
+class _DatasetMetadataUpdateParams(MetadataUpdateParams):
+    """Parameters for updating dataset metadata."""
+
+    new_item_count: NotRequired[int]
+    delta_item_count: NotRequired[int]
+
+
+class RedisDatasetClient(DatasetClient, RedisClientMixin):
+    """Redis implementation of the dataset client.
+
+    This client persists dataset items to Redis using JSON arrays for efficient storage and retrieval.
+    Items are stored as JSON objects with automatic ordering preservation through Redis JSON array operations.
+
+    The dataset data is stored in Redis using the following key pattern:
+    - `datasets:{name}:items` - Redis JSON array containing all dataset items.
+    - `datasets:{name}:metadata` - Redis JSON object containing dataset metadata.
+
+    Items must be JSON-serializable dictionaries. Single items or lists of items can be pushed to the dataset.
+    The item ordering is preserved through Redis JSON array operations. All operations provide atomic consistency
+    through Redis transactions and pipeline operations.
+    """
+
+    _DEFAULT_NAME = 'default'
+    """Default Dataset name key prefix when none provided."""
+
+    _MAIN_KEY = 'datasets'
+    """Main Redis key prefix for Dataset."""
+
+    _CLIENT_TYPE = 'Dataset'
+    """Human-readable client type for error messages."""
+
+    def __init__(self, storage_name: str, storage_id: str, redis: Redis) -> None:
+        """Initialize a new instance.
+
+        Preferably use the `RedisDatasetClient.open` class method to create a new instance.
+
+        Args:
+            storage_name: Internal storage name used for Redis keys.
+            storage_id: Unique identifier for the dataset.
+            redis: Redis client instance.
+        """
+        super().__init__(storage_name=storage_name, storage_id=storage_id, redis=redis)
+
+    @property
+    def _items_key(self) -> str:
+        """Return the Redis key for the items of this dataset."""
+        return f'{self._MAIN_KEY}:{self._storage_name}:items'
+
+    @classmethod
+    async def open(
+        cls,
+        *,
+        id: str | None,
+        name: str | None,
+        alias: str | None,
+        redis: Redis,
+    ) -> RedisDatasetClient:
+        """Open or create a new Redis dataset client.
+
+        This method attempts to open an existing dataset from the Redis database. If a dataset with the specified
+        ID or name exists, it loads the metadata from the database. If no existing dataset is found, a new one
+        is created.
+
+        Args:
+            id: The ID of the dataset. If not provided, a random ID will be generated.
+            name: The name of the dataset for named (global scope) storages.
+            alias: The alias of the dataset for unnamed (run scope) storages.
+            redis: Redis client instance.
+
+        Returns:
+            An instance for the opened or created storage client.
+        """
+        return await cls._open(
+            id=id,
+            name=name,
+            alias=alias,
+            redis=redis,
+            metadata_model=DatasetMetadata,
+            extra_metadata_fields={'item_count': 0},
+            instance_kwargs={},
+        )
+
+    @override
+    async def get_metadata(self) -> DatasetMetadata:
+        return await self._get_metadata(DatasetMetadata)
+
+    @override
+    async def drop(self) -> None:
+        await self._drop(extra_keys=[self._items_key])
+
+    @override
+    async def purge(self) -> None:
+        await self._purge(
+            extra_keys=[self._items_key],
+            metadata_kwargs=_DatasetMetadataUpdateParams(
+                new_item_count=0, update_accessed_at=True, update_modified_at=True
+            ),
+        )
+
+    @override
+    async def push_data(self, data: list[dict[str, Any]] | dict[str, Any]) -> None:
+        if isinstance(data, dict):
+            data = [data]
+
+        async with self._get_pipeline() as pipe:
+            pipe.json().arrappend(self._items_key, '$', *data)
+            await self._update_metadata(
+                pipe,
+                **_DatasetMetadataUpdateParams(
+                    update_accessed_at=True, update_modified_at=True, delta_item_count=len(data)
+                ),
+            )
+
+    @override
+    async def get_data(
+        self,
+        *,
+        offset: int = 0,
+        limit: int | None = 999_999_999_999,
+        clean: bool = False,
+        desc: bool = False,
+        fields: list[str] | None = None,
+        omit: list[str] | None = None,
+        unwind: list[str] | None = None,
+        skip_empty: bool = False,
+        skip_hidden: bool = False,
+        flatten: list[str] | None = None,
+        view: str | None = None,
+    ) -> DatasetItemsListPage:
+        # Check for unsupported arguments and log a warning if found
+        unsupported_args: dict[str, Any] = {
+            'clean': clean,
+            'fields': fields,
+            'omit': omit,
+            'unwind': unwind,
+            'skip_hidden': skip_hidden,
+            'flatten': flatten,
+            'view': view,
+        }
+        unsupported = {k: v for k, v in unsupported_args.items() if v not in (False, None)}
+
+        if unsupported:
+            logger.warning(
+                f'The arguments {list(unsupported.keys())} of get_data are not supported '
+                f'by the {self.__class__.__name__} client.'
+            )
+
+        metadata = await self.get_metadata()
+
+        total = metadata.item_count
+        json_path = '$'
+
+        # Apply sorting and pagination
+        match (desc, offset, limit):
+            case (True, 0, int()):
+                json_path += f'[-{limit}:]'
+            case (True, int(), None):
+                json_path += f'[:-{offset}]'
+            case (True, int(), int()):
+                json_path += f'[-{offset + limit}:-{offset}]'
+            case (False, 0, int()):
+                json_path += f'[:{limit}]'
+            case (False, int(), None):
+                json_path += f'[{offset}:]'
+            case (False, int(), int()):
+                json_path += f'[{offset}:{offset + limit}]'
+
+        if json_path == '$':
+            json_path = '$[*]'
+
+        data = await await_redis_response(self._redis.json().get(self._items_key, json_path))
+
+        if data is None:
+            data = []
+
+        if skip_empty:
+            data = [item for item in data if item]
+
+        if desc:
+            data = list(reversed(data))
+
+        async with self._get_pipeline() as pipe:
+            await self._update_metadata(pipe, **_DatasetMetadataUpdateParams(update_accessed_at=True))
+
+        return DatasetItemsListPage(
+            count=len(data),
+            offset=offset,
+            limit=limit or (total - offset),
+            total=total,
+            desc=desc,
+            items=data,
+        )
+
+    @override
+    async def iterate_items(
+        self,
+        *,
+        offset: int = 0,
+        limit: int | None = None,
+        clean: bool = False,
+        desc: bool = False,
+        fields: list[str] | None = None,
+        omit: list[str] | None = None,
+        unwind: list[str] | None = None,
+        skip_empty: bool = False,
+        skip_hidden: bool = False,
+    ) -> AsyncIterator[dict[str, Any]]:
+        """Iterate over dataset items one by one.
+
+        This method yields items individually instead of loading all items at once,
+        which is more memory efficient for large datasets.
+        """
+        # Log warnings for unsupported arguments
+        unsupported_args: dict[str, Any] = {
+            'clean': clean,
+            'fields': fields,
+            'omit': omit,
+            'unwind': unwind,
+            'skip_hidden': skip_hidden,
+        }
+        unsupported = {k: v for k, v in unsupported_args.items() if v not in (False, None)}
+
+        if unsupported:
+            logger.warning(
+                f'The arguments {list(unsupported.keys())} of iterate_items are not supported '
+                f'by the {self.__class__.__name__} client.'
+            )
+
+        metadata = await self.get_metadata()
+        total_items = metadata.item_count
+
+        # Calculate actual range based on parameters
+        start_idx = offset
+        end_idx = min(total_items, offset + limit) if limit is not None else total_items
+
+        # Update accessed_at timestamp
+        async with self._get_pipeline() as pipe:
+            await self._update_metadata(pipe, **_DatasetMetadataUpdateParams(update_accessed_at=True))
+
+        # Process items in batches for better network efficiency
+        batch_size = 100
+
+        for batch_start in range(start_idx, end_idx, batch_size):
+            batch_end = min(batch_start + batch_size, end_idx)
+
+            # Build JsonPath for batch slice
+            if desc:
+                # For descending order, we need to reverse the slice calculation
+                desc_batch_start = total_items - batch_end
+                desc_batch_end = total_items - batch_start
+                json_path = f'$[{desc_batch_start}:{desc_batch_end}]'
+            else:
+                json_path = f'$[{batch_start}:{batch_end}]'
+
+            # Get batch of items
+            batch_items = await await_redis_response(self._redis.json().get(self._items_key, json_path))
+
+            # Handle case where batch_items might be None or not a list
+            if batch_items is None:
+                continue
+
+            # Reverse batch if desc order (since we got items in normal order but need desc)
+            items_iter = reversed(batch_items) if desc else iter(batch_items)
+
+            # Yield items from batch
+            for item in items_iter:
+                # Apply skip_empty filter
+                if skip_empty and not item:
+                    continue
+
+                yield cast('dict[str, Any]', item)
+
+        async with self._get_pipeline() as pipe:
+            await self._update_metadata(pipe, **_DatasetMetadataUpdateParams(update_accessed_at=True))
+
+    @override
+    async def _create_storage(self, pipeline: Pipeline) -> None:
+        """Create the main dataset keys in Redis."""
+        # Create an empty JSON array for items
+        await await_redis_response(pipeline.json().set(self._items_key, '$', []))
+
+    @override
+    async def _specific_update_metadata(
+        self,
+        pipeline: Pipeline,
+        *,
+        new_item_count: int | None = None,
+        delta_item_count: int | None = None,
+        **_kwargs: Any,
+    ) -> None:
+        """Update the dataset metadata in the database.
+
+        Args:
+            pipeline: The Redis pipeline to use for the update.
+            new_item_count: If provided, update the item count to this value.
+            delta_item_count: If provided, increment the item count by this value.
+        """
+        if new_item_count is not None:
+            await await_redis_response(
+                pipeline.json().set(self.metadata_key, '$.item_count', new_item_count, nx=False, xx=True)
+            )
+        elif delta_item_count is not None:
+            await await_redis_response(pipeline.json().numincrby(self.metadata_key, '$.item_count', delta_item_count))
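
As a point of reference, here is a minimal usage sketch for the dataset client above. It assumes a reachable Redis instance with the RedisJSON module loaded (the client issues `json().set`, `json().arrappend`, and `json().get` commands); the connection URL and dataset name are illustrative placeholders.

import asyncio

from redis.asyncio import Redis

from crawlee.storage_clients._redis._dataset_client import RedisDatasetClient


async def main() -> None:
    redis = Redis.from_url('redis://localhost:6379')  # placeholder URL; requires RedisJSON

    # Open (or create) a named dataset; per the docstring, items land under
    # `datasets:{name}:items` and metadata under `datasets:{name}:metadata`.
    dataset = await RedisDatasetClient.open(id=None, name='products', alias=None, redis=redis)

    await dataset.push_data([{'url': 'https://example.com', 'price': 42}])

    page = await dataset.get_data(offset=0, limit=10)
    print(page.count, page.items)


asyncio.run(main())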
crawlee/storage_clients/_redis/_key_value_store_client.py
@@ -0,0 +1,264 @@
+from __future__ import annotations
+
+import json
+from logging import getLogger
+from typing import TYPE_CHECKING, Any
+
+from typing_extensions import override
+
+from crawlee._utils.file import infer_mime_type
+from crawlee.storage_clients._base import KeyValueStoreClient
+from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
+
+from ._client_mixin import MetadataUpdateParams, RedisClientMixin
+from ._utils import await_redis_response
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+    from redis.asyncio import Redis
+
+logger = getLogger(__name__)
+
+
+class RedisKeyValueStoreClient(KeyValueStoreClient, RedisClientMixin):
+    """Redis implementation of the key-value store client.
+
+    This client persists key-value data to Redis using hash data structures for efficient storage and retrieval.
+    Keys are mapped to values with automatic content type detection and size tracking for metadata management.
+
+    The key-value store data is stored in Redis using the following key pattern:
+    - `key_value_stores:{name}:items` - Redis hash containing key-value pairs (values stored as binary data).
+    - `key_value_stores:{name}:metadata_items` - Redis hash containing metadata for each key.
+    - `key_value_stores:{name}:metadata` - Redis JSON object containing store metadata.
+
+    Values are serialized based on their type: JSON objects are stored as UTF-8 encoded JSON strings,
+    text values as UTF-8 encoded strings, and binary data as-is. The implementation automatically handles
+    content type detection and maintains metadata about each record including size and MIME type information.
+
+    All operations are atomic through Redis hash operations and pipeline transactions. The client supports
+    concurrent access through Redis's built-in atomic operations for hash fields.
+    """
+
+    _DEFAULT_NAME = 'default'
+    """Default Key-Value Store name key prefix when none provided."""
+
+    _MAIN_KEY = 'key_value_stores'
+    """Main Redis key prefix for Key-Value Store."""
+
+    _CLIENT_TYPE = 'Key-value store'
+    """Human-readable client type for error messages."""
+
+    def __init__(self, storage_name: str, storage_id: str, redis: Redis) -> None:
+        """Initialize a new instance.
+
+        Preferably use the `RedisKeyValueStoreClient.open` class method to create a new instance.
+        """
+        super().__init__(storage_name=storage_name, storage_id=storage_id, redis=redis)
+
+    @property
+    def _items_key(self) -> str:
+        """Return the Redis key for the items of KVS."""
+        return f'{self._MAIN_KEY}:{self._storage_name}:items'
+
+    @property
+    def _metadata_items_key(self) -> str:
+        """Return the Redis key for the items metadata of KVS."""
+        return f'{self._MAIN_KEY}:{self._storage_name}:metadata_items'
+
+    @classmethod
+    async def open(
+        cls,
+        *,
+        id: str | None,
+        name: str | None,
+        alias: str | None,
+        redis: Redis,
+    ) -> RedisKeyValueStoreClient:
+        """Open or create a new Redis key-value store client.
+
+        This method attempts to open an existing key-value store from the Redis database. If a store with the
+        specified ID or name exists, it loads the metadata from the database. If no existing store is found,
+        a new one is created.
+
+        Args:
+            id: The ID of the key-value store. If not provided, a random ID will be generated.
+            name: The name of the key-value store for named (global scope) storages.
+            alias: The alias of the key-value store for unnamed (run scope) storages.
+            redis: Redis client instance.
+
+        Returns:
+            An instance for the opened or created storage client.
+        """
+        return await cls._open(
+            id=id,
+            name=name,
+            alias=alias,
+            redis=redis,
+            metadata_model=KeyValueStoreMetadata,
+            extra_metadata_fields={},
+            instance_kwargs={},
+        )
+
+    @override
+    async def get_metadata(self) -> KeyValueStoreMetadata:
+        return await self._get_metadata(KeyValueStoreMetadata)
+
+    @override
+    async def drop(self) -> None:
+        await self._drop(extra_keys=[self._items_key, self._metadata_items_key])
+
+    @override
+    async def purge(self) -> None:
+        await self._purge(
+            extra_keys=[self._items_key, self._metadata_items_key],
+            metadata_kwargs=MetadataUpdateParams(update_accessed_at=True, update_modified_at=True),
+        )
+
+    @override
+    async def set_value(self, *, key: str, value: Any, content_type: str | None = None) -> None:
+        # Special handling for None values
+        if value is None:
+            content_type = 'application/x-none'  # Special content type to identify None values
+            value_bytes = b''
+        else:
+            content_type = content_type or infer_mime_type(value)
+
+            # Serialize the value to bytes.
+            if 'application/json' in content_type:
+                value_bytes = json.dumps(value, default=str, ensure_ascii=False).encode('utf-8')
+            elif isinstance(value, str):
+                value_bytes = value.encode('utf-8')
+            elif isinstance(value, (bytes, bytearray)):
+                value_bytes = value
+            else:
+                # Fallback: attempt to convert to string and encode.
+                value_bytes = str(value).encode('utf-8')
+
+        size = len(value_bytes)
+        item_metadata = KeyValueStoreRecordMetadata(
+            key=key,
+            content_type=content_type,
+            size=size,
+        )
+
+        async with self._get_pipeline() as pipe:
+            # redis-py typing issue
+            await await_redis_response(pipe.hset(self._items_key, key, value_bytes))  # type: ignore[arg-type]
+
+            await await_redis_response(
+                pipe.hset(
+                    self._metadata_items_key,
+                    key,
+                    item_metadata.model_dump_json(),
+                )
+            )
+            await self._update_metadata(pipe, **MetadataUpdateParams(update_accessed_at=True, update_modified_at=True))
+
+    @override
+    async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:
+        serialized_metadata_item = await await_redis_response(self._redis.hget(self._metadata_items_key, key))
+
+        async with self._get_pipeline() as pipe:
+            await self._update_metadata(pipe, **MetadataUpdateParams(update_accessed_at=True))
+
+        if not isinstance(serialized_metadata_item, (str, bytes, bytearray)):
+            logger.warning(f'Metadata for key "{key}" is missing or invalid.')
+            return None
+
+        metadata_item = KeyValueStoreRecordMetadata.model_validate_json(serialized_metadata_item)
+
+        # Handle None values
+        if metadata_item.content_type == 'application/x-none':
+            return KeyValueStoreRecord(value=None, **metadata_item.model_dump())
+
+        # Query the record by key
+        # redis-py typing issue
+        value_bytes: bytes | None = await await_redis_response(
+            self._redis.hget(self._items_key, key)  # type: ignore[arg-type]
+        )
+
+        if value_bytes is None:
+            logger.warning(f'Value for key "{key}" is missing.')
+            return None
+
+        # Handle JSON values
+        if 'application/json' in metadata_item.content_type:
+            try:
+                value = json.loads(value_bytes.decode('utf-8'))
+            except (json.JSONDecodeError, UnicodeDecodeError):
+                logger.warning(f'Failed to decode JSON value for key "{key}"')
+                return None
+        # Handle text values
+        elif metadata_item.content_type.startswith('text/'):
+            try:
+                value = value_bytes.decode('utf-8')
+            except UnicodeDecodeError:
+                logger.warning(f'Failed to decode text value for key "{key}"')
+                return None
+        # Handle binary values
+        else:
+            value = value_bytes
+
+        return KeyValueStoreRecord(value=value, **metadata_item.model_dump())
+
+    @override
+    async def delete_value(self, *, key: str) -> None:
+        async with self._get_pipeline() as pipe:
+            await await_redis_response(pipe.hdel(self._items_key, key))
+            await await_redis_response(pipe.hdel(self._metadata_items_key, key))
+            await self._update_metadata(pipe, **MetadataUpdateParams(update_accessed_at=True, update_modified_at=True))
+
+    @override
+    async def iterate_keys(
+        self,
+        *,
+        exclusive_start_key: str | None = None,
+        limit: int | None = None,
+    ) -> AsyncIterator[KeyValueStoreRecordMetadata]:
+        items_data = await await_redis_response(self._redis.hgetall(self._metadata_items_key))
+
+        if not items_data:
+            return  # No items to iterate over
+
+        if not isinstance(items_data, dict):
+            raise TypeError('The items data was received in an incorrect format.')
+
+        # Get all keys, sorted alphabetically
+        keys = sorted(items_data.keys())
+
+        # Apply exclusive_start_key filter if provided
+        if exclusive_start_key is not None:
+            bytes_exclusive_start_key = exclusive_start_key.encode()
+            keys = [k for k in keys if k > bytes_exclusive_start_key]
+
+        # Apply limit if provided
+        if limit is not None:
+            keys = keys[:limit]
+
+        # Yield metadata for each key
+        for key in keys:
+            record = items_data[key]
+            yield KeyValueStoreRecordMetadata.model_validate_json(record)
+
+        async with self._get_pipeline() as pipe:
+            await self._update_metadata(
+                pipe,
+                **MetadataUpdateParams(update_accessed_at=True),
+            )
+
+    @override
+    async def get_public_url(self, *, key: str) -> str:
+        raise NotImplementedError('Public URLs are not supported for Redis key-value stores.')
+
+    @override
+    async def record_exists(self, *, key: str) -> bool:
+        async with self._get_pipeline(with_execute=False) as pipe:
+            await await_redis_response(pipe.hexists(self._items_key, key))
+            await self._update_metadata(
+                pipe,
+                **MetadataUpdateParams(update_accessed_at=True),
+            )
+            results = await pipe.execute()
+
+        return bool(results[0])
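
And a matching sketch for the key-value store client above, under the same assumptions (reachable Redis, placeholder URL and store name). Values here live in plain Redis hashes, so only the store-level metadata object relies on RedisJSON.

import asyncio

from redis.asyncio import Redis

from crawlee.storage_clients._redis._key_value_store_client import RedisKeyValueStoreClient


async def main() -> None:
    redis = Redis.from_url('redis://localhost:6379')  # placeholder URL

    kvs = await RedisKeyValueStoreClient.open(id=None, name='state', alias=None, redis=redis)

    # A dict is stored as UTF-8 JSON with an inferred content type, per set_value above.
    await kvs.set_value(key='progress', value={'done': 10, 'total': 100})

    record = await kvs.get_value(key='progress')
    if record is not None:
        print(record.content_type, record.value)

    # Keys are yielded in sorted order, each with its per-record metadata.
    async for item_metadata in kvs.iterate_keys():
        print(item_metadata.key, item_metadata.size)


asyncio.run(main())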