crawlee 1.0.0rc1__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. crawlee/_autoscaling/snapshotter.py +1 -1
  2. crawlee/_request.py +2 -1
  3. crawlee/_service_locator.py +44 -24
  4. crawlee/_types.py +76 -17
  5. crawlee/_utils/raise_if_too_many_kwargs.py +12 -0
  6. crawlee/_utils/sitemap.py +3 -1
  7. crawlee/_utils/system.py +3 -3
  8. crawlee/browsers/_playwright_browser_controller.py +20 -14
  9. crawlee/configuration.py +1 -1
  10. crawlee/crawlers/_abstract_http/_abstract_http_crawler.py +3 -1
  11. crawlee/crawlers/_abstract_http/_abstract_http_parser.py +1 -1
  12. crawlee/crawlers/_abstract_http/_http_crawling_context.py +1 -1
  13. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler.py +6 -2
  14. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawler_statistics.py +1 -1
  15. crawlee/crawlers/_adaptive_playwright/_adaptive_playwright_crawling_context.py +2 -1
  16. crawlee/crawlers/_adaptive_playwright/_rendering_type_predictor.py +1 -1
  17. crawlee/crawlers/_basic/_basic_crawler.py +107 -27
  18. crawlee/crawlers/_basic/_logging_utils.py +5 -1
  19. crawlee/crawlers/_playwright/_playwright_crawler.py +6 -1
  20. crawlee/events/_types.py +6 -6
  21. crawlee/fingerprint_suite/_fingerprint_generator.py +3 -0
  22. crawlee/fingerprint_suite/_types.py +2 -2
  23. crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml +2 -2
  24. crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt +3 -0
  25. crawlee/request_loaders/_request_list.py +1 -1
  26. crawlee/request_loaders/_request_loader.py +5 -1
  27. crawlee/request_loaders/_sitemap_request_loader.py +228 -48
  28. crawlee/sessions/_models.py +2 -2
  29. crawlee/statistics/_models.py +1 -1
  30. crawlee/storage_clients/__init__.py +12 -0
  31. crawlee/storage_clients/_base/_storage_client.py +13 -0
  32. crawlee/storage_clients/_file_system/_dataset_client.py +27 -25
  33. crawlee/storage_clients/_file_system/_key_value_store_client.py +27 -23
  34. crawlee/storage_clients/_file_system/_request_queue_client.py +84 -98
  35. crawlee/storage_clients/_file_system/_storage_client.py +16 -3
  36. crawlee/storage_clients/_file_system/_utils.py +0 -0
  37. crawlee/storage_clients/_memory/_dataset_client.py +14 -2
  38. crawlee/storage_clients/_memory/_key_value_store_client.py +14 -2
  39. crawlee/storage_clients/_memory/_request_queue_client.py +43 -12
  40. crawlee/storage_clients/_memory/_storage_client.py +6 -3
  41. crawlee/storage_clients/_sql/__init__.py +6 -0
  42. crawlee/storage_clients/_sql/_client_mixin.py +385 -0
  43. crawlee/storage_clients/_sql/_dataset_client.py +310 -0
  44. crawlee/storage_clients/_sql/_db_models.py +269 -0
  45. crawlee/storage_clients/_sql/_key_value_store_client.py +299 -0
  46. crawlee/storage_clients/_sql/_request_queue_client.py +706 -0
  47. crawlee/storage_clients/_sql/_storage_client.py +282 -0
  48. crawlee/storage_clients/_sql/py.typed +0 -0
  49. crawlee/storage_clients/models.py +10 -10
  50. crawlee/storages/_base.py +3 -1
  51. crawlee/storages/_dataset.py +9 -2
  52. crawlee/storages/_key_value_store.py +9 -2
  53. crawlee/storages/_request_queue.py +7 -2
  54. crawlee/storages/_storage_instance_manager.py +126 -72
  55. {crawlee-1.0.0rc1.dist-info → crawlee-1.0.1.dist-info}/METADATA +12 -5
  56. {crawlee-1.0.0rc1.dist-info → crawlee-1.0.1.dist-info}/RECORD +59 -49
  57. {crawlee-1.0.0rc1.dist-info → crawlee-1.0.1.dist-info}/WHEEL +0 -0
  58. {crawlee-1.0.0rc1.dist-info → crawlee-1.0.1.dist-info}/entry_points.txt +0 -0
  59. {crawlee-1.0.0rc1.dist-info → crawlee-1.0.1.dist-info}/licenses/LICENSE +0 -0
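The headline addition in 1.0.1 is the SQL storage backend under crawlee/storage_clients/_sql/ (items 41-48); its two largest new files are reproduced below. A minimal usage sketch follows, assuming SqlStorageClient is re-exported from crawlee.storage_clients (plausible given the +12 lines in its __init__.py) and accepts a connection_string argument; both assumptions should be checked against the 1.0.1 API reference before use:

    import asyncio

    from crawlee import service_locator
    from crawlee.storage_clients import SqlStorageClient  # re-export assumed, see note above
    from crawlee.storages import Dataset


    async def main() -> None:
        # Register the SQL backend globally; 'connection_string' and the SQLite URL
        # are illustrative assumptions, not confirmed by this diff.
        service_locator.set_storage_client(SqlStorageClient(connection_string='sqlite+aiosqlite:///crawlee.db'))

        dataset = await Dataset.open()  # now backed by the SqlDatasetClient shown below
        await dataset.push_data({'url': 'https://crawlee.dev'})
        print(await dataset.get_data(limit=10))


    asyncio.run(main())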
crawlee/storage_clients/_sql/_dataset_client.py (new file)
@@ -0,0 +1,310 @@
+from __future__ import annotations
+
+from logging import getLogger
+from typing import TYPE_CHECKING, Any
+
+from sqlalchemy import Select, insert, select
+from typing_extensions import override
+
+from crawlee.storage_clients._base import DatasetClient
+from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
+
+from ._client_mixin import MetadataUpdateParams, SqlClientMixin
+from ._db_models import DatasetItemDb, DatasetMetadataDb
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+    from sqlalchemy import Select
+    from typing_extensions import NotRequired
+
+    from ._storage_client import SqlStorageClient
+
+
+logger = getLogger(__name__)
+
+
+class _DatasetMetadataUpdateParams(MetadataUpdateParams):
+    """Parameters for updating dataset metadata."""
+
+    new_item_count: NotRequired[int]
+    delta_item_count: NotRequired[int]
+
+
+class SqlDatasetClient(DatasetClient, SqlClientMixin):
+    """SQL implementation of the dataset client.
+
+    This client persists dataset items to a SQL database using two tables for storage
+    and retrieval. Items are stored as JSON with automatic ordering preservation.
+
+    The dataset data is stored in SQL database tables following the pattern:
+    - `datasets` table: Contains dataset metadata (id, name, timestamps, item_count)
+    - `dataset_records` table: Contains individual items with JSON data and auto-increment ordering
+
+    Items are stored as a JSON object in SQLite and as JSONB in PostgreSQL. These objects must be JSON-serializable.
+    The `item_id` auto-increment primary key ensures insertion order is preserved.
+    All operations are wrapped in database transactions with CASCADE deletion support.
+    """
+
+    _DEFAULT_NAME = 'default'
+    """Default dataset name used when no name is provided."""
+
+    _METADATA_TABLE = DatasetMetadataDb
+    """SQLAlchemy model for dataset metadata."""
+
+    _ITEM_TABLE = DatasetItemDb
+    """SQLAlchemy model for dataset items."""
+
+    _CLIENT_TYPE = 'Dataset'
+    """Human-readable client type for error messages."""
+
+    def __init__(
+        self,
+        *,
+        id: str,
+        storage_client: SqlStorageClient,
+    ) -> None:
+        """Initialize a new instance.
+
+        Preferably use the `SqlDatasetClient.open` class method to create a new instance.
+        """
+        super().__init__(id=id, storage_client=storage_client)
+
+    @classmethod
+    async def open(
+        cls,
+        *,
+        id: str | None,
+        name: str | None,
+        alias: str | None,
+        storage_client: SqlStorageClient,
+    ) -> SqlDatasetClient:
+        """Open an existing dataset or create a new one.
+
+        Args:
+            id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
+            name: The name of the dataset for named (global scope) storages.
+            alias: The alias of the dataset for unnamed (run scope) storages.
+            storage_client: The SQL storage client instance.
+
+        Returns:
+            An instance for the opened or created storage client.
+
+        Raises:
+            ValueError: If a dataset with the specified ID is not found.
+        """
+        return await cls._safely_open(
+            id=id,
+            name=name,
+            alias=alias,
+            storage_client=storage_client,
+            metadata_model=DatasetMetadata,
+            extra_metadata_fields={'item_count': 0},
+        )
+
+    @override
+    async def get_metadata(self) -> DatasetMetadata:
+        # The database is a single place of truth
+        return await self._get_metadata(DatasetMetadata)
+
+    @override
+    async def drop(self) -> None:
+        """Delete this dataset and all its items from the database.
+
+        This operation is irreversible. Uses CASCADE deletion to remove all related items.
+        """
+        await self._drop()
+
+    @override
+    async def purge(self) -> None:
+        """Remove all items from this dataset while keeping the dataset structure.
+
+        Resets item_count to 0 and deletes all records from dataset_records table.
+        """
+        await self._purge(
+            metadata_kwargs=_DatasetMetadataUpdateParams(
+                new_item_count=0,
+                update_accessed_at=True,
+                update_modified_at=True,
+                force=True,
+            )
+        )
+
+    @override
+    async def push_data(self, data: list[dict[str, Any]] | dict[str, Any]) -> None:
+        if not isinstance(data, list):
+            data = [data]
+
+        db_items: list[dict[str, Any]] = []
+        db_items = [{'dataset_id': self._id, 'data': item} for item in data]
+        stmt = insert(self._ITEM_TABLE).values(db_items)
+
+        async with self.get_session(with_simple_commit=True) as session:
+            await session.execute(stmt)
+
+            await self._update_metadata(
+                session,
+                **_DatasetMetadataUpdateParams(
+                    update_accessed_at=True,
+                    update_modified_at=True,
+                    delta_item_count=len(data),
+                    new_item_count=len(data),
+                    force=True,
+                ),
+            )
+
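push_data above folds the whole batch into a single multi-row INSERT instead of issuing one statement per item. A standalone sketch of the same SQLAlchemy pattern against the DatasetItemDb model from _db_models.py, with a made-up dataset id:

    from sqlalchemy import insert

    from crawlee.storage_clients._sql._db_models import DatasetItemDb

    rows = [{'dataset_id': 'abc123', 'data': {'url': f'https://example.com/{i}'}} for i in range(3)]
    stmt = insert(DatasetItemDb).values(rows)
    print(stmt)  # one INSERT INTO dataset_records (dataset_id, data) VALUES (...), (...), (...)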
+    @override
+    async def get_data(
+        self,
+        *,
+        offset: int = 0,
+        limit: int | None = 999_999_999_999,
+        clean: bool = False,
+        desc: bool = False,
+        fields: list[str] | None = None,
+        omit: list[str] | None = None,
+        unwind: list[str] | None = None,
+        skip_empty: bool = False,
+        skip_hidden: bool = False,
+        flatten: list[str] | None = None,
+        view: str | None = None,
+    ) -> DatasetItemsListPage:
+        stmt = self._prepare_get_stmt(
+            offset=offset,
+            limit=limit,
+            clean=clean,
+            desc=desc,
+            fields=fields,
+            omit=omit,
+            unwind=unwind,
+            skip_empty=skip_empty,
+            skip_hidden=skip_hidden,
+            flatten=flatten,
+            view=view,
+        )
+
+        async with self.get_session() as session:
+            result = await session.execute(stmt)
+            db_items = result.scalars().all()
+
+            updated = await self._update_metadata(session, **_DatasetMetadataUpdateParams(update_accessed_at=True))
+
+            # Commit updates to the metadata
+            if updated:
+                await session.commit()
+
+        items = [db_item.data for db_item in db_items]
+        metadata = await self.get_metadata()
+        return DatasetItemsListPage(
+            items=items,
+            count=len(items),
+            desc=desc,
+            limit=limit or 0,
+            offset=offset or 0,
+            total=metadata.item_count,
+        )
+
+    @override
+    async def iterate_items(
+        self,
+        *,
+        offset: int = 0,
+        limit: int | None = None,
+        clean: bool = False,
+        desc: bool = False,
+        fields: list[str] | None = None,
+        omit: list[str] | None = None,
+        unwind: list[str] | None = None,
+        skip_empty: bool = False,
+        skip_hidden: bool = False,
+    ) -> AsyncIterator[dict[str, Any]]:
+        stmt = self._prepare_get_stmt(
+            offset=offset,
+            limit=limit,
+            clean=clean,
+            desc=desc,
+            fields=fields,
+            omit=omit,
+            unwind=unwind,
+            skip_empty=skip_empty,
+            skip_hidden=skip_hidden,
+        )
+
+        async with self.get_session() as session:
+            db_items = await session.stream_scalars(stmt)
+
+            async for db_item in db_items:
+                yield db_item.data
+
+            updated = await self._update_metadata(session, **_DatasetMetadataUpdateParams(update_accessed_at=True))
+
+            # Commit updates to the metadata
+            if updated:
+                await session.commit()
+
+    def _prepare_get_stmt(
+        self,
+        *,
+        offset: int = 0,
+        limit: int | None = 999_999_999_999,
+        clean: bool = False,
+        desc: bool = False,
+        fields: list[str] | None = None,
+        omit: list[str] | None = None,
+        unwind: list[str] | None = None,
+        skip_empty: bool = False,
+        skip_hidden: bool = False,
+        flatten: list[str] | None = None,
+        view: str | None = None,
+    ) -> Select:
+        # Check for unsupported arguments and log a warning if found.
+        unsupported_args: dict[str, Any] = {
+            'clean': clean,
+            'fields': fields,
+            'omit': omit,
+            'unwind': unwind,
+            'skip_hidden': skip_hidden,
+            'flatten': flatten,
+            'view': view,
+        }
+        unsupported = {k: v for k, v in unsupported_args.items() if v not in (False, None)}
+
+        if unsupported:
+            logger.warning(
+                f'The arguments {list(unsupported.keys())} of get_data are not supported by the '
+                f'{self.__class__.__name__} client.'
+            )
+
+        stmt = select(self._ITEM_TABLE).where(self._ITEM_TABLE.dataset_id == self._id)
+
+        if skip_empty:
+            # Skip items that are empty JSON objects
+            stmt = stmt.where(self._ITEM_TABLE.data != {})
+
+        # Apply ordering by insertion order (item_id)
+        stmt = stmt.order_by(self._ITEM_TABLE.item_id.desc()) if desc else stmt.order_by(self._ITEM_TABLE.item_id.asc())
+
+        return stmt.offset(offset).limit(limit)
+
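_prepare_get_stmt honours only offset, limit, desc and skip_empty; the remaining arguments are ignored with a warning, as shown above. A standalone sketch of the SELECT it produces for the common case, again with a made-up dataset id:

    from sqlalchemy import select

    from crawlee.storage_clients._sql._db_models import DatasetItemDb

    stmt = (
        select(DatasetItemDb)
        .where(DatasetItemDb.dataset_id == 'abc123')
        .order_by(DatasetItemDb.item_id.asc())  # .desc() when desc=True
        .offset(0)
        .limit(100)
    )
    print(stmt)  # SELECT ... FROM dataset_records WHERE ... ORDER BY ... LIMIT :param_1 OFFSET :param_2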
+    def _specific_update_metadata(
+        self,
+        new_item_count: int | None = None,
+        delta_item_count: int | None = None,
+        **_kwargs: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Update the dataset metadata in the database.
+
+        Args:
+            session: The SQLAlchemy AsyncSession to use for the update.
+            new_item_count: If provided, set item count to this value.
+            delta_item_count: If provided, add this value to the current item count.
+        """
+        values_to_set: dict[str, Any] = {}
+
+        if new_item_count is not None:
+            values_to_set['item_count'] = new_item_count
+        elif delta_item_count:
+            # Use database-level for atomic updates
+            values_to_set['item_count'] = self._METADATA_TABLE.item_count + delta_item_count
+
+        return values_to_set
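The delta_item_count branch above returns a SQL expression rather than a Python integer, so the increment runs inside the database and stays correct under concurrent writers. The statement the mixin ultimately executes is roughly the following (the executing code lives in _client_mixin.py, which this diff view does not reproduce):

    from sqlalchemy import update

    from crawlee.storage_clients._sql._db_models import DatasetMetadataDb

    stmt = (
        update(DatasetMetadataDb)
        .where(DatasetMetadataDb.dataset_id == 'abc123')  # made-up id
        .values(item_count=DatasetMetadataDb.item_count + 5)  # atomic, database-side increment
    )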
crawlee/storage_clients/_sql/_db_models.py (new file)
@@ -0,0 +1,269 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Any
+
+from sqlalchemy import JSON, BigInteger, Boolean, ForeignKey, Index, Integer, LargeBinary, String, text
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, synonym
+from sqlalchemy.types import DateTime, TypeDecorator
+from typing_extensions import override
+
+if TYPE_CHECKING:
+    from sqlalchemy.engine import Dialect
+    from sqlalchemy.types import TypeEngine
+
+
+class AwareDateTime(TypeDecorator):
+    """Custom SQLAlchemy type for timezone-aware datetime handling.
+
+    Ensures all datetime values are timezone-aware by adding UTC timezone to
+    naive datetime values from databases that don't store timezone information.
+    """
+
+    impl = DateTime(timezone=True)
+    cache_ok = True
+
+    @override
+    def process_result_value(self, value: datetime | None, dialect: Dialect) -> datetime | None:
+        """Add UTC timezone to naive datetime values."""
+        if value is not None and value.tzinfo is None:
+            return value.replace(tzinfo=timezone.utc)
+        return value
+
+
+class JsonField(TypeDecorator):
+    """Uses JSONB for PostgreSQL and JSON for other databases."""
+
+    impl = JSON
+    cache_ok = True
+
+    def load_dialect_impl(self, dialect: Dialect) -> TypeEngine[JSON | JSONB]:
+        """Load the appropriate dialect implementation for the JSON type."""
+        if dialect.name == 'postgresql':
+            return dialect.type_descriptor(JSONB())
+        return dialect.type_descriptor(JSON())
+
+
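Both type decorators above can be exercised directly against SQLAlchemy dialect objects, without a database connection; a quick sketch:

    from datetime import datetime

    from sqlalchemy.dialects.postgresql.base import PGDialect
    from sqlalchemy.dialects.sqlite.base import SQLiteDialect

    from crawlee.storage_clients._sql._db_models import AwareDateTime, JsonField

    # JsonField resolves to JSONB only under the PostgreSQL dialect, plain JSON elsewhere.
    print(JsonField().load_dialect_impl(PGDialect()))
    print(JsonField().load_dialect_impl(SQLiteDialect()))

    # AwareDateTime re-attaches UTC to naive datetimes read back from e.g. SQLite.
    aware = AwareDateTime().process_result_value(datetime(2025, 1, 1, 12, 0), SQLiteDialect())
    print(aware.tzinfo)  # UTC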
+class Base(DeclarativeBase):
+    """Base class for all database models for correct type annotations."""
+
+
+class StorageMetadataDb:
+    """Base database model for storage metadata."""
+
+    internal_name: Mapped[str] = mapped_column(String, nullable=False, index=True, unique=True)
+    """Internal unique name for a storage instance based on a name or alias."""
+
+    name: Mapped[str | None] = mapped_column(String, nullable=True, unique=True)
+    """Human-readable name. None becomes 'default' in database to enforce uniqueness."""
+
+    accessed_at: Mapped[datetime] = mapped_column(AwareDateTime, nullable=False)
+    """Last access datetime for usage tracking."""
+
+    created_at: Mapped[datetime] = mapped_column(AwareDateTime, nullable=False)
+    """Creation datetime."""
+
+    modified_at: Mapped[datetime] = mapped_column(AwareDateTime, nullable=False)
+    """Last modification datetime."""
+
+
+class DatasetMetadataDb(StorageMetadataDb, Base):
+    """Metadata table for datasets."""
+
+    __tablename__ = 'datasets'
+
+    dataset_id: Mapped[str] = mapped_column(String(20), nullable=False, primary_key=True)
+    """Unique identifier for the dataset."""
+
+    item_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    """Number of items in the dataset."""
+
+    # Relationship to dataset items with cascade deletion
+    items: Mapped[list[DatasetItemDb]] = relationship(
+        back_populates='dataset', cascade='all, delete-orphan', lazy='noload'
+    )
+
+    id = synonym('dataset_id')
+    """Alias for dataset_id to match Pydantic expectations."""
+
+
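Because StorageMetadataDb is a plain mixin (not mapped on its own), its columns are folded into each concrete table. The DDL generated for the datasets table above can be inspected offline:

    from sqlalchemy.schema import CreateTable

    from crawlee.storage_clients._sql._db_models import DatasetMetadataDb

    # Prints CREATE TABLE datasets (...) including the mixin columns
    # (internal_name, name, accessed_at, created_at, modified_at).
    print(CreateTable(DatasetMetadataDb.__table__))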
+class RequestQueueMetadataDb(StorageMetadataDb, Base):
+    """Metadata table for request queues."""
+
+    __tablename__ = 'request_queues'
+
+    request_queue_id: Mapped[str] = mapped_column(String(20), nullable=False, primary_key=True)
+    """Unique identifier for the request queue."""
+
+    had_multiple_clients: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    """Flag indicating if multiple clients have accessed this queue."""
+
+    handled_request_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    """Number of requests processed."""
+
+    pending_request_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    """Number of requests waiting to be processed."""
+
+    total_request_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    """Total number of requests ever added to this queue."""
+
+    # Relationship to queue requests with cascade deletion
+    requests: Mapped[list[RequestDb]] = relationship(
+        back_populates='queue', cascade='all, delete-orphan', lazy='noload'
+    )
+    # Relationship to queue state
+    state: Mapped[RequestQueueStateDb] = relationship(
+        back_populates='queue', cascade='all, delete-orphan', lazy='noload'
+    )
+
+    id = synonym('request_queue_id')
+    """Alias for request_queue_id to match Pydantic expectations."""
+
+
+class KeyValueStoreMetadataDb(StorageMetadataDb, Base):
+    """Metadata table for key-value stores."""
+
+    __tablename__ = 'key_value_stores'
+
+    key_value_store_id: Mapped[str] = mapped_column(String(20), nullable=False, primary_key=True)
+    """Unique identifier for the key-value store."""
+
+    # Relationship to store records with cascade deletion
+    records: Mapped[list[KeyValueStoreRecordDb]] = relationship(
+        back_populates='kvs', cascade='all, delete-orphan', lazy='noload'
+    )
+
+    id = synonym('key_value_store_id')
+    """Alias for key_value_store_id to match Pydantic expectations."""
+
+
+class KeyValueStoreRecordDb(Base):
+    """Records table for key-value stores."""
+
+    __tablename__ = 'key_value_store_records'
+
+    key_value_store_id: Mapped[str] = mapped_column(
+        String(20),
+        ForeignKey('key_value_stores.key_value_store_id', ondelete='CASCADE'),
+        primary_key=True,
+        index=True,
+        nullable=False,
+    )
+    """Foreign key to metadata key-value store record."""
+
+    key: Mapped[str] = mapped_column(String(255), primary_key=True)
+    """The key part of the key-value pair."""
+
+    value: Mapped[bytes] = mapped_column(LargeBinary, nullable=False)
+    """Value stored as binary data to support any content type."""
+
+    content_type: Mapped[str] = mapped_column(String(50), nullable=False)
+    """MIME type for proper value deserialization."""
+
+    size: Mapped[int | None] = mapped_column(Integer, nullable=False, default=0)
+    """Size of stored value in bytes."""
+
+    # Relationship back to parent store
+    kvs: Mapped[KeyValueStoreMetadataDb] = relationship(back_populates='records')
+
+    storage_id = synonym('key_value_store_id')
+    """Alias for key_value_store_id to match SqlClientMixin expectations."""
+
+
+class DatasetItemDb(Base):
+    """Items table for datasets."""
+
+    __tablename__ = 'dataset_records'
+
+    item_id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    """Auto-increment primary key preserving insertion order."""
+
+    dataset_id: Mapped[str] = mapped_column(
+        String(20),
+        ForeignKey('datasets.dataset_id', ondelete='CASCADE'),
+        index=True,
+    )
+    """Foreign key to metadata dataset record."""
+
+    data: Mapped[list[dict[str, Any]] | dict[str, Any]] = mapped_column(JsonField, nullable=False)
+    """JSON serializable item data."""
+
+    # Relationship back to parent dataset
+    dataset: Mapped[DatasetMetadataDb] = relationship(back_populates='items')
+
+    storage_id = synonym('dataset_id')
+    """Alias for dataset_id to match SqlClientMixin expectations."""
+
+
+class RequestDb(Base):
+    """Requests table for request queues."""
+
+    __tablename__ = 'request_queue_records'
+    __table_args__ = (
+        Index(
+            'idx_fetch_available',
+            'request_queue_id',
+            'is_handled',
+            'time_blocked_until',
+            'sequence_number',
+            postgresql_where=text('is_handled = false'),
+        ),
+    )
+
+    request_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
+    """Unique identifier for the request representing the unique_key."""
+
+    request_queue_id: Mapped[str] = mapped_column(
+        String(20), ForeignKey('request_queues.request_queue_id', ondelete='CASCADE'), primary_key=True
+    )
+    """Foreign key to metadata request queue record."""
+
+    data: Mapped[str] = mapped_column(String, nullable=False)
+    """JSON-serialized Request object."""
+
+    sequence_number: Mapped[int] = mapped_column(Integer, nullable=False)
+    """Ordering sequence: negative for forefront, positive for regular."""
+
+    is_handled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    """Processing status flag."""
+
+    time_blocked_until: Mapped[datetime | None] = mapped_column(AwareDateTime, nullable=True)
+    """Timestamp until which this request is considered blocked for processing by other clients."""
+
+    client_key: Mapped[str | None] = mapped_column(String(32), nullable=True)
+    """Identifier of the client that has currently locked this request for processing."""
+
+    # Relationship back to metadata table
+    queue: Mapped[RequestQueueMetadataDb] = relationship(back_populates='requests')
+
+    storage_id = synonym('request_queue_id')
+    """Alias for request_queue_id to match SqlClientMixin expectations."""
+
+
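The composite idx_fetch_available index above (partial on PostgreSQL via postgresql_where) is shaped for the queue's fetch path. The actual query lives in _sql/_request_queue_client.py, which this diff view does not reproduce; the sketch below only illustrates the access pattern the index serves:

    from datetime import datetime, timezone

    from sqlalchemy import or_, select

    from crawlee.storage_clients._sql._db_models import RequestDb

    now = datetime.now(timezone.utc)
    stmt = (
        select(RequestDb)
        .where(
            RequestDb.request_queue_id == 'abc123',  # made-up id
            RequestDb.is_handled.is_(False),
            or_(RequestDb.time_blocked_until.is_(None), RequestDb.time_blocked_until < now),
        )
        .order_by(RequestDb.sequence_number.asc())  # forefront requests sort first (negative numbers)
        .limit(25)
    )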
+class RequestQueueStateDb(Base):
+    """State table for request queues."""
+
+    __tablename__ = 'request_queue_state'
+
+    request_queue_id: Mapped[str] = mapped_column(
+        String(20), ForeignKey('request_queues.request_queue_id', ondelete='CASCADE'), primary_key=True
+    )
+    """Foreign key to metadata request queue record."""
+
+    sequence_counter: Mapped[int] = mapped_column(Integer, nullable=False, default=1)
+    """Counter for regular request ordering (positive)."""
+
+    forefront_sequence_counter: Mapped[int] = mapped_column(Integer, nullable=False, default=-1)
+    """Counter for forefront request ordering (negative)."""
+
+    # Relationship back to metadata table
+    queue: Mapped[RequestQueueMetadataDb] = relationship(back_populates='state')
+
+
+class VersionDb(Base):
+    """Table for storing the database schema version."""
+
+    __tablename__ = 'version'
+
+    version: Mapped[str] = mapped_column(String(10), nullable=False, primary_key=True)
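All models above share the same Base, so the full schema, including the version table, can be created in one call. SqlStorageClient presumably does something equivalent on first use; this standalone sketch assumes the aiosqlite driver is available:

    import asyncio

    from sqlalchemy.ext.asyncio import create_async_engine

    from crawlee.storage_clients._sql._db_models import Base


    async def create_schema() -> None:
        engine = create_async_engine('sqlite+aiosqlite:///crawlee.db')
        async with engine.begin() as conn:
            await conn.run_sync(Base.metadata.create_all)  # datasets, dataset_records, request_queues, ...
        await engine.dispose()


    asyncio.run(create_schema())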