beanqueue 0.2.2__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {beanqueue-0.2.2 → beanqueue-1.0.0}/PKG-INFO +107 -26
  2. {beanqueue-0.2.2 → beanqueue-1.0.0}/README.md +105 -24
  3. beanqueue-1.0.0/bq/__init__.py +19 -0
  4. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/app.py +22 -3
  5. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/config.py +3 -0
  6. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/events.py +2 -0
  7. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/models/__init__.py +6 -0
  8. beanqueue-1.0.0/bq/models/event.py +76 -0
  9. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/models/task.py +46 -2
  10. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/models/worker.py +0 -2
  11. beanqueue-1.0.0/bq/processors/processor.py +120 -0
  12. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/processors/registry.py +14 -4
  13. beanqueue-1.0.0/bq/processors/retry_policies.py +55 -0
  14. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/services/dispatch.py +21 -3
  15. {beanqueue-0.2.2 → beanqueue-1.0.0}/pyproject.toml +2 -2
  16. beanqueue-0.2.2/bq/__init__.py +0 -10
  17. beanqueue-0.2.2/bq/processors/processor.py +0 -70
  18. {beanqueue-0.2.2 → beanqueue-1.0.0}/LICENSE +0 -0
  19. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/cmds/__init__.py +0 -0
  20. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/cmds/create_tables.py +0 -0
  21. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/cmds/process.py +0 -0
  22. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/cmds/submit.py +0 -0
  23. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/cmds/utils.py +0 -0
  24. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/constants.py +0 -0
  25. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/db/__init__.py +0 -0
  26. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/db/base.py +0 -0
  27. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/db/session.py +0 -0
  28. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/models/helpers.py +0 -0
  29. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/processors/__init__.py +0 -0
  30. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/services/__init__.py +0 -0
  31. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/services/worker.py +0 -0
  32. {beanqueue-0.2.2 → beanqueue-1.0.0}/bq/utils.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: beanqueue
3
- Version: 0.2.2
4
- Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
3
+ Version: 1.0.0
4
+ Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library
5
5
  License: MIT
6
6
  Author: Fang-Pen Lin
7
7
  Author-email: fangpen@launchplatform.com
@@ -19,7 +19,7 @@ Requires-Dist: venusian (>=3.1.0,<4.0.0)
19
19
  Description-Content-Type: text/markdown
20
20
 
21
21
  # BeanQueue [![CircleCI](https://dl.circleci.com/status-badge/img/gh/LaunchPlatform/bq/tree/master.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
22
- BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
22
+ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
23
23
 
24
24
  **Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
25
25
 
@@ -29,8 +29,10 @@ BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://ww
29
29
  - **Easy-to-deploy**: Only rely on PostgreSQL
30
30
  - **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
31
31
  - **Auto-notify**: Notify will automatically be generated and send for inserted or update tasks
32
+ - **Retry**: Built-in and customizable retry-policies
33
+ - **Schedule**: Schedule task to run later
32
34
  - **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is found dead, the others will reschedule the tasks
33
- - **Customizable**: Use it as an library and build your own worker queue
35
+ - **Customizable**: Use it as a library and build your own work queue
34
36
  - **Native DB operations**: Commit your tasks with other db entries altogether without worrying about data inconsistent issue
35
37
 
36
38
  ## Install
@@ -127,6 +129,78 @@ To create tables for BeanQueue, you can run
127
129
  python -m bq.cmds.create_tables
128
130
  ```
129
131
 
132
+ ### Schedule
133
+
134
+ In most cases, a task will be executed as soon as possible after it is created.
135
+ To run a task later, you can set a datetime value to the `scheduled_at` attribute of the task model.
136
+ For example:
137
+
138
+ ```python
139
+ import datetime
140
+
141
+ db = Session()
142
+ task = resize_image.run(width=200, height=300)
143
+ task.scheduled_at = func.now() + datetime.timedelta(minutes=3)
144
+ db.add(task)
145
+ ```
146
+
147
+ Please note that currently, workers won't wake up at the next exact moment when the scheduled tasks are ready to run.
148
+ It has to wait until the polling times out, and eventually, it will see that the task's `scheduled_at` time exceeds the current datetime.
149
+ Therefore, depending on your `POLL_TIMEOUT` setting and the number of your workers when they started processing, the actual execution may be inaccurate.
150
+ If you set the `POLL_TIMEOUT` to 60 seconds, please expect less than 60 seconds of delay.
151
+
152
+ ### Retry
153
+
154
+ To automatically retry a task after failure, you can specify a retry policy to the processor.
155
+
156
+ ```python
157
+ import datetime
158
+ import bq
159
+ from sqlalchemy.orm import Session
160
+
161
+ app = bq.BeanQueue()
162
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
163
+
164
+ @app.processor(channel="images", retry_policy=delay_retry)
165
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
166
+ # resize image here ...
167
+ pass
168
+ ```
169
+
170
+ Currently, we provide some simple common retry policies such as `DelayRetry` and `ExponentialBackoffRetry`.
171
+ Of course, you can define your own retry policy easily by making a function that returns an optional value representing the next scheduled time for retry.
172
+
173
+ ```python
174
+ def my_retry_policy(task: bq.Task) -> typing.Any:
175
+ # calculate delay based on task model ...
176
+ return func.now() + datetime.timedelta(seconds=delay)
177
+ ```
178
+
179
+ To cap how many attempts are allowed, you can also use `LimitAttempt` like this:
180
+
181
+ ```python
182
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
183
+ capped_delay_retry = bq.LimitAttempt(3, delay_retry)
184
+
185
+ @app.processor(channel="images", retry_policy=capped_delay_retry)
186
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
187
+ # resize image here ...
188
+ pass
189
+ ```
190
+
191
+ You can also retry only for specific exception classes with the `retry_exceptions` argument.
192
+
193
+ ```python
194
+ @app.processor(
195
+ channel="images",
196
+ retry_policy=delay_retry,
197
+ retry_exceptions=ValueError,
198
+ )
199
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
200
+ # resize image here ...
201
+ pass
202
+ ```
203
+
130
204
  ### Configurations
131
205
 
132
206
  Configurations can be modified by setting environment variables with `BQ_` prefix.
@@ -145,7 +219,7 @@ container = bq.Container()
145
219
  container.wire(packages=[bq])
146
220
  config = bq.Config(
147
221
  PROCESSOR_PACKAGES=["my_pkgs.processors"],
148
- DATABASE_URL=str(config.DATABASE_URL),
222
+ DATABASE_URL=config.DATABASE_URL,
149
223
  BATCH_SIZE=10,
150
224
  )
151
225
  app = bq.BeanQueue(config=config)
@@ -168,12 +242,16 @@ app.process_tasks(channels=("images",))
168
242
  BeanQueue is designed to be as customizable as much as possible.
169
243
  Of course, you can define your own SQLAlchemy model instead of using the ones we provided.
170
244
 
171
- To make defining your own `Task` model or `Worker` model much easier, you can use our mixin classes:
245
+ To make defining your own `Task`, `Worker` or `Event` model much easier, you can use our mixin classes:
172
246
 
173
247
  - `bq.TaskModelMixin`: provides task model columns
174
248
  - `bq.TaskModelRefWorkerMixin`: provides foreign key column and relationship to `bq.Worker`
249
+ - `bq.TaskModelRefParentMixin`: provides foreign key column and relationship to children `bq.Task` created during processing
250
+ - `bq.TaskModelRefEventMixin`: provides foreign key column and relationship to `bq.Event`
175
251
  - `bq.WorkerModelMixin`: provides worker model columns
176
252
  - `bq.WorkerRefMixin`: provides relationship to `bq.Task`
253
+ - `bq.EventModelMixin`: provides event model columns
254
+ - `bq.EventModelRefTaskMixin`: provides foreign key column and relationship to `bq.Task`
177
255
 
178
256
  Here's an example for defining your own Task model:
179
257
 
@@ -232,13 +310,14 @@ class Worker(bq.WorkerModelMixin, Base):
232
310
  )
233
311
  ```
234
312
 
235
- With the model class ready, you only need to change the `TASK_MODEL` and `WORKER_MODEL` of `Config` to the full Python module name plus the class name like this.
313
+ With the model class ready, you only need to change the `TASK_MODEL`, `WORKER_MODEL` and `EVENT_MODEL` of `Config` to the full Python module name plus the class name like this.
236
314
 
237
315
  ```python
238
316
  import bq
239
317
  config = bq.Config(
240
318
  TASK_MODEL="my_pkgs.models.Task",
241
319
  WORKER_MODEL="my_pkgs.models.Worker",
320
+ EVENT_MODEL="my_pkgs.models.Event",
242
321
  # ... other configs
243
322
  )
244
323
  app = bq.BeanQueue(config)
@@ -246,21 +325,21 @@ app = bq.BeanQueue(config)
246
325
 
247
326
  ## Why?
248
327
 
249
- There are countless worker queue projects. Why make yet another one?
250
- The primary issue with most worker queue tools is their reliance on a standalone broker server.
251
- Our worker queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
252
- However, integrating an external worker queue into the system presents a risk.
253
- The worker queue and the database don't share the same data view, potentially compromising data integrity and reliability.
328
+ There are countless work queue projects. Why make yet another one?
329
+ The primary issue with most work queue tools is their reliance on a standalone broker server.
330
+ Our work queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
331
+ However, integrating an external work queue into the system presents a risk.
332
+ The work queue and the database don't share the same data view, potentially compromising data integrity and reliability.
254
333
 
255
334
  For example, you have a table of `images` to keep the user-uploaded images.
256
- And you have a background worker queue for resizing the uploaded images into different thumbnail sizes.
257
- So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the worker queue.
335
+ And you have a background work queue for resizing the uploaded images into different thumbnail sizes.
336
+ So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the work queue.
258
337
 
259
- Say you push the task to the worker queue immediately after you insert the `images` table then commit like this:
338
+ Say you push the task to the work queue immediately after you insert the `images` table then commit like this:
260
339
 
261
340
  ```
262
341
  1. Insert into the "images" table
263
- 2. Push resizing task to the worker queue
342
+ 2. Push resizing task to the work queue
264
343
  3. Commit db changes
265
344
  ```
266
345
 
@@ -268,21 +347,21 @@ While this might seem like the right way to do it, there's a hidden bug.
268
347
  If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
269
348
  One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
270
349
  But this adds complexity to the system and also increases the latency if the first attempt fails.
271
- Also, if the commit step fails, you will have a failed worker queue job trying to fetch a row from the database that will never exist.
350
+ Also, if the commit step fails, you will have a failed work queue job trying to fetch a row from the database that will never exist.
272
351
 
273
352
  Another approach is to push the resize task after the database changes are committed. It works like this:
274
353
 
275
354
  ```
276
355
  1. Insert into the "images" table
277
356
  2. Commit db changes
278
- 3. Push resizing task to the worker queue
357
+ 3. Push resizing task to the work queue
279
358
  ```
280
359
 
281
360
  With this approach, we don't need to worry about workers picking up the task too early.
282
361
  However, there's another drawback.
283
- If step 3 for pushing a new task to the worker queue fails, the newly inserted `images` row will never be processed.
284
- There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the worker queue storage.
285
- Things will be much easier if we have a worker queue that shares the same consistent view with the worker queue.
362
+ If step 3 for pushing a new task to the work queue fails, the newly inserted `images` row will never be processed.
363
+ There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the work queue storage.
364
+ Things will be much easier if we have a work queue that shares the same consistent view with the database.
286
365
 
287
366
  By using a database as the data storage, all the problems are gone.
288
367
  You can simply do the following:
@@ -294,15 +373,15 @@ You can simply do the following:
294
373
  ```
295
374
 
296
375
  It's all or nothing!
297
- By doing so, you don't need to maintain another worker queue backend.
298
- You are probably using a database anyway, so this worker queue comes for free.
376
+ By doing so, you don't need to maintain another work queue backend.
377
+ You are probably using a database anyway, so this work queue comes for free.
299
378
 
300
- Usually, a database is inefficient as the worker queues data storage because of the potential lock contention and the need for constant querying.
379
+ Usually, a database is inefficient as the work queue's data storage because of the potential lock contention and the need for constant querying.
301
380
  However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
302
381
 
303
- This project is inspired by many of the SKIP-LOCKED-based worker queue successors.
382
+ This project is inspired by many of the SKIP-LOCKED-based work queue successors.
304
383
  Why don't we just use those existing tools?
305
- Well, because while they work great as worker queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
384
+ Well, because while they work great as work queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
306
385
  Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
307
386
 
308
387
  With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
@@ -319,6 +398,8 @@ A modern accounting book service based on the most popular open source version c
319
398
  ## Alternatives
320
399
 
321
400
  - [solid_queue](https://github.com/rails/solid_queue)
401
+ - [good_job](https://github.com/bensheldon/good_job)
402
+ - [graphile-worker](https://github.com/graphile/worker)
322
403
  - [postgres-tq](https://github.com/flix-tech/postgres-tq)
323
404
  - [pq](https://github.com/malthe/pq/)
324
405
  - [PgQueuer](https://github.com/janbjorge/PgQueuer)
@@ -1,5 +1,5 @@
1
1
  # BeanQueue [![CircleCI](https://dl.circleci.com/status-badge/img/gh/LaunchPlatform/bq/tree/master.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
2
- BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
2
+ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
3
3
 
4
4
  **Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
5
5
 
@@ -9,8 +9,10 @@ BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://ww
9
9
  - **Easy-to-deploy**: Only rely on PostgreSQL
10
10
  - **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
11
11
  - **Auto-notify**: Notify will automatically be generated and send for inserted or update tasks
12
+ - **Retry**: Built-in and customizable retry-policies
13
+ - **Schedule**: Schedule task to run later
12
14
  - **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is found dead, the others will reschedule the tasks
13
- - **Customizable**: Use it as an library and build your own worker queue
15
+ - **Customizable**: Use it as a library and build your own work queue
14
16
  - **Native DB operations**: Commit your tasks with other db entries altogether without worrying about data inconsistent issue
15
17
 
16
18
  ## Install
@@ -107,6 +109,78 @@ To create tables for BeanQueue, you can run
107
109
  python -m bq.cmds.create_tables
108
110
  ```
109
111
 
112
+ ### Schedule
113
+
114
+ In most cases, a task will be executed as soon as possible after it is created.
115
+ To run a task later, you can set a datetime value to the `scheduled_at` attribute of the task model.
116
+ For example:
117
+
118
+ ```python
119
+ import datetime
120
+
121
+ db = Session()
122
+ task = resize_image.run(width=200, height=300)
123
+ task.scheduled_at = func.now() + datetime.timedelta(minutes=3)
124
+ db.add(task)
125
+ ```
126
+
127
+ Please note that currently, workers won't wake up at the next exact moment when the scheduled tasks are ready to run.
128
+ It has to wait until the polling times out, and eventually, it will see that the task's `scheduled_at` time exceeds the current datetime.
129
+ Therefore, depending on your `POLL_TIMEOUT` setting and the number of your workers when they started processing, the actual execution may be inaccurate.
130
+ If you set the `POLL_TIMEOUT` to 60 seconds, please expect less than 60 seconds of delay.
131
+
132
+ ### Retry
133
+
134
+ To automatically retry a task after failure, you can specify a retry policy to the processor.
135
+
136
+ ```python
137
+ import datetime
138
+ import bq
139
+ from sqlalchemy.orm import Session
140
+
141
+ app = bq.BeanQueue()
142
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
143
+
144
+ @app.processor(channel="images", retry_policy=delay_retry)
145
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
146
+ # resize image here ...
147
+ pass
148
+ ```
149
+
150
+ Currently, we provide some simple common retry policies such as `DelayRetry` and `ExponentialBackoffRetry`.
151
+ Of course, you can define your own retry policy easily by making a function that returns an optional value representing the next scheduled time for retry.
152
+
153
+ ```python
154
+ def my_retry_policy(task: bq.Task) -> typing.Any:
155
+ # calculate delay based on task model ...
156
+ return func.now() + datetime.timedelta(seconds=delay)
157
+ ```
158
+
159
+ To cap how many attempts are allowed, you can also use `LimitAttempt` like this:
160
+
161
+ ```python
162
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
163
+ capped_delay_retry = bq.LimitAttempt(3, delay_retry)
164
+
165
+ @app.processor(channel="images", retry_policy=capped_delay_retry)
166
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
167
+ # resize image here ...
168
+ pass
169
+ ```
170
+
171
+ You can also retry only for specific exception classes with the `retry_exceptions` argument.
172
+
173
+ ```python
174
+ @app.processor(
175
+ channel="images",
176
+ retry_policy=delay_retry,
177
+ retry_exceptions=ValueError,
178
+ )
179
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
180
+ # resize image here ...
181
+ pass
182
+ ```
183
+
110
184
  ### Configurations
111
185
 
112
186
  Configurations can be modified by setting environment variables with `BQ_` prefix.
@@ -125,7 +199,7 @@ container = bq.Container()
125
199
  container.wire(packages=[bq])
126
200
  config = bq.Config(
127
201
  PROCESSOR_PACKAGES=["my_pkgs.processors"],
128
- DATABASE_URL=str(config.DATABASE_URL),
202
+ DATABASE_URL=config.DATABASE_URL,
129
203
  BATCH_SIZE=10,
130
204
  )
131
205
  app = bq.BeanQueue(config=config)
@@ -148,12 +222,16 @@ app.process_tasks(channels=("images",))
148
222
  BeanQueue is designed to be as customizable as much as possible.
149
223
  Of course, you can define your own SQLAlchemy model instead of using the ones we provided.
150
224
 
151
- To make defining your own `Task` model or `Worker` model much easier, you can use our mixin classes:
225
+ To make defining your own `Task`, `Worker` or `Event` model much easier, you can use our mixin classes:
152
226
 
153
227
  - `bq.TaskModelMixin`: provides task model columns
154
228
  - `bq.TaskModelRefWorkerMixin`: provides foreign key column and relationship to `bq.Worker`
229
+ - `bq.TaskModelRefParentMixin`: provides foreign key column and relationship to children `bq.Task` created during processing
230
+ - `bq.TaskModelRefEventMixin`: provides foreign key column and relationship to `bq.Event`
155
231
  - `bq.WorkerModelMixin`: provides worker model columns
156
232
  - `bq.WorkerRefMixin`: provides relationship to `bq.Task`
233
+ - `bq.EventModelMixin`: provides event model columns
234
+ - `bq.EventModelRefTaskMixin`: provides foreign key column and relationship to `bq.Task`
157
235
 
158
236
  Here's an example for defining your own Task model:
159
237
 
@@ -212,13 +290,14 @@ class Worker(bq.WorkerModelMixin, Base):
212
290
  )
213
291
  ```
214
292
 
215
- With the model class ready, you only need to change the `TASK_MODEL` and `WORKER_MODEL` of `Config` to the full Python module name plus the class name like this.
293
+ With the model class ready, you only need to change the `TASK_MODEL`, `WORKER_MODEL` and `EVENT_MODEL` of `Config` to the full Python module name plus the class name like this.
216
294
 
217
295
  ```python
218
296
  import bq
219
297
  config = bq.Config(
220
298
  TASK_MODEL="my_pkgs.models.Task",
221
299
  WORKER_MODEL="my_pkgs.models.Worker",
300
+ EVENT_MODEL="my_pkgs.models.Event",
222
301
  # ... other configs
223
302
  )
224
303
  app = bq.BeanQueue(config)
@@ -226,21 +305,21 @@ app = bq.BeanQueue(config)
226
305
 
227
306
  ## Why?
228
307
 
229
- There are countless worker queue projects. Why make yet another one?
230
- The primary issue with most worker queue tools is their reliance on a standalone broker server.
231
- Our worker queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
232
- However, integrating an external worker queue into the system presents a risk.
233
- The worker queue and the database don't share the same data view, potentially compromising data integrity and reliability.
308
+ There are countless work queue projects. Why make yet another one?
309
+ The primary issue with most work queue tools is their reliance on a standalone broker server.
310
+ Our work queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
311
+ However, integrating an external work queue into the system presents a risk.
312
+ The work queue and the database don't share the same data view, potentially compromising data integrity and reliability.
234
313
 
235
314
  For example, you have a table of `images` to keep the user-uploaded images.
236
- And you have a background worker queue for resizing the uploaded images into different thumbnail sizes.
237
- So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the worker queue.
315
+ And you have a background work queue for resizing the uploaded images into different thumbnail sizes.
316
+ So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the work queue.
238
317
 
239
- Say you push the task to the worker queue immediately after you insert the `images` table then commit like this:
318
+ Say you push the task to the work queue immediately after you insert the `images` table then commit like this:
240
319
 
241
320
  ```
242
321
  1. Insert into the "images" table
243
- 2. Push resizing task to the worker queue
322
+ 2. Push resizing task to the work queue
244
323
  3. Commit db changes
245
324
  ```
246
325
 
@@ -248,21 +327,21 @@ While this might seem like the right way to do it, there's a hidden bug.
248
327
  If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
249
328
  One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
250
329
  But this adds complexity to the system and also increases the latency if the first attempt fails.
251
- Also, if the commit step fails, you will have a failed worker queue job trying to fetch a row from the database that will never exist.
330
+ Also, if the commit step fails, you will have a failed work queue job trying to fetch a row from the database that will never exist.
252
331
 
253
332
  Another approach is to push the resize task after the database changes are committed. It works like this:
254
333
 
255
334
  ```
256
335
  1. Insert into the "images" table
257
336
  2. Commit db changes
258
- 3. Push resizing task to the worker queue
337
+ 3. Push resizing task to the work queue
259
338
  ```
260
339
 
261
340
  With this approach, we don't need to worry about workers picking up the task too early.
262
341
  However, there's another drawback.
263
- If step 3 for pushing a new task to the worker queue fails, the newly inserted `images` row will never be processed.
264
- There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the worker queue storage.
265
- Things will be much easier if we have a worker queue that shares the same consistent view with the worker queue.
342
+ If step 3 for pushing a new task to the work queue fails, the newly inserted `images` row will never be processed.
343
+ There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the work queue storage.
344
+ Things will be much easier if we have a work queue that shares the same consistent view with the database.
266
345
 
267
346
  By using a database as the data storage, all the problems are gone.
268
347
  You can simply do the following:
@@ -274,15 +353,15 @@ You can simply do the following:
274
353
  ```
275
354
 
276
355
  It's all or nothing!
277
- By doing so, you don't need to maintain another worker queue backend.
278
- You are probably using a database anyway, so this worker queue comes for free.
356
+ By doing so, you don't need to maintain another work queue backend.
357
+ You are probably using a database anyway, so this work queue comes for free.
279
358
 
280
- Usually, a database is inefficient as the worker queues data storage because of the potential lock contention and the need for constant querying.
359
+ Usually, a database is inefficient as the work queue's data storage because of the potential lock contention and the need for constant querying.
281
360
  However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
282
361
 
283
- This project is inspired by many of the SKIP-LOCKED-based worker queue successors.
362
+ This project is inspired by many of the SKIP-LOCKED-based work queue successors.
284
363
  Why don't we just use those existing tools?
285
- Well, because while they work great as worker queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
364
+ Well, because while they work great as work queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
286
365
  Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
287
366
 
288
367
  With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
@@ -299,6 +378,8 @@ A modern accounting book service based on the most popular open source version c
299
378
  ## Alternatives
300
379
 
301
380
  - [solid_queue](https://github.com/rails/solid_queue)
381
+ - [good_job](https://github.com/bensheldon/good_job)
382
+ - [graphile-worker](https://github.com/graphile/worker)
302
383
  - [postgres-tq](https://github.com/flix-tech/postgres-tq)
303
384
  - [pq](https://github.com/malthe/pq/)
304
385
  - [PgQueuer](https://github.com/janbjorge/PgQueuer)
@@ -0,0 +1,19 @@
1
+ from .app import BeanQueue
2
+ from .config import Config # noqa
3
+ from .models import Event
4
+ from .models import EventModelMixin
5
+ from .models import EventModelRefTaskMixin
6
+ from .models import EventType
7
+ from .models import Task # noqa
8
+ from .models import TaskModelMixin
9
+ from .models import TaskModelRefEventMixin
10
+ from .models import TaskModelRefParentMixin
11
+ from .models import TaskModelRefWorkerMixin
12
+ from .models import TaskState # noqa
13
+ from .models import Worker # noqa
14
+ from .models import WorkerModelMixin # noqa
15
+ from .models import WorkerRefMixin # noqa
16
+ from .models import WorkerState # noqa
17
+ from .processors.retry_policies import DelayRetry
18
+ from .processors.retry_policies import ExponentialBackoffRetry
19
+ from .processors.retry_policies import LimitAttempt
@@ -7,6 +7,8 @@ import sys
7
7
  import threading
8
8
  import time
9
9
  import typing
10
+ from importlib.metadata import PackageNotFoundError
11
+ from importlib.metadata import version
10
12
  from wsgiref.simple_server import make_server
11
13
  from wsgiref.simple_server import WSGIRequestHandler
12
14
 
@@ -84,6 +86,12 @@ class BeanQueue:
84
86
  def worker_model(self) -> typing.Type[models.Worker]:
85
87
  return load_module_var(self.config.WORKER_MODEL)
86
88
 
89
+ @property
90
+ def event_model(self) -> typing.Type[models.Event] | None:
91
+ if self.config.EVENT_MODEL is None:
92
+ return
93
+ return load_module_var(self.config.EVENT_MODEL)
94
+
87
95
  def _make_worker_service(self, session: DBSession):
88
96
  return self.worker_service_cls(
89
97
  session=session, task_model=self.task_model, worker_model=self.worker_model
@@ -96,7 +104,8 @@ class BeanQueue:
96
104
  self,
97
105
  channel: str = constants.DEFAULT_CHANNEL,
98
106
  auto_complete: bool = True,
99
- auto_rollback_on_exc: bool = True,
107
+ retry_policy: typing.Callable | None = None,
108
+ retry_exceptions: typing.Type | typing.Tuple[typing.Type, ...] | None = None,
100
109
  task_model: typing.Type | None = None,
101
110
  ) -> typing.Callable:
102
111
  def decorator(wrapped: typing.Callable):
@@ -106,7 +115,8 @@ class BeanQueue:
106
115
  channel=channel,
107
116
  func=wrapped,
108
117
  auto_complete=auto_complete,
109
- auto_rollback_on_exc=auto_rollback_on_exc,
118
+ retry_policy=retry_policy,
119
+ retry_exceptions=retry_exceptions,
110
120
  )
111
121
  helper_obj = ProcessorHelper(
112
122
  processor,
@@ -241,6 +251,15 @@ class BeanQueue:
241
251
  self,
242
252
  channels: tuple[str, ...],
243
253
  ):
254
+ try:
255
+ bq_version = version(__name__.split(".")[0])
256
+ except PackageNotFoundError:
257
+ bq_version = "unknown"
258
+
259
+ logger.info(
260
+ "Starting processing tasks, bq_version=%s",
261
+ bq_version,
262
+ )
244
263
  db = self.make_session()
245
264
  if not channels:
246
265
  channels = [constants.DEFAULT_CHANNEL]
@@ -318,7 +337,7 @@ class BeanQueue:
318
337
  task.func_name,
319
338
  )
320
339
  # TODO: support processor pool and other approaches to dispatch the workload
321
- registry.process(task)
340
+ registry.process(task, event_cls=self.event_model)
322
341
  if not tasks:
323
342
  # we should try to keep dispatching until we cannot find tasks
324
343
  break
@@ -30,6 +30,9 @@ class Config(BaseSettings):
30
30
  # which worker model to use
31
31
  WORKER_MODEL: str = "bq.Worker"
32
32
 
33
+ # which event model to use
34
+ EVENT_MODEL: str | None = "bq.Event"
35
+
33
36
  # Enable metrics HTTP server
34
37
  METRICS_HTTP_SERVER_ENABLED: bool = True
35
38
 
@@ -1,3 +1,5 @@
1
1
  import blinker
2
2
 
3
3
  worker_init = blinker.signal("worker-init")
4
+
5
+ task_failure = blinker.signal("task-failure")
@@ -1,5 +1,11 @@
1
+ from .event import Event
2
+ from .event import EventModelMixin
3
+ from .event import EventModelRefTaskMixin
4
+ from .event import EventType
1
5
  from .task import Task
2
6
  from .task import TaskModelMixin
7
+ from .task import TaskModelRefEventMixin
8
+ from .task import TaskModelRefParentMixin
3
9
  from .task import TaskModelRefWorkerMixin
4
10
  from .task import TaskState
5
11
  from .worker import Worker
@@ -0,0 +1,76 @@
1
+ import datetime
2
+ import enum
3
+ import typing
4
+ import uuid
5
+
6
+ from sqlalchemy import DateTime
7
+ from sqlalchemy import Enum
8
+ from sqlalchemy import ForeignKey
9
+ from sqlalchemy import func
10
+ from sqlalchemy import String
11
+ from sqlalchemy.dialects.postgresql import UUID
12
+ from sqlalchemy.orm import declared_attr
13
+ from sqlalchemy.orm import Mapped
14
+ from sqlalchemy.orm import mapped_column
15
+ from sqlalchemy.orm import relationship
16
+
17
+ from ..db.base import Base
18
+ from .helpers import make_repr_attrs
19
+
20
+
21
class EventType(enum.Enum):
    """Kinds of lifecycle events recorded for a task."""

    # task failed
    FAILED = "FAILED"
    # task failed and retry scheduled
    FAILED_RETRY_SCHEDULED = "FAILED_RETRY_SCHEDULED"
    # task complete
    COMPLETE = "COMPLETE"
28
+
29
+
30
class EventModelMixin:
    """Columns shared by all event models (id, type, error info, timestamps)."""

    # primary key, generated server-side by PostgreSQL's gen_random_uuid()
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()
    )
    # type of the event
    type: Mapped[EventType] = mapped_column(
        Enum(EventType),
        nullable=False,
        index=True,
    )
    # Error message (only populated for failure events)
    error_message: Mapped[typing.Optional[str]] = mapped_column(String, nullable=True)
    # the scheduled at time for retry; nullable, so annotated Optional
    scheduled_at: Mapped[typing.Optional[datetime.datetime]] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    # created datetime of the event, assigned server-side
    created_at: Mapped[datetime.datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, server_default=func.now()
    )
51
+
52
+
53
class EventModelRefTaskMixin:
    """Adds the many-to-one relationship from an event to its task."""

    # foreign key id of the task; nullable, so annotated Optional
    task_id: Mapped[typing.Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("bq_tasks.id", name="fk_event_task_id"),
        nullable=True,
    )

    @declared_attr
    def task(cls) -> Mapped["Task"]:
        # many events -> one task; mirrors Task's "events" relationship
        return relationship("Task", back_populates="events", uselist=False)
64
+
65
+
66
class Event(EventModelMixin, EventModelRefTaskMixin, Base):
    """Default concrete event model, stored in the ``bq_events`` table."""

    __tablename__ = "bq_events"

    def __repr__(self) -> str:
        # keep repr short: identifying fields only
        items = [
            ("id", self.id),
            ("type", self.type),
            ("created_at", self.created_at),
            ("scheduled_at", self.scheduled_at),
        ]
        return f"<{self.__class__.__name__} {make_repr_attrs(items)}>"
@@ -24,7 +24,7 @@ from .helpers import make_repr_attrs
24
24
 
25
25
 
26
26
  class TaskState(enum.Enum):
27
- # task just created, not scheduled yet
27
+ # task just created, not dispatched yet. or, the task failed and is waiting for a retry.
28
28
  PENDING = "PENDING"
29
29
  # a worker is processing the task right now
30
30
  PROCESSING = "PROCESSING"
@@ -62,6 +62,11 @@ class TaskModelMixin:
62
62
  created_at: Mapped[datetime.datetime] = mapped_column(
63
63
  DateTime(timezone=True), nullable=False, server_default=func.now()
64
64
  )
65
+ # scheduled to run at a specific time
66
+ scheduled_at: Mapped[datetime.datetime] = mapped_column(
67
+ DateTime(timezone=True),
68
+ nullable=True,
69
+ )
65
70
 
66
71
 
67
72
  class TaskModelRefWorkerMixin:
@@ -77,7 +82,44 @@ class TaskModelRefWorkerMixin:
77
82
  return relationship("Worker", back_populates="tasks", uselist=False)
78
83
 
79
84
 
80
- class Task(TaskModelMixin, TaskModelRefWorkerMixin, Base):
85
class TaskModelRefParentMixin:
    """Adds a self-referential parent/children relationship between tasks."""

    # foreign key id of the source task which created the current task while we are processing it
    # nullable (top-level tasks have no parent), so annotated Optional
    parent_id: Mapped[typing.Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("bq_tasks.id", name="fk_task_parent_task_id"),
        nullable=True,
    )

    @declared_attr
    def parent(cls) -> Mapped[typing.Optional["Task"]]:
        # the task that spawned this one, if any; remote_side is required
        # for SQLAlchemy to orient the self-referential join
        return relationship(
            "Task",
            back_populates="children",
            remote_side=[cls.id],
            foreign_keys=[cls.parent_id],
            uselist=False,
        )

    @declared_attr
    def children(cls) -> Mapped[list["Task"]]:
        # tasks created while this task was being processed
        return relationship(
            "Task", foreign_keys=[cls.parent_id], back_populates="parent"
        )
108
+
109
+
110
class TaskModelRefEventMixin:
    """Adds the one-to-many relationship from a task to its events."""

    @declared_attr
    def events(cls) -> Mapped[list["Event"]]:
        # mirrors Event's "task" relationship
        return relationship("Event", back_populates="task")
114
+
115
+
116
+ class Task(
117
+ TaskModelMixin,
118
+ TaskModelRefWorkerMixin,
119
+ TaskModelRefEventMixin,
120
+ TaskModelRefParentMixin,
121
+ Base,
122
+ ):
81
123
  __tablename__ = "bq_tasks"
82
124
 
83
125
  def __repr__(self) -> str:
@@ -85,6 +127,8 @@ class Task(TaskModelMixin, TaskModelRefWorkerMixin, Base):
85
127
  ("id", self.id),
86
128
  ("state", self.state),
87
129
  ("channel", self.channel),
130
+ ("module", self.module),
131
+ ("func_name", self.func_name),
88
132
  ]
89
133
  return f"<{self.__class__.__name__} {make_repr_attrs(items)}>"
90
134
 
@@ -2,7 +2,6 @@ import datetime
2
2
  import enum
3
3
  import uuid
4
4
 
5
- from sqlalchemy import Column
6
5
  from sqlalchemy import DateTime
7
6
  from sqlalchemy import Enum
8
7
  from sqlalchemy import func
@@ -12,7 +11,6 @@ from sqlalchemy.dialects.postgresql import UUID
12
11
  from sqlalchemy.orm import declared_attr
13
12
  from sqlalchemy.orm import Mapped
14
13
  from sqlalchemy.orm import mapped_column
15
- from sqlalchemy.orm import Mapper
16
14
  from sqlalchemy.orm import relationship
17
15
 
18
16
  from ..db.base import Base
@@ -0,0 +1,120 @@
1
+ import contextvars
2
+ import dataclasses
3
+ import datetime
4
+ import inspect
5
+ import logging
6
+ import typing
7
+
8
+ from sqlalchemy import select
9
+ from sqlalchemy.orm import object_session
10
+
11
+ from .. import events
12
+ from .. import models
13
+
14
+ logger = logging.getLogger(__name__)
15
+ current_task = contextvars.ContextVar("current_task")
16
+
17
+
18
+ @dataclasses.dataclass(frozen=True)
19
+ class Processor:
20
+ channel: str
21
+ module: str
22
+ name: str
23
+ func: typing.Callable
24
+ # should we auto complete the task or not
25
+ auto_complete: bool = True
26
+ # The retry policy function for returning a new scheduled time for next attempt
27
+ retry_policy: typing.Callable | None = None
28
+ # The exceptions we suppose to retry when encountered
29
+ retry_exceptions: typing.Type | typing.Tuple[typing.Type, ...] | None = None
30
+
31
+ def process(self, task: models.Task, event_cls: typing.Type | None = None):
32
+ ctx_token = current_task.set(task)
33
+ try:
34
+ db = object_session(task)
35
+ func_signature = inspect.signature(self.func)
36
+ base_kwargs = {}
37
+ if "task" in func_signature.parameters:
38
+ base_kwargs["task"] = task
39
+ if "db" in func_signature.parameters:
40
+ base_kwargs["db"] = db
41
+ try:
42
+ with db.begin_nested() as savepoint:
43
+ if "savepoint" in func_signature.parameters:
44
+ base_kwargs["savepoint"] = savepoint
45
+ result = self.func(**base_kwargs, **task.kwargs)
46
+ except Exception as exc:
47
+ logger.error("Unhandled exception for task %s", task.id, exc_info=True)
48
+ events.task_failure.send(self, task=task, exception=exc)
49
+ task.state = models.TaskState.FAILED
50
+ task.error_message = str(exc)
51
+ retry_scheduled_at = None
52
+ if (
53
+ self.retry_exceptions is None
54
+ or isinstance(exc, self.retry_exceptions)
55
+ ) and self.retry_policy is not None:
56
+ retry_scheduled_at = self.retry_policy(task)
57
+ if retry_scheduled_at is not None:
58
+ task.state = models.TaskState.PENDING
59
+ task.scheduled_at = retry_scheduled_at
60
+ if isinstance(retry_scheduled_at, datetime.datetime):
61
+ retry_scheduled_at_value = retry_scheduled_at
62
+ else:
63
+ retry_scheduled_at_value = db.scalar(
64
+ select(retry_scheduled_at)
65
+ )
66
+ logger.info(
67
+ "Schedule task %s for retry at %s",
68
+ task.id,
69
+ retry_scheduled_at_value,
70
+ )
71
+ if event_cls is not None:
72
+ event = event_cls(
73
+ task=task,
74
+ type=models.EventType.FAILED
75
+ if retry_scheduled_at is None
76
+ else models.EventType.FAILED_RETRY_SCHEDULED,
77
+ error_message=task.error_message,
78
+ scheduled_at=retry_scheduled_at,
79
+ )
80
+ db.add(event)
81
+ db.add(task)
82
+ return
83
+ if self.auto_complete:
84
+ logger.info("Task %s auto complete", task.id)
85
+ task.state = models.TaskState.DONE
86
+ task.result = result
87
+ if event_cls is not None:
88
+ event = event_cls(
89
+ task=task,
90
+ type=models.EventType.COMPLETE,
91
+ )
92
+ db.add(event)
93
+ db.add(task)
94
+ return result
95
+ finally:
96
+ current_task.reset(ctx_token)
97
+
98
+
99
+ class ProcessorHelper:
100
+ """Helper function to replace the decorated processor function and make creating Task model much easier"""
101
+
102
+ def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
103
+ self._processor = processor
104
+ self._task_cls = task_cls
105
+
106
+ def __call__(self, *args, **kwargs):
107
+ return self._processor.func(*args, **kwargs)
108
+
109
+ def run(self, **kwargs) -> models.Task:
110
+ try:
111
+ parent = current_task.get()
112
+ except LookupError:
113
+ parent = None
114
+ return self._task_cls(
115
+ channel=self._processor.channel,
116
+ module=self._processor.module,
117
+ func_name=self._processor.name,
118
+ kwargs=kwargs,
119
+ parent=parent,
120
+ )
@@ -18,10 +18,14 @@ class Registry:
18
18
  def add(self, processor: Processor):
19
19
  self.processors[processor.channel][processor.module][processor.name] = processor
20
20
 
21
- def process(self, task: models.Task) -> typing.Any:
21
+ def process(
22
+ self,
23
+ task: models.Task,
24
+ event_cls: typing.Type | None = None,
25
+ ) -> typing.Any:
22
26
  modules = self.processors.get(task.channel, {})
23
27
  functions = modules.get(task.module, {})
24
- processor = functions.get(task.func_name)
28
+ processor: Processor = functions.get(task.func_name)
25
29
  db = object_session(task)
26
30
  if processor is None:
27
31
  self.logger.error(
@@ -30,12 +34,18 @@ class Registry:
30
34
  task.module,
31
35
  task.func_name,
32
36
  )
33
- # TODO: add error event
34
37
  task.state = models.TaskState.FAILED
35
38
  task.error_message = f"Cannot find processor for task with module={task.module}, func={task.func_name}"
39
+ if event_cls is not None:
40
+ event = event_cls(
41
+ task=task,
42
+ type=models.EventType.FAILED,
43
+ error_message=task.error_message,
44
+ )
45
+ db.add(event)
36
46
  db.add(task)
37
47
  return
38
- return processor.process(task)
48
+ return processor.process(task, event_cls=event_cls)
39
49
 
40
50
 
41
51
  def collect(packages: list[typing.Any], registry: Registry | None = None) -> Registry:
@@ -0,0 +1,55 @@
1
+ import datetime
2
+ import typing
3
+
4
+ from sqlalchemy import func
5
+ from sqlalchemy import inspect
6
+ from sqlalchemy.orm import object_session
7
+
8
+ from .. import models
9
+
10
+
11
def get_failure_times(task: models.Task) -> int:
    """Count how many times ``task`` has already failed and been rescheduled.

    Resolves the Event model class through the ``events`` relationship of the
    task's mapped class (so custom event models work too) and counts that
    task's FAILED_RETRY_SCHEDULED events.
    """
    db = object_session(task)
    # mapper inspection of the task's concrete class
    task_info = inspect(task.__class__)
    event_cls = task_info.attrs["events"].entity.class_
    return (
        db.query(event_cls)
        .filter(event_cls.task == task)
        .filter(event_cls.type == models.EventType.FAILED_RETRY_SCHEDULED)
    ).count()
20
+
21
+
22
class DelayRetry:
    """Retry policy that reschedules a failed task after a fixed delay."""

    def __init__(self, delay: datetime.timedelta):
        # fixed delay added to the database server's current time
        self.delay = delay

    def __call__(self, task: models.Task) -> typing.Any:
        # returns a SQL expression (server-side now() + delay), not a datetime
        return func.now() + self.delay
28
+
29
+
30
class ExponentialBackoffRetry:
    """Retry policy with exponentially growing delays.

    The delay for the N-th failure (1-based) is
    ``base ** (exponent_offset + exponent_scalar * N)`` seconds.
    """

    def __init__(
        self, base: float = 2, exponent_offset: float = 0, exponent_scalar: float = 1.0
    ):
        self.base = base
        self.exponent_offset = exponent_offset
        self.exponent_scalar = exponent_scalar

    def __call__(self, task: models.Task) -> typing.Any:
        # failures recorded so far; the failure being handled right now is
        # not yet recorded, hence the +1 below
        failure_times = get_failure_times(task)
        delay_seconds = self.base ** (
            self.exponent_offset + (self.exponent_scalar * (failure_times + 1))
        )
        # SQL expression: server-side now() plus the computed delay
        return func.now() + datetime.timedelta(seconds=delay_seconds)
44
+
45
+
46
class LimitAttempt:
    """Wrap another retry policy and cap the total number of attempts.

    Returns None (meaning "do not retry") once ``maximum_attempt`` attempts
    have been made; otherwise delegates to the wrapped policy.
    """

    def __init__(self, maximum_attempt: int, retry_policy: typing.Callable):
        self.maximum_attempt = maximum_attempt
        self.retry_policy = retry_policy

    def __call__(self, task: models.Task) -> typing.Any:
        # failure_times counts previously scheduled retries; +1 accounts for
        # the failure being handled right now
        failure_times = get_failure_times(task)
        if (failure_times + 1) >= self.maximum_attempt:
            return None
        return self.retry_policy(task)
@@ -3,6 +3,9 @@ import select
3
3
  import typing
4
4
  import uuid
5
5
 
6
+ from sqlalchemy import func
7
+ from sqlalchemy import null
8
+ from sqlalchemy import or_
6
9
  from sqlalchemy.orm import Query
7
10
 
8
11
  from .. import models
@@ -21,11 +24,22 @@ class DispatchService:
21
24
  self.session = session
22
25
  self.task_model: typing.Type[models.Task] = task_model
23
26
 
24
- def make_task_query(self, channels: typing.Sequence[str], limit: int = 1) -> Query:
27
+ def make_task_query(
28
+ self,
29
+ channels: typing.Sequence[str],
30
+ limit: int = 1,
31
+ now: typing.Any = func.now(),
32
+ ) -> Query:
25
33
  return (
26
34
  self.session.query(self.task_model.id)
27
35
  .filter(self.task_model.channel.in_(channels))
28
36
  .filter(self.task_model.state == models.TaskState.PENDING)
37
+ .filter(
38
+ or_(
39
+ self.task_model.scheduled_at.is_(null()),
40
+ now >= self.task_model.scheduled_at,
41
+ )
42
+ )
29
43
  .order_by(self.task_model.created_at)
30
44
  .limit(limit)
31
45
  .with_for_update(skip_locked=True)
@@ -43,9 +57,13 @@ class DispatchService:
43
57
  )
44
58
 
45
59
  def dispatch(
46
- self, channels: typing.Sequence[str], worker_id: uuid.UUID, limit: int = 1
60
+ self,
61
+ channels: typing.Sequence[str],
62
+ worker_id: uuid.UUID,
63
+ limit: int = 1,
64
+ now: typing.Any = func.now(),
47
65
  ) -> Query:
48
- task_query = self.make_task_query(channels, limit=limit)
66
+ task_query = self.make_task_query(channels, limit=limit, now=now)
49
67
  task_subquery = task_query.scalar_subquery()
50
68
  task_ids = [
51
69
  item[0]
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "beanqueue"
3
- version = "0.2.2"
4
- description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library"
3
+ version = "1.0.0"
4
+ description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCKED and SQLAlchemy based worker queue library"
5
5
  authors = ["Fang-Pen Lin <fangpen@launchplatform.com>"]
6
6
  license = "MIT"
7
7
  readme = "README.md"
@@ -1,10 +0,0 @@
1
- from .app import BeanQueue
2
- from .config import Config # noqa
3
- from .models import Task # noqa
4
- from .models import TaskModelMixin
5
- from .models import TaskModelRefWorkerMixin
6
- from .models import TaskState # noqa
7
- from .models import Worker # noqa
8
- from .models import WorkerModelMixin # noqa
9
- from .models import WorkerRefMixin # noqa
10
- from .models import WorkerState # noqa
@@ -1,70 +0,0 @@
1
- import dataclasses
2
- import inspect
3
- import logging
4
- import typing
5
-
6
- from sqlalchemy.orm import object_session
7
-
8
- from .. import models
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- @dataclasses.dataclass(frozen=True)
14
- class Processor:
15
- channel: str
16
- module: str
17
- name: str
18
- func: typing.Callable
19
- # should we auto complete the task or not
20
- auto_complete: bool = True
21
- # should we auto rollback the transaction when encounter unhandled exception
22
- auto_rollback_on_exc: bool = True
23
-
24
- def process(self, task: models.Task):
25
- db = object_session(task)
26
- func_signature = inspect.signature(self.func)
27
- base_kwargs = {}
28
- if "task" in func_signature.parameters:
29
- base_kwargs["task"] = task
30
- if "db" in func_signature.parameters:
31
- base_kwargs["db"] = db
32
- with db.begin_nested() as savepoint:
33
- if "savepoint" in func_signature.parameters:
34
- base_kwargs["savepoint"] = savepoint
35
- try:
36
- result = self.func(**base_kwargs, **task.kwargs)
37
- except Exception as exc:
38
- logger.error("Unhandled exception for task %s", task.id, exc_info=True)
39
- if self.auto_rollback_on_exc:
40
- savepoint.rollback()
41
- # TODO: add error event
42
- task.state = models.TaskState.FAILED
43
- task.error_message = str(exc)
44
- db.add(task)
45
- return
46
- if self.auto_complete:
47
- logger.info("Task %s auto complete", task.id)
48
- task.state = models.TaskState.DONE
49
- task.result = result
50
- db.add(task)
51
- return result
52
-
53
-
54
- class ProcessorHelper:
55
- """Helper function to replace the decorated processor function and make creating Task model much easier"""
56
-
57
- def __init__(self, processor: Processor, task_cls: typing.Type = models.Task):
58
- self._processor = processor
59
- self._task_cls = task_cls
60
-
61
- def __call__(self, *args, **kwargs):
62
- return self._processor.func(*args, **kwargs)
63
-
64
- def run(self, **kwargs) -> models.Task:
65
- return self._task_cls(
66
- channel=self._processor.channel,
67
- module=self._processor.module,
68
- func_name=self._processor.name,
69
- kwargs=kwargs,
70
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes