beanqueue 0.2.3__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. {beanqueue-0.2.3 → beanqueue-1.1.0}/PKG-INFO +112 -30
  2. {beanqueue-0.2.3 → beanqueue-1.1.0}/README.md +109 -28
  3. beanqueue-1.1.0/bq/__init__.py +19 -0
  4. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/app.py +22 -3
  5. beanqueue-1.1.0/bq/cmds/cli.py +39 -0
  6. beanqueue-1.1.0/bq/cmds/create_tables.py +12 -0
  7. beanqueue-1.1.0/bq/cmds/environment.py +36 -0
  8. beanqueue-1.1.0/bq/cmds/main.py +9 -0
  9. beanqueue-1.1.0/bq/cmds/process.py +15 -0
  10. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/cmds/submit.py +14 -20
  11. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/config.py +3 -0
  12. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/models/__init__.py +6 -0
  13. beanqueue-1.1.0/bq/models/event.py +76 -0
  14. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/models/task.py +44 -2
  15. beanqueue-1.1.0/bq/processors/processor.py +120 -0
  16. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/processors/registry.py +14 -4
  17. beanqueue-1.1.0/bq/processors/retry_policies.py +55 -0
  18. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/services/dispatch.py +21 -3
  19. {beanqueue-0.2.3 → beanqueue-1.1.0}/pyproject.toml +6 -2
  20. beanqueue-0.2.3/bq/__init__.py +0 -10
  21. beanqueue-0.2.3/bq/cmds/create_tables.py +0 -26
  22. beanqueue-0.2.3/bq/cmds/process.py +0 -23
  23. beanqueue-0.2.3/bq/processors/processor.py +0 -72
  24. {beanqueue-0.2.3 → beanqueue-1.1.0}/LICENSE +0 -0
  25. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/cmds/__init__.py +0 -0
  26. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/cmds/utils.py +0 -0
  27. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/constants.py +0 -0
  28. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/db/__init__.py +0 -0
  29. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/db/base.py +0 -0
  30. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/db/session.py +0 -0
  31. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/events.py +0 -0
  32. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/models/helpers.py +0 -0
  33. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/models/worker.py +0 -0
  34. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/processors/__init__.py +0 -0
  35. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/services/__init__.py +0 -0
  36. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/services/worker.py +0 -0
  37. {beanqueue-0.2.3 → beanqueue-1.1.0}/bq/utils.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: beanqueue
3
- Version: 0.2.3
4
- Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK based worker queue library
3
+ Version: 1.1.0
4
+ Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library
5
5
  License: MIT
6
6
  Author: Fang-Pen Lin
7
7
  Author-email: fangpen@launchplatform.com
@@ -14,12 +14,13 @@ Requires-Dist: blinker (>=1.8.2,<2.0.0)
14
14
  Requires-Dist: click (>=8.1.7,<9.0.0)
15
15
  Requires-Dist: pg-activity (>=3.5.1,<4.0.0)
16
16
  Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
17
+ Requires-Dist: rich (>=13.7.1,<14.0.0)
17
18
  Requires-Dist: sqlalchemy (>=2.0.30,<3.0.0)
18
19
  Requires-Dist: venusian (>=3.1.0,<4.0.0)
19
20
  Description-Content-Type: text/markdown
20
21
 
21
22
  # BeanQueue [![CircleCI](https://dl.circleci.com/status-badge/img/gh/LaunchPlatform/bq/tree/master.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
22
- BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
23
+ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
23
24
 
24
25
  **Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
25
26
 
@@ -29,8 +30,10 @@ BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://ww
29
30
  - **Easy-to-deploy**: Only rely on PostgreSQL
30
31
  - **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
31
32
  - **Auto-notify**: Notify will automatically be generated and sent for inserted or updated tasks
33
+ - **Retry**: Built-in and customizable retry-policies
34
+ - **Schedule**: Schedule task to run later
32
35
  - **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is found dead, the others will reschedule the tasks
33
- - **Customizable**: Use it as an library and build your own worker queue
36
+ - **Customizable**: Use it as a library and build your own work queue
34
37
  - **Native DB operations**: Commit your tasks with other db entries altogether without worrying about data inconsistency issues
35
38
 
36
39
  ## Install
@@ -111,20 +114,92 @@ db.commit()
111
114
  To run the worker, you can do this:
112
115
 
113
116
  ```bash
114
- BQ_PROCESSOR_PACKAGES='["my_pkgs.processors"]' python -m bq.cmds.process images
117
+ BQ_PROCESSOR_PACKAGES='["my_pkgs.processors"]' bq process images
115
118
  ```
116
119
 
117
120
  The `BQ_PROCESSOR_PACKAGES` is a JSON list that contains the Python packages where you define your processors (the functions you decorated with `bq.processors.registry.processor`).
118
121
  To submit a task for testing purpose, you can do
119
122
 
120
123
  ```bash
121
- python -m bq.cmds.submit images my_pkgs.processors resize_image -k '{"width": 200, "height": 300}'
124
+ bq submit images my_pkgs.processors resize_image -k '{"width": 200, "height": 300}'
122
125
  ```
123
126
 
124
127
  To create tables for BeanQueue, you can run
125
128
 
126
129
  ```bash
127
- python -m bq.cmds.create_tables
130
+ bq create_tables
131
+ ```
132
+
133
+ ### Schedule
134
+
135
+ In most cases, a task will be executed as soon as possible after it is created.
136
+ To run a task later, you can set a datetime value to the `scheduled_at` attribute of the task model.
137
+ For example:
138
+
139
+ ```python
140
+ import datetime
141
+
142
+ db = Session()
143
+ task = resize_image.run(width=200, height=300)
144
+ task.scheduled_at = func.now() + datetime.timedelta(minutes=3)
145
+ db.add(task)
146
+ ```
147
+
148
+ Please note that currently, workers won't wake up at the next exact moment when the scheduled tasks are ready to run.
149
+ It has to wait until the polling times out, and eventually, it will see the task's scheduled_at time exceeds the current datetime.
150
+ Therefore, depending on your `POLL_TIMEOUT` setting and the number of your workers when they started processing, the actual execution time may be inaccurate.
151
+ If you set the `POLL_TIMEOUT` to 60 seconds, please expect less than 60 seconds of delay.
152
+
153
+ ### Retry
154
+
155
+ To automatically retry a task after failure, you can specify a retry policy to the processor.
156
+
157
+ ```python
158
+ import datetime
159
+ import bq
160
+ from sqlalchemy.orm import Session
161
+
162
+ app = bq.BeanQueue()
163
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
164
+
165
+ @app.processor(channel="images", retry_policy=delay_retry)
166
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
167
+ # resize image here ...
168
+ pass
169
+ ```
170
+
171
+ Currently, we provide some simple common retry policies such as `DelayRetry` and `ExponentialBackoffRetry`.
172
+ Surely, you can define your own retry policy easily by making a function that returns an optional value representing the next scheduled time for retry.
173
+
174
+ ```python
175
+ def my_retry_policy(task: bq.Task) -> typing.Any:
176
+ # calculate delay based on task model ...
177
+ return func.now() + datetime.timedelta(seconds=delay)
178
+ ```
179
+
180
+ To cap how many attempts are allowed, you can also use `LimitAttempt` like this:
181
+
182
+ ```python
183
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
184
+ capped_delay_retry = bq.LimitAttempt(3, delay_retry)
185
+
186
+ @app.processor(channel="images", retry_policy=capped_delay_retry)
187
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
188
+ # resize image here ...
189
+ pass
190
+ ```
191
+
192
+ You can also retry only for specific exception classes with the `retry_exceptions` argument.
193
+
194
+ ```python
195
+ @app.processor(
196
+ channel="images",
197
+ retry_policy=delay_retry,
198
+ retry_exceptions=ValueError,
199
+ )
200
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
201
+ # resize image here ...
202
+ pass
128
203
  ```
129
204
 
130
205
  ### Configurations
@@ -145,7 +220,7 @@ container = bq.Container()
145
220
  container.wire(packages=[bq])
146
221
  config = bq.Config(
147
222
  PROCESSOR_PACKAGES=["my_pkgs.processors"],
148
- DATABASE_URL=str(config.DATABASE_URL),
223
+ DATABASE_URL=config.DATABASE_URL,
149
224
  BATCH_SIZE=10,
150
225
  )
151
226
  app = bq.BeanQueue(config=config)
@@ -154,7 +229,7 @@ app = bq.BeanQueue(config=config)
154
229
  Then you can pass `--app` argument (or `-a` for short) pointing to the app object to the process command like this:
155
230
 
156
231
  ```bash
157
- python -m bq.cmds.process -a my_pkgs.bq.app images
232
+ bq -a my_pkgs.bq.app process images
158
233
  ```
159
234
 
160
235
  Or if you prefer to define your own process command, you can also call `process_tasks` of the `BeanQueue` object directly like this:
@@ -168,12 +243,16 @@ app.process_tasks(channels=("images",))
168
243
  BeanQueue is designed to be as customizable as much as possible.
169
244
  Of course, you can define your own SQLAlchemy model instead of using the ones we provided.
170
245
 
171
- To make defining your own `Task` model or `Worker` model much easier, you can use our mixin classes:
246
+ To make defining your own `Task`, `Worker` or `Event` model much easier, you can use our mixin classes:
172
247
 
173
248
  - `bq.TaskModelMixin`: provides task model columns
174
249
  - `bq.TaskModelRefWorkerMixin`: provides foreign key column and relationship to `bq.Worker`
250
+ - `bq.TaskModelRefParentMixin`: provides foreign key column and relationship to children `bq.Task` created during processing
251
+ - `bq.TaskModelRefEventMixin`: provides foreign key column and relationship to `bq.Event`
175
252
  - `bq.WorkerModelMixin`: provides worker model columns
176
253
  - `bq.WorkerRefMixin`: provides relationship to `bq.Task`
254
+ - `bq.EventModelMixin`: provides event model columns
255
+ - `bq.EventModelRefTaskMixin`: provides foreign key column and relationship to `bq.Task`
177
256
 
178
257
  Here's an example for defining your own Task model:
179
258
 
@@ -232,13 +311,14 @@ class Worker(bq.WorkerModelMixin, Base):
232
311
  )
233
312
  ```
234
313
 
235
- With the model class ready, you only need to change the `TASK_MODEL` and `WORKER_MODEL` of `Config` to the full Python module name plus the class name like this.
314
+ With the model class ready, you only need to change the `TASK_MODEL`, `WORKER_MODEL` and `EVENT_MODEL` of `Config` to the full Python module name plus the class name like this.
236
315
 
237
316
  ```python
238
317
  import bq
239
318
  config = bq.Config(
240
319
  TASK_MODEL="my_pkgs.models.Task",
241
320
  WORKER_MODEL="my_pkgs.models.Worker",
321
+ EVENT_MODEL="my_pkgs.models.Event",
242
322
  # ... other configs
243
323
  )
244
324
  app = bq.BeanQueue(config)
@@ -246,21 +326,21 @@ app = bq.BeanQueue(config)
246
326
 
247
327
  ## Why?
248
328
 
249
- There are countless worker queue projects. Why make yet another one?
250
- The primary issue with most worker queue tools is their reliance on a standalone broker server.
251
- Our worker queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
252
- However, integrating an external worker queue into the system presents a risk.
253
- The worker queue and the database don't share the same data view, potentially compromising data integrity and reliability.
329
+ There are countless work queue projects. Why make yet another one?
330
+ The primary issue with most work queue tools is their reliance on a standalone broker server.
331
+ Our work queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
332
+ However, integrating an external work queue into the system presents a risk.
333
+ The work queue and the database don't share the same data view, potentially compromising data integrity and reliability.
254
334
 
255
335
  For example, you have a table of `images` to keep the user-uploaded images.
256
- And you have a background worker queue for resizing the uploaded images into different thumbnail sizes.
257
- So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the worker queue.
336
+ And you have a background work queue for resizing the uploaded images into different thumbnail sizes.
337
+ So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the work queue.
258
338
 
259
- Say you push the task to the worker queue immediately after you insert the `images` table then commit like this:
339
+ Say you push the task to the work queue immediately after you insert the `images` table then commit like this:
260
340
 
261
341
  ```
262
342
  1. Insert into the "images" table
263
- 2. Push resizing task to the worker queue
343
+ 2. Push resizing task to the work queue
264
344
  3. Commit db changes
265
345
  ```
266
346
 
@@ -268,21 +348,21 @@ While this might seem like the right way to do it, there's a hidden bug.
268
348
  If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
269
349
  One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
270
350
  But this adds complexity to the system and also increases the latency if the first attempt fails.
271
- Also, if the commit step fails, you will have a failed worker queue job trying to fetch a row from the database that will never exist.
351
+ Also, if the commit step fails, you will have a failed work queue job trying to fetch a row from the database that will never exist.
272
352
 
273
353
  Another approach is to push the resize task after the database changes are committed. It works like this:
274
354
 
275
355
  ```
276
356
  1. Insert into the "images" table
277
357
  2. Commit db changes
278
- 3. Push resizing task to the worker queue
358
+ 3. Push resizing task to the work queue
279
359
  ```
280
360
 
281
361
  With this approach, we don't need to worry about workers picking up the task too early.
282
362
  However, there's another drawback.
283
- If step 3 for pushing a new task to the worker queue fails, the newly inserted `images` row will never be processed.
284
- There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the worker queue storage.
285
- Things will be much easier if we have a worker queue that shares the same consistent view with the worker queue.
363
+ If step 3 for pushing a new task to the work queue fails, the newly inserted `images` row will never be processed.
364
+ There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the work queue storage.
365
+ Things will be much easier if we have a work queue that shares the same consistent view with the database.
286
366
 
287
367
  By using a database as the data storage, all the problems are gone.
288
368
  You can simply do the following:
@@ -294,15 +374,15 @@ You can simply do the following:
294
374
  ```
295
375
 
296
376
  It's all or nothing!
297
- By doing so, you don't need to maintain another worker queue backend.
298
- You are probably using a database anyway, so this worker queue comes for free.
377
+ By doing so, you don't need to maintain another work queue backend.
378
+ You are probably using a database anyway, so this work queue comes for free.
299
379
 
300
- Usually, a database is inefficient as the worker queues data storage because of the potential lock contention and the need for constant querying.
380
+ Usually, a database is inefficient as the work queue's data storage because of the potential lock contention and the need for constant querying.
301
381
  However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
302
382
 
303
- This project is inspired by many of the SKIP-LOCKED-based worker queue successors.
383
+ This project is inspired by many of the SKIP-LOCKED-based work queue successors.
304
384
  Why don't we just use those existing tools?
305
- Well, because while they work great as worker queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
385
+ Well, because while they work great as work queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
306
386
  Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
307
387
 
308
388
  With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
@@ -319,6 +399,8 @@ A modern accounting book service based on the most popular open source version c
319
399
  ## Alternatives
320
400
 
321
401
  - [solid_queue](https://github.com/rails/solid_queue)
402
+ - [good_job](https://github.com/bensheldon/good_job)
403
+ - [graphile-worker](https://github.com/graphile/worker)
322
404
  - [postgres-tq](https://github.com/flix-tech/postgres-tq)
323
405
  - [pq](https://github.com/malthe/pq/)
324
406
  - [PgQueuer](https://github.com/janbjorge/PgQueuer)
@@ -1,5 +1,5 @@
1
1
  # BeanQueue [![CircleCI](https://dl.circleci.com/status-badge/img/gh/LaunchPlatform/bq/tree/master.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/LaunchPlatform/beanhub-extract/tree/master)
2
- BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
2
+ BeanQueue, a lightweight Python task queue framework based on [SQLAlchemy](https://www.sqlalchemy.org/), PostgreSQL [SKIP LOCKED queries](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) / [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) statements.
3
3
 
4
4
  **Notice**: Still in its early stage, we built this for [BeanHub](https://beanhub.io)'s internal usage. May change rapidly. Use at your own risk for now.
5
5
 
@@ -9,8 +9,10 @@ BeanQueue, a lightweight worker queue framework based on [SQLAlchemy](https://ww
9
9
  - **Easy-to-deploy**: Only rely on PostgreSQL
10
10
  - **Easy-to-use**: Provide command line tools for processing tasks, also helpers for generating tasks models
11
11
  - **Auto-notify**: Notify will automatically be generated and sent for inserted or updated tasks
12
+ - **Retry**: Built-in and customizable retry-policies
13
+ - **Schedule**: Schedule task to run later
12
14
  - **Worker heartbeat and auto-reschedule**: Each worker keeps updating heartbeat, if one is found dead, the others will reschedule the tasks
13
- - **Customizable**: Use it as an library and build your own worker queue
15
+ - **Customizable**: Use it as a library and build your own work queue
14
16
  - **Native DB operations**: Commit your tasks with other db entries altogether without worrying about data inconsistency issues
15
17
 
16
18
  ## Install
@@ -91,20 +93,92 @@ db.commit()
91
93
  To run the worker, you can do this:
92
94
 
93
95
  ```bash
94
- BQ_PROCESSOR_PACKAGES='["my_pkgs.processors"]' python -m bq.cmds.process images
96
+ BQ_PROCESSOR_PACKAGES='["my_pkgs.processors"]' bq process images
95
97
  ```
96
98
 
97
99
  The `BQ_PROCESSOR_PACKAGES` is a JSON list that contains the Python packages where you define your processors (the functions you decorated with `bq.processors.registry.processor`).
98
100
  To submit a task for testing purpose, you can do
99
101
 
100
102
  ```bash
101
- python -m bq.cmds.submit images my_pkgs.processors resize_image -k '{"width": 200, "height": 300}'
103
+ bq submit images my_pkgs.processors resize_image -k '{"width": 200, "height": 300}'
102
104
  ```
103
105
 
104
106
  To create tables for BeanQueue, you can run
105
107
 
106
108
  ```bash
107
- python -m bq.cmds.create_tables
109
+ bq create_tables
110
+ ```
111
+
112
+ ### Schedule
113
+
114
+ In most cases, a task will be executed as soon as possible after it is created.
115
+ To run a task later, you can set a datetime value to the `scheduled_at` attribute of the task model.
116
+ For example:
117
+
118
+ ```python
119
+ import datetime
120
+
121
+ db = Session()
122
+ task = resize_image.run(width=200, height=300)
123
+ task.scheduled_at = func.now() + datetime.timedelta(minutes=3)
124
+ db.add(task)
125
+ ```
126
+
127
+ Please note that currently, workers won't wake up at the next exact moment when the scheduled tasks are ready to run.
128
+ It has to wait until the polling times out, and eventually, it will see the task's scheduled_at time exceeds the current datetime.
129
+ Therefore, depending on your `POLL_TIMEOUT` setting and the number of your workers when they started processing, the actual execution time may be inaccurate.
130
+ If you set the `POLL_TIMEOUT` to 60 seconds, please expect less than 60 seconds of delay.
131
+
132
+ ### Retry
133
+
134
+ To automatically retry a task after failure, you can specify a retry policy to the processor.
135
+
136
+ ```python
137
+ import datetime
138
+ import bq
139
+ from sqlalchemy.orm import Session
140
+
141
+ app = bq.BeanQueue()
142
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
143
+
144
+ @app.processor(channel="images", retry_policy=delay_retry)
145
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
146
+ # resize image here ...
147
+ pass
148
+ ```
149
+
150
+ Currently, we provide some simple common retry policies such as `DelayRetry` and `ExponentialBackoffRetry`.
151
+ Surely, you can define your own retry policy easily by making a function that returns an optional value representing the next scheduled time for retry.
152
+
153
+ ```python
154
+ def my_retry_policy(task: bq.Task) -> typing.Any:
155
+ # calculate delay based on task model ...
156
+ return func.now() + datetime.timedelta(seconds=delay)
157
+ ```
158
+
159
+ To cap how many attempts are allowed, you can also use `LimitAttempt` like this:
160
+
161
+ ```python
162
+ delay_retry = bq.DelayRetry(delay=datetime.timedelta(seconds=120))
163
+ capped_delay_retry = bq.LimitAttempt(3, delay_retry)
164
+
165
+ @app.processor(channel="images", retry_policy=capped_delay_retry)
166
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
167
+ # resize image here ...
168
+ pass
169
+ ```
170
+
171
+ You can also retry only for specific exception classes with the `retry_exceptions` argument.
172
+
173
+ ```python
174
+ @app.processor(
175
+ channel="images",
176
+ retry_policy=delay_retry,
177
+ retry_exceptions=ValueError,
178
+ )
179
+ def resize_image(db: Session, task: bq.Task, width: int, height: int):
180
+ # resize image here ...
181
+ pass
108
182
  ```
109
183
 
110
184
  ### Configurations
@@ -125,7 +199,7 @@ container = bq.Container()
125
199
  container.wire(packages=[bq])
126
200
  config = bq.Config(
127
201
  PROCESSOR_PACKAGES=["my_pkgs.processors"],
128
- DATABASE_URL=str(config.DATABASE_URL),
202
+ DATABASE_URL=config.DATABASE_URL,
129
203
  BATCH_SIZE=10,
130
204
  )
131
205
  app = bq.BeanQueue(config=config)
@@ -134,7 +208,7 @@ app = bq.BeanQueue(config=config)
134
208
  Then you can pass `--app` argument (or `-a` for short) pointing to the app object to the process command like this:
135
209
 
136
210
  ```bash
137
- python -m bq.cmds.process -a my_pkgs.bq.app images
211
+ bq -a my_pkgs.bq.app process images
138
212
  ```
139
213
 
140
214
  Or if you prefer to define your own process command, you can also call `process_tasks` of the `BeanQueue` object directly like this:
@@ -148,12 +222,16 @@ app.process_tasks(channels=("images",))
148
222
  BeanQueue is designed to be as customizable as much as possible.
149
223
  Of course, you can define your own SQLAlchemy model instead of using the ones we provided.
150
224
 
151
- To make defining your own `Task` model or `Worker` model much easier, you can use our mixin classes:
225
+ To make defining your own `Task`, `Worker` or `Event` model much easier, you can use our mixin classes:
152
226
 
153
227
  - `bq.TaskModelMixin`: provides task model columns
154
228
  - `bq.TaskModelRefWorkerMixin`: provides foreign key column and relationship to `bq.Worker`
229
+ - `bq.TaskModelRefParentMixin`: provides foreign key column and relationship to children `bq.Task` created during processing
230
+ - `bq.TaskModelRefEventMixin`: provides foreign key column and relationship to `bq.Event`
155
231
  - `bq.WorkerModelMixin`: provides worker model columns
156
232
  - `bq.WorkerRefMixin`: provides relationship to `bq.Task`
233
+ - `bq.EventModelMixin`: provides event model columns
234
+ - `bq.EventModelRefTaskMixin`: provides foreign key column and relationship to `bq.Task`
157
235
 
158
236
  Here's an example for defining your own Task model:
159
237
 
@@ -212,13 +290,14 @@ class Worker(bq.WorkerModelMixin, Base):
212
290
  )
213
291
  ```
214
292
 
215
- With the model class ready, you only need to change the `TASK_MODEL` and `WORKER_MODEL` of `Config` to the full Python module name plus the class name like this.
293
+ With the model class ready, you only need to change the `TASK_MODEL`, `WORKER_MODEL` and `EVENT_MODEL` of `Config` to the full Python module name plus the class name like this.
216
294
 
217
295
  ```python
218
296
  import bq
219
297
  config = bq.Config(
220
298
  TASK_MODEL="my_pkgs.models.Task",
221
299
  WORKER_MODEL="my_pkgs.models.Worker",
300
+ EVENT_MODEL="my_pkgs.models.Event",
222
301
  # ... other configs
223
302
  )
224
303
  app = bq.BeanQueue(config)
@@ -226,21 +305,21 @@ app = bq.BeanQueue(config)
226
305
 
227
306
  ## Why?
228
307
 
229
- There are countless worker queue projects. Why make yet another one?
230
- The primary issue with most worker queue tools is their reliance on a standalone broker server.
231
- Our worker queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
232
- However, integrating an external worker queue into the system presents a risk.
233
- The worker queue and the database don't share the same data view, potentially compromising data integrity and reliability.
308
+ There are countless work queue projects. Why make yet another one?
309
+ The primary issue with most work queue tools is their reliance on a standalone broker server.
310
+ Our work queue tasks frequently interact with the database, and the atomic nature of database transactions is great for data integrity.
311
+ However, integrating an external work queue into the system presents a risk.
312
+ The work queue and the database don't share the same data view, potentially compromising data integrity and reliability.
234
313
 
235
314
  For example, you have a table of `images` to keep the user-uploaded images.
236
- And you have a background worker queue for resizing the uploaded images into different thumbnail sizes.
237
- So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the worker queue.
315
+ And you have a background work queue for resizing the uploaded images into different thumbnail sizes.
316
+ So, you will first need to insert a row for the uploaded image about the job into the database before you push the task to the work queue.
238
317
 
239
- Say you push the task to the worker queue immediately after you insert the `images` table then commit like this:
318
+ Say you push the task to the work queue immediately after you insert the `images` table then commit like this:
240
319
 
241
320
  ```
242
321
  1. Insert into the "images" table
243
- 2. Push resizing task to the worker queue
322
+ 2. Push resizing task to the work queue
244
323
  3. Commit db changes
245
324
  ```
246
325
 
@@ -248,21 +327,21 @@ While this might seem like the right way to do it, there's a hidden bug.
248
327
  If the worker starts too fast before the transaction commits at step 3, it will not be able to see the new row in `images` as it has not been committed yet.
249
328
  One may need to make the task retry a few times to ensure that even if the first attempt failed, it could see the image row in the following attempt.
250
329
  But this adds complexity to the system and also increases the latency if the first attempt fails.
251
- Also, if the commit step fails, you will have a failed worker queue job trying to fetch a row from the database that will never exist.
330
+ Also, if the commit step fails, you will have a failed work queue job trying to fetch a row from the database that will never exist.
252
331
 
253
332
  Another approach is to push the resize task after the database changes are committed. It works like this:
254
333
 
255
334
  ```
256
335
  1. Insert into the "images" table
257
336
  2. Commit db changes
258
- 3. Push resizing task to the worker queue
337
+ 3. Push resizing task to the work queue
259
338
  ```
260
339
 
261
340
  With this approach, we don't need to worry about workers picking up the task too early.
262
341
  However, there's another drawback.
263
- If step 3 for pushing a new task to the worker queue fails, the newly inserted `images` row will never be processed.
264
- There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the worker queue storage.
265
- Things will be much easier if we have a worker queue that shares the same consistent view with the worker queue.
342
+ If step 3 for pushing a new task to the work queue fails, the newly inserted `images` row will never be processed.
343
+ There are many solutions to this problem, but these are all caused by inconsistent data views between the database and the work queue storage.
344
+ Things will be much easier if we have a work queue that shares the same consistent view with the database.
266
345
 
267
346
  By using a database as the data storage, all the problems are gone.
268
347
  You can simply do the following:
@@ -274,15 +353,15 @@ You can simply do the following:
274
353
  ```
275
354
 
276
355
  It's all or nothing!
277
- By doing so, you don't need to maintain another worker queue backend.
278
- You are probably using a database anyway, so this worker queue comes for free.
356
+ By doing so, you don't need to maintain another work queue backend.
357
+ You are probably using a database anyway, so this work queue comes for free.
279
358
 
280
- Usually, a database is inefficient as the worker queues data storage because of the potential lock contention and the need for constant querying.
359
+ Usually, a database is inefficient as the work queues data storage because of the potential lock contention and the need for constant querying.
281
360
  However, things have changed since the [introduction of the SKIP LOCKED](https://www.2ndquadrant.com/en/blog/what-is-select-skip-locked-for-in-postgresql-9-5/) and [LISTEN](https://www.postgresql.org/docs/current/sql-listen.html) / [NOTIFY](https://www.postgresql.org/docs/current/sql-notify.html) features in PostgreSQL or other databases.
282
361
 
283
- This project is inspired by many of the SKIP-LOCKED-based worker queue successors.
362
+ This project is inspired by many of the SKIP-LOCKED-based work queue successors.
284
363
  Why don't we just use those existing tools?
285
- Well, because while they work great as worker queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
364
+ Well, because while they work great as work queue solutions, they don't take advantage of writing tasks and their relative data into the database in a transaction.
286
365
  Many provide an abstraction function or gRPC method of pushing tasks into the database instead of opening it up for the user to insert the row directly with other rows and commit altogether.
287
366
 
288
367
  With BeanQueue, we don't abstract away the logic of publishing a new task into the queue.
@@ -299,6 +378,8 @@ A modern accounting book service based on the most popular open source version c
299
378
  ## Alternatives
300
379
 
301
380
  - [solid_queue](https://github.com/rails/solid_queue)
381
+ - [good_job](https://github.com/bensheldon/good_job)
382
+ - [graphile-worker](https://github.com/graphile/worker)
302
383
  - [postgres-tq](https://github.com/flix-tech/postgres-tq)
303
384
  - [pq](https://github.com/malthe/pq/)
304
385
  - [PgQueuer](https://github.com/janbjorge/PgQueuer)
@@ -0,0 +1,19 @@
1
+ from .app import BeanQueue
2
+ from .config import Config # noqa
3
+ from .models import Event
4
+ from .models import EventModelMixin
5
+ from .models import EventModelRefTaskMixin
6
+ from .models import EventType
7
+ from .models import Task # noqa
8
+ from .models import TaskModelMixin
9
+ from .models import TaskModelRefEventMixin
10
+ from .models import TaskModelRefParentMixin
11
+ from .models import TaskModelRefWorkerMixin
12
+ from .models import TaskState # noqa
13
+ from .models import Worker # noqa
14
+ from .models import WorkerModelMixin # noqa
15
+ from .models import WorkerRefMixin # noqa
16
+ from .models import WorkerState # noqa
17
+ from .processors.retry_policies import DelayRetry
18
+ from .processors.retry_policies import ExponentialBackoffRetry
19
+ from .processors.retry_policies import LimitAttempt
@@ -7,6 +7,8 @@ import sys
7
7
  import threading
8
8
  import time
9
9
  import typing
10
+ from importlib.metadata import PackageNotFoundError
11
+ from importlib.metadata import version
10
12
  from wsgiref.simple_server import make_server
11
13
  from wsgiref.simple_server import WSGIRequestHandler
12
14
 
@@ -84,6 +86,12 @@ class BeanQueue:
84
86
  def worker_model(self) -> typing.Type[models.Worker]:
85
87
  return load_module_var(self.config.WORKER_MODEL)
86
88
 
89
+ @property
90
+ def event_model(self) -> typing.Type[models.Event] | None:
91
+ if self.config.EVENT_MODEL is None:
92
+ return
93
+ return load_module_var(self.config.EVENT_MODEL)
94
+
87
95
  def _make_worker_service(self, session: DBSession):
88
96
  return self.worker_service_cls(
89
97
  session=session, task_model=self.task_model, worker_model=self.worker_model
@@ -96,7 +104,8 @@ class BeanQueue:
96
104
  self,
97
105
  channel: str = constants.DEFAULT_CHANNEL,
98
106
  auto_complete: bool = True,
99
- auto_rollback_on_exc: bool = True,
107
+ retry_policy: typing.Callable | None = None,
108
+ retry_exceptions: typing.Type | typing.Tuple[typing.Type, ...] | None = None,
100
109
  task_model: typing.Type | None = None,
101
110
  ) -> typing.Callable:
102
111
  def decorator(wrapped: typing.Callable):
@@ -106,7 +115,8 @@ class BeanQueue:
106
115
  channel=channel,
107
116
  func=wrapped,
108
117
  auto_complete=auto_complete,
109
- auto_rollback_on_exc=auto_rollback_on_exc,
118
+ retry_policy=retry_policy,
119
+ retry_exceptions=retry_exceptions,
110
120
  )
111
121
  helper_obj = ProcessorHelper(
112
122
  processor,
@@ -241,6 +251,15 @@ class BeanQueue:
241
251
  self,
242
252
  channels: tuple[str, ...],
243
253
  ):
254
+ try:
255
+ bq_version = version(__name__.split(".")[0])
256
+ except PackageNotFoundError:
257
+ bq_version = "unknown"
258
+
259
+ logger.info(
260
+ "Starting processing tasks, bq_version=%s",
261
+ bq_version,
262
+ )
244
263
  db = self.make_session()
245
264
  if not channels:
246
265
  channels = [constants.DEFAULT_CHANNEL]
@@ -318,7 +337,7 @@ class BeanQueue:
318
337
  task.func_name,
319
338
  )
320
339
  # TODO: support processor pool and other approaches to dispatch the workload
321
- registry.process(task)
340
+ registry.process(task, event_cls=self.event_model)
322
341
  if not tasks:
323
342
  # we should try to keep dispatching until we cannot find tasks
324
343
  break
@@ -0,0 +1,39 @@
1
+ import logging
2
+ import os
3
+
4
+ import click
5
+ from rich.logging import RichHandler
6
+
7
+ from .environment import Environment
8
+ from .environment import LOG_LEVEL_MAP
9
+ from .environment import LogLevel
10
+ from .environment import pass_env
11
+ from .utils import load_app
12
+
13
+
14
+ @click.group(help="Command line tools for BeanQueue")
15
+ @click.option(
16
+ "-l",
17
+ "--log-level",
18
+ type=click.Choice(
19
+ list(map(lambda key: key.value, LOG_LEVEL_MAP.keys())), case_sensitive=False
20
+ ),
21
+ default=lambda: os.environ.get("LOG_LEVEL", "INFO"),
22
+ )
23
+ @click.option(
24
+ "-a", "--app", type=str, help='BeanQueue app object to use, e.g. "my_pkgs.bq.app"'
25
+ )
26
+ @click.version_option(prog_name="bq", package_name="bq")
27
+ @pass_env
28
+ def cli(env: Environment, log_level: str, app: str):
29
+ env.log_level = LogLevel(log_level)
30
+ env.app = load_app(app)
31
+
32
+ FORMAT = "%(message)s"
33
+ logging.basicConfig(
34
+ level=LOG_LEVEL_MAP[env.log_level],
35
+ format=FORMAT,
36
+ datefmt="[%X]",
37
+ handlers=[RichHandler()],
38
+ force=True,
39
+ )
@@ -0,0 +1,12 @@
1
+ from .. import models # noqa
2
+ from ..db.base import Base
3
+ from .cli import cli
4
+ from .environment import Environment
5
+ from .environment import pass_env
6
+
7
+
8
+ @cli.command(name="create_tables", help="Create BeanQueue tables")
9
+ @pass_env
10
+ def create_tables(env: Environment):
11
+ Base.metadata.create_all(bind=env.app.engine)
12
+ env.logger.info("Done, tables created")