beanqueue 1.1.9__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {beanqueue-1.1.9 → beanqueue-1.2.0}/PKG-INFO +2 -2
  2. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/app.py +216 -36
  3. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/config.py +5 -0
  4. {beanqueue-1.1.9 → beanqueue-1.2.0}/pyproject.toml +6 -3
  5. {beanqueue-1.1.9 → beanqueue-1.2.0}/.gitignore +0 -0
  6. {beanqueue-1.1.9 → beanqueue-1.2.0}/LICENSE +0 -0
  7. {beanqueue-1.1.9 → beanqueue-1.2.0}/README.md +0 -0
  8. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/__init__.py +0 -0
  9. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/__init__.py +0 -0
  10. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/cli.py +0 -0
  11. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/create_tables.py +0 -0
  12. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/environment.py +0 -0
  13. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/main.py +0 -0
  14. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/process.py +0 -0
  15. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/submit.py +0 -0
  16. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/cmds/utils.py +0 -0
  17. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/constants.py +0 -0
  18. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/db/__init__.py +0 -0
  19. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/db/base.py +0 -0
  20. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/db/session.py +0 -0
  21. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/events.py +0 -0
  22. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/models/__init__.py +0 -0
  23. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/models/event.py +0 -0
  24. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/models/helpers.py +0 -0
  25. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/models/task.py +0 -0
  26. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/models/worker.py +0 -0
  27. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/processors/__init__.py +0 -0
  28. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/processors/processor.py +0 -0
  29. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/processors/registry.py +0 -0
  30. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/processors/retry_policies.py +0 -0
  31. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/services/__init__.py +0 -0
  32. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/services/dispatch.py +0 -0
  33. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/services/worker.py +0 -0
  34. {beanqueue-1.1.9 → beanqueue-1.2.0}/bq/utils.py +0 -0
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beanqueue
3
- Version: 1.1.9
3
+ Version: 1.2.0
4
4
  Summary: BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library
5
5
  Author-email: Fang-Pen Lin <fangpen@launchplatform.com>
6
6
  License-Expression: MIT
7
7
  License-File: LICENSE
8
- Requires-Python: ~=3.11
8
+ Requires-Python: <4,>=3.11
9
9
  Requires-Dist: blinker<2,>=1.8.2
10
10
  Requires-Dist: click<9,>=8.1.7
11
11
  Requires-Dist: pydantic-settings<3,>=2.2.1
@@ -6,6 +6,9 @@ import platform
6
6
  import sys
7
7
  import threading
8
8
  import typing
9
+ from concurrent.futures import FIRST_COMPLETED
10
+ from concurrent.futures import ThreadPoolExecutor
11
+ from concurrent.futures import wait as futures_wait
9
12
  from importlib.metadata import PackageNotFoundError
10
13
  from importlib.metadata import version
11
14
  from wsgiref.simple_server import make_server
@@ -16,6 +19,8 @@ from sqlalchemy import func
16
19
  from sqlalchemy.engine import create_engine
17
20
  from sqlalchemy.engine import Engine
18
21
  from sqlalchemy.orm import Session as DBSession
22
+ from sqlalchemy.pool import NullPool
23
+ from sqlalchemy.pool import QueuePool
19
24
  from sqlalchemy.pool import SingletonThreadPool
20
25
 
21
26
  from . import constants
@@ -67,9 +72,23 @@ class BeanQueue:
67
72
  self._metrics_server_shutdown: typing.Callable[[], None] = lambda: None
68
73
 
69
74
  def create_default_engine(self):
70
- return create_engine(
71
- str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
72
- )
75
+ # Use thread-safe connection pool when thread pool executor is enabled
76
+ if self.config.MAX_WORKER_THREADS != 1:
77
+ # QueuePool is thread-safe and suitable for multi-threaded usage
78
+ # Configure pool size based on number of worker threads
79
+ max_workers = self.config.MAX_WORKER_THREADS if self.config.MAX_WORKER_THREADS > 0 else 10
80
+ pool_size = max_workers + 5 # Extra connections for main thread and worker update thread
81
+ return create_engine(
82
+ str(self.config.DATABASE_URL),
83
+ poolclass=QueuePool,
84
+ pool_size=pool_size,
85
+ max_overflow=10,
86
+ )
87
+ else:
88
+ # SingletonThreadPool for single-threaded sequential processing
89
+ return create_engine(
90
+ str(self.config.DATABASE_URL), poolclass=SingletonThreadPool
91
+ )
73
92
 
74
93
  def make_session(self) -> DBSession:
75
94
  return self.session_cls(bind=self.engine)
@@ -256,6 +275,163 @@ class BeanQueue:
256
275
  logger.info("Run metrics HTTP server on %s:%s", host, port)
257
276
  httpd.serve_forever()
258
277
 
278
+ def _process_task_in_thread(
279
+ self,
280
+ task_id: typing.Any,
281
+ registry: typing.Any,
282
+ ):
283
+ """Process a single task in a thread-safe manner with its own database session.
284
+
285
+ This method is called from worker threads in the thread pool. It creates its own
286
+ database session to avoid SQLAlchemy session conflicts between threads.
287
+ """
288
+ db = self.make_session()
289
+ try:
290
+ # Reload the task in this thread's session to avoid SQLAlchemy context issues
291
+ task = db.query(self.task_model).filter(self.task_model.id == task_id).one()
292
+
293
+ logger.info(
294
+ "Processing task %s, channel=%s, module=%s, func=%s",
295
+ task.id,
296
+ task.channel,
297
+ task.module,
298
+ task.func_name,
299
+ )
300
+ registry.process(task, event_cls=self.event_model)
301
+ db.commit()
302
+ except Exception as e:
303
+ logger.exception("Error processing task %s: %s", task_id, e)
304
+ db.rollback()
305
+ raise
306
+ finally:
307
+ db.close()
308
+
309
+ def _process_tasks_sequential(
310
+ self,
311
+ db: DBSession,
312
+ dispatch_service: DispatchService,
313
+ registry: typing.Any,
314
+ channels: tuple[str, ...],
315
+ worker_id: typing.Any,
316
+ ):
317
+ """Process tasks sequentially (original behavior for MAX_WORKER_THREADS=1)."""
318
+ while True:
319
+ while True:
320
+ tasks = dispatch_service.dispatch(
321
+ channels,
322
+ worker_id=worker_id,
323
+ limit=self.config.BATCH_SIZE,
324
+ ).all()
325
+
326
+ for task in tasks:
327
+ logger.info(
328
+ "Processing task %s, channel=%s, module=%s, func=%s",
329
+ task.id,
330
+ task.channel,
331
+ task.module,
332
+ task.func_name,
333
+ )
334
+ registry.process(task, event_cls=self.event_model)
335
+ if tasks:
336
+ db.commit()
337
+
338
+ if not tasks:
339
+ break
340
+
341
+ db.close()
342
+ try:
343
+ for notification in dispatch_service.poll(
344
+ timeout=self.config.POLL_TIMEOUT
345
+ ):
346
+ logger.debug("Receive notification %s", notification)
347
+ except TimeoutError:
348
+ logger.debug("Poll timeout, try again")
349
+ continue
350
+
351
+ def _process_tasks_threaded(
352
+ self,
353
+ db: DBSession,
354
+ executor: ThreadPoolExecutor,
355
+ dispatch_service: DispatchService,
356
+ registry: typing.Any,
357
+ channels: tuple[str, ...],
358
+ worker_id: typing.Any,
359
+ ):
360
+ """Process tasks using thread pool with continuous task feeding.
361
+
362
+ This implementation continuously checks for completed futures and fetches new tasks
363
+ when there's capacity in the thread pool. It uses concurrent.futures.wait() to
364
+ properly detect ANY completed future, not just the first one submitted.
365
+ """
366
+ max_workers = self.config.MAX_WORKER_THREADS
367
+ if max_workers == 0:
368
+ max_workers = 10 # Default when set to auto
369
+
370
+ running_futures: set = set()
371
+
372
+ while True:
373
+ # Clean up ANY completed futures using wait() with zero timeout
374
+ if running_futures:
375
+ done, running_futures = futures_wait(
376
+ running_futures, timeout=0, return_when=FIRST_COMPLETED
377
+ )
378
+ for f in done:
379
+ try:
380
+ f.result()
381
+ except Exception as e:
382
+ logger.error("Task processing failed: %s", e)
383
+
384
+ # If we have capacity, fetch and submit more tasks
385
+ capacity = max_workers - len(running_futures)
386
+ if capacity > 0:
387
+ tasks = dispatch_service.dispatch(
388
+ channels,
389
+ worker_id=worker_id,
390
+ limit=min(capacity, self.config.BATCH_SIZE),
391
+ ).all()
392
+
393
+ # Always commit to close the transaction and refresh the snapshot,
394
+ # so subsequent dispatch calls can see newly committed tasks
395
+ db.commit()
396
+
397
+ if tasks:
398
+ logger.debug(
399
+ "Dispatching %d tasks (running=%d, capacity=%d)",
400
+ len(tasks), len(running_futures), capacity
401
+ )
402
+
403
+ for task in tasks:
404
+ future = executor.submit(
405
+ self._process_task_in_thread,
406
+ task.id,
407
+ registry,
408
+ )
409
+ running_futures.add(future)
410
+
411
+ # If we have running tasks, wait briefly for any to complete then check for new tasks
412
+ if running_futures:
413
+ # Short wait - allows checking for new tasks frequently
414
+ done, running_futures = futures_wait(
415
+ running_futures, timeout=0.05, return_when=FIRST_COMPLETED
416
+ )
417
+ for f in done:
418
+ try:
419
+ f.result()
420
+ except Exception as e:
421
+ logger.error("Task processing failed: %s", e)
422
+ continue
423
+
424
+ # No running tasks and no new tasks found - poll for notifications
425
+ db.close()
426
+ try:
427
+ for notification in dispatch_service.poll(
428
+ timeout=self.config.POLL_TIMEOUT
429
+ ):
430
+ logger.debug("Receive notification %s", notification)
431
+ except TimeoutError:
432
+ logger.debug("Poll timeout, try again")
433
+ continue
434
+
259
435
  def process_tasks(
260
436
  self,
261
437
  channels: tuple[str, ...],
@@ -329,43 +505,47 @@ class BeanQueue:
329
505
 
330
506
  worker_id = worker.id
331
507
 
508
+ # Determine the number of worker threads
509
+ max_workers = self.config.MAX_WORKER_THREADS
510
+ if max_workers == 0:
511
+ max_workers = None # Default to (num_cpus * 5)
512
+
513
+ # Create thread pool executor for concurrent task processing
514
+ executor = None
515
+ if max_workers != 1:
516
+ executor = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="task_worker")
517
+ logger.info("Created thread pool executor with max_workers=%s", max_workers)
518
+
332
519
  try:
333
- while True:
334
- while True:
335
- tasks = dispatch_service.dispatch(
336
- channels,
337
- worker_id=worker_id,
338
- limit=self.config.BATCH_SIZE,
339
- ).all()
340
- for task in tasks:
341
- logger.info(
342
- "Processing task %s, channel=%s, module=%s, func=%s",
343
- task.id,
344
- task.channel,
345
- task.module,
346
- task.func_name,
347
- )
348
- # TODO: support processor pool and other approaches to dispatch the workload
349
- registry.process(task, event_cls=self.event_model)
350
- if not tasks:
351
- # we should try to keep dispatching until we cannot find tasks
352
- break
353
- else:
354
- db.commit()
355
- # we will not see notifications in a transaction, need to close the transaction first before entering
356
- # polling
357
- db.close()
358
- try:
359
- for notification in dispatch_service.poll(
360
- timeout=self.config.POLL_TIMEOUT
361
- ):
362
- logger.debug("Receive notification %s", notification)
363
- except TimeoutError:
364
- logger.debug("Poll timeout, try again")
365
- continue
520
+ if executor is not None:
521
+ # Threaded processing with continuous task feeding
522
+ self._process_tasks_threaded(
523
+ db=db,
524
+ executor=executor,
525
+ dispatch_service=dispatch_service,
526
+ registry=registry,
527
+ channels=channels,
528
+ worker_id=worker_id,
529
+ )
530
+ else:
531
+ # Sequential processing (original behavior)
532
+ self._process_tasks_sequential(
533
+ db=db,
534
+ dispatch_service=dispatch_service,
535
+ registry=registry,
536
+ channels=channels,
537
+ worker_id=worker_id,
538
+ )
366
539
  except (SystemExit, KeyboardInterrupt):
367
540
  db.rollback()
368
541
  logger.info("Shutting down ...")
542
+
543
+ # Shutdown the executor if it was created
544
+ if executor is not None:
545
+ logger.info("Shutting down thread pool executor...")
546
+ executor.shutdown(wait=True, cancel_futures=False)
547
+ logger.info("Thread pool executor shutdown complete")
548
+
369
549
  self._worker_update_shutdown_event.set()
370
550
  worker_update_thread.join(5)
371
551
  if metrics_server_thread is not None:
@@ -16,6 +16,11 @@ class Config(BaseSettings):
16
16
  # Size of tasks batch to fetch each time from the database
17
17
  BATCH_SIZE: int = 1
18
18
 
19
+ # Maximum number of worker threads for concurrent task processing
20
+ # Set to 1 to disable thread pool and process tasks sequentially
21
+ # Set to 0 to use the default (number of CPUs * 5)
22
+ MAX_WORKER_THREADS: int = 1
23
+
19
24
  # How long we should poll before timeout in seconds
20
25
  POLL_TIMEOUT: int = 60
21
26
 
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "beanqueue"
3
- version = "1.1.9"
3
+ version = "1.2.0"
4
4
  description = "BeanQueue or BQ for short, PostgreSQL SKIP LOCK and SQLAlchemy based worker queue library"
5
5
  authors = [{ name = "Fang-Pen Lin", email = "fangpen@launchplatform.com" }]
6
- requires-python = "~=3.11"
6
+ requires-python = ">=3.11,<4"
7
7
  readme = "README.md"
8
8
  license = "MIT"
9
9
  dependencies = [
@@ -20,7 +20,7 @@ bq = "bq.cmds.main:cli"
20
20
 
21
21
  [dependency-groups]
22
22
  dev = [
23
- "psycopg2-binary>=2.9.9,<3",
23
+ "psycopg2-binary>=2.9.10,<3",
24
24
  "pytest-factoryboy>=2.7.0,<3",
25
25
  ]
26
26
 
@@ -33,3 +33,6 @@ include = ["bq"]
33
33
  [build-system]
34
34
  requires = ["hatchling"]
35
35
  build-backend = "hatchling.build"
36
+
37
+ [tool.pytest.ini_options]
38
+ testpaths = ["tests"]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes