crazy-workers 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crazy_workers/__init__.py +5 -0
- crazy_workers/_bootstrap.py +33 -0
- crazy_workers/cli/__init__.py +4 -0
- crazy_workers/cli/commands/__init__.py +8 -0
- crazy_workers/cli/commands/lister.py +57 -0
- crazy_workers/cli/commands/params.py +37 -0
- crazy_workers/cli/commands/restorer.py +14 -0
- crazy_workers/cli/commands/starter.py +36 -0
- crazy_workers/cli/commands/stopper.py +30 -0
- crazy_workers/cli/discovery.py +93 -0
- crazy_workers/cli/main.py +85 -0
- crazy_workers/cli/ui.py +9 -0
- crazy_workers/core/__init__.py +4 -0
- crazy_workers/core/engine.py +90 -0
- crazy_workers/core/manager/__init__.py +89 -0
- crazy_workers/core/manager/lister.py +67 -0
- crazy_workers/core/manager/recoverer.py +25 -0
- crazy_workers/core/manager/starter.py +137 -0
- crazy_workers/core/manager/stopper.py +58 -0
- crazy_workers/core/recovery.py +68 -0
- crazy_workers/database/__init__.py +5 -0
- crazy_workers/database/schema.py +42 -0
- crazy_workers/database/storage.py +56 -0
- crazy_workers-0.1.0.dist-info/METADATA +247 -0
- crazy_workers-0.1.0.dist-info/RECORD +29 -0
- crazy_workers-0.1.0.dist-info/WHEEL +5 -0
- crazy_workers-0.1.0.dist-info/entry_points.txt +2 -0
- crazy_workers-0.1.0.dist-info/licenses/LICENSE +19 -0
- crazy_workers-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from ...database.schema import Worker, WorkerStatus
|
|
4
|
+
from ..engine import is_process_running
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger('crazy_workers')
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def recover_workers(manager):
    """Restart workers recorded as RUNNING whose process is no longer alive.

    Args:
        manager: Worker manager exposing ``storage`` (providing
            ``session_scope()``) and ``start_worker(type, key, parameters)``.

    Returns:
        list: Keys of the workers that were restarted successfully. Empty
        when the manager has no storage.
    """
    if not manager.storage:
        return []

    # Copy plain values out while the session is open so the loop below never
    # touches ORM objects after the session has been closed.
    with manager.storage.session_scope() as session:
        running_rows = session.query(Worker).filter_by(status=WorkerStatus.RUNNING).all()
        snapshot = [
            (row.worker_key, row.worker_type, row.parameters, row.pid)
            for row in running_rows
        ]

    recovered_keys = []
    for worker_key, worker_type, parameters, pid in snapshot:
        if is_process_running(pid):
            # Still alive: nothing to recover.
            continue
        logger.info(f'Recovering worker {worker_key}...')
        started, _ = manager.start_worker(worker_type, worker_key, parameters)
        if started:
            recovered_keys.append(worker_key)
    return recovered_keys
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
from sqlalchemy import func
|
|
7
|
+
from sqlalchemy.exc import IntegrityError
|
|
8
|
+
|
|
9
|
+
from ...database.schema import Worker, WorkerStatus
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger('crazy_workers')
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def start_worker(manager, worker_type, worker_key=None, parameters=None, env=None):
    """Register a worker in the database and launch its process.

    Args:
        manager: Worker manager providing ``storage``, ``workers_dir``,
            ``logs_dir`` and the in-memory process registry.
        worker_type: Name of the worker script, without the ``.py`` suffix.
        worker_key: Identifier of this worker instance; falls back to
            ``worker_type`` when falsy.
        parameters: Dict handed to the worker as JSON; falls back to ``{}``.
        env: Optional extra environment variables for the child process.

    Returns:
        tuple: ``(True, worker_dict)`` on success, ``(False, message)``
        otherwise.
    """
    if not manager.storage:
        return False, 'System not initialized (database missing)'

    key = worker_key or worker_type
    if not _validate_inputs(worker_type, key):
        return False, 'Invalid worker_type or worker_key'

    params = parameters or {}
    with manager.storage.session_scope() as session:
        record = session.query(Worker).filter_by(worker_key=key).first()

        if _check_already_running(manager, record, session):
            return False, 'Worker already running'

        record = _prepare_worker_record(record, worker_type, key, params, session)
        if not record:
            return False, 'Worker state conflict (concurrent start)'

        script_path = _get_worker_script_path(manager, worker_type)
        if script_path:
            return _spawn_worker_process(manager, record, script_path, params, env, session)

        # No script to run: leave the record behind in STOPPED state. The
        # change is committed when the session scope exits.
        record.status = WorkerStatus.STOPPED
        return False, f'Worker file {worker_type}.py not found'
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _validate_inputs(worker_type, worker_key):
|
|
43
|
+
for name, val in [('worker_type', worker_type), ('worker_key', worker_key)]:
|
|
44
|
+
if '..' in val or os.path.isabs(val) or '/' in val or '\\' in val:
|
|
45
|
+
logger.error(f'Invalid {name}: {val}. Potential path traversal attempt.')
|
|
46
|
+
return False
|
|
47
|
+
return True
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _check_already_running(manager, worker, session):
|
|
51
|
+
if worker and worker.status == WorkerStatus.RUNNING:
|
|
52
|
+
if manager._is_process_running(worker.pid):
|
|
53
|
+
logger.info(f'Worker {worker.worker_key} already running with PID {worker.pid}')
|
|
54
|
+
return True
|
|
55
|
+
else:
|
|
56
|
+
logger.warning(f'Worker {worker.worker_key} found in RUNNING state but PID {worker.pid} is dead. Cleaning up.')
|
|
57
|
+
worker.status = WorkerStatus.CRASHED
|
|
58
|
+
session.commit()
|
|
59
|
+
return False
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _prepare_worker_record(worker, worker_type, worker_key, parameters, session):
    """Create or refresh the worker row and commit it in STARTING state.

    Args:
        worker: Existing ``Worker`` row to update, or a falsy value to
            create a new one.
        worker_type: Worker script name stored on the row.
        worker_key: Identifier used when a new row is created.
        parameters: Parameters stored on the row.
        session: Open database session used for the commit.

    Returns:
        The committed ``Worker`` row, or None when the commit raised
        ``IntegrityError`` (logged as a concurrent start attempt).
    """
    if worker:
        worker.worker_type = worker_type
        worker.parameters = parameters
    else:
        worker = Worker(worker_key=worker_key, worker_type=worker_type, parameters=parameters)
        session.add(worker)
    worker.status = WorkerStatus.STARTING

    try:
        session.commit()
    except IntegrityError:
        session.rollback()
        logger.error(f'Concurrent start attempt for worker {worker_key}')
        return None
    return worker
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _get_worker_script_path(manager, worker_type):
|
|
81
|
+
worker_filename = f'{worker_type}.py'
|
|
82
|
+
worker_path = os.path.join(manager.workers_dir, worker_filename)
|
|
83
|
+
if not os.path.exists(worker_path):
|
|
84
|
+
logger.error(f'Worker file {worker_filename} not found in {manager.workers_dir}')
|
|
85
|
+
return None
|
|
86
|
+
return worker_path
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _spawn_worker_process(manager, worker, worker_path, parameters, env, session):
    """Launch the worker script as a child process and record the outcome.

    The child runs ``python -u -m crazy_workers._bootstrap <worker_path>
    <json parameters>`` with stdout and stderr appended to
    ``<logs_dir>/<worker_key>.log`` (or discarded when that file cannot be
    opened).

    Args:
        manager: Worker manager providing ``logs_dir`` and
            ``_active_processes``.
        worker: ``Worker`` row being started; its pid/status are updated.
        worker_path: Path of the worker script to run.
        parameters: JSON-serializable parameters for the worker.
        env: Optional mapping merged over a copy of the current environment.
        session: Open database session used to commit the status changes.

    Returns:
        tuple: ``(True, worker.to_dict())`` when the process is still alive
        after the short startup check, ``(False, message)`` when it exited
        immediately.
    """
    child_env = os.environ.copy()
    if env:
        child_env.update(env)

    log_file_path = os.path.join(manager.logs_dir, f'{worker.worker_key}.log')
    try:
        log_fh = open(log_file_path, 'a')
        logger.info(f'Worker {worker.worker_key} logging to {log_file_path}')
    except Exception as e:
        # Best effort: a worker without a usable log file still starts, its
        # output is simply discarded.
        logger.error(f'Failed to open log file for worker {worker.worker_key}: {e}')
        log_fh = None

    output_dest = log_fh if log_fh else subprocess.DEVNULL

    try:
        process = subprocess.Popen(
            [sys.executable, '-u', '-m', 'crazy_workers._bootstrap', worker_path, json.dumps(parameters)],
            stdout=output_dest,
            stderr=output_dest,
            text=True,
            env=child_env,
        )
    finally:
        # The child holds its own copy of the descriptor, so our handle is
        # closed whether or not the spawn succeeded. Without the ``finally``
        # a failing Popen would leak the open log file.
        if log_fh:
            log_fh.close()

    try:
        process.wait(timeout=0.05)
    except subprocess.TimeoutExpired:
        # This is the expected case: the process is still running after the timeout
        pass
    else:
        # wait() returned, so the process exited immediately.
        logger.error(f'Worker {worker.worker_key} failed to start immediately (exit code: {process.returncode})')
        worker.status = WorkerStatus.CRASHED
        worker.pid = None
        session.commit()
        return False, 'Worker process failed to start'

    worker.pid = process.pid
    worker.status = WorkerStatus.RUNNING
    worker.last_started_at = func.now()
    session.commit()

    # Keep the Popen handle so a later stop can use it.
    manager._active_processes[worker.worker_key] = process
    logger.info(f'Worker {worker.worker_key} started with PID {worker.pid}')
    return True, worker.to_dict()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from sqlalchemy import func
|
|
3
|
+
|
|
4
|
+
from ...database.schema import Worker, WorkerStatus
|
|
5
|
+
from ..engine import terminate_process
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger('crazy_workers')
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def stop_worker(manager, worker_key):
    """Terminate a running worker and mark it STOPPED in the database.

    Args:
        manager: Worker manager providing ``storage`` and
            ``_active_processes``.
        worker_key: Identifier of the worker to stop.

    Returns:
        tuple: ``(True, 'Worker stopped')`` on success, ``(False, reason)``
        when storage is missing, the worker is unknown or not running, or
        termination raised.
    """
    if not manager.storage:
        return False, 'System not initialized (database missing)'

    # Collect everything we need from the DB, then release the session before
    # calling terminate_process (which can block for up to `timeout` seconds).
    with manager.storage.session_scope() as session:
        target = session.query(Worker).filter_by(worker_key=worker_key).first()
        if not target:
            return False, 'Worker not found'
        if target.status != WorkerStatus.RUNNING:
            return False, 'Worker is not running'

        pid = target.pid
        # PIDs of other managed workers — their processes must not be killed even
        # if they happen to be child processes of the worker being stopped.
        other_running = session.query(Worker).filter(
            Worker.status == WorkerStatus.RUNNING,
            Worker.worker_key != worker_key,
            Worker.pid.isnot(None),
        )
        managed_pids = {row.pid for row in other_running.all()}

    logger.info(f'Stopping worker {worker_key} (PID {pid})')
    popen_handle = manager._active_processes.get(worker_key)

    try:
        terminate_process(pid, popen_process=popen_handle, exclude_pids=managed_pids)
    except Exception as exc:
        logger.error(f'Error stopping worker {worker_key}: {exc}')
        return False, str(exc)

    # Forget the in-memory handle, if this manager instance had one.
    manager._active_processes.pop(worker_key, None)

    # Re-query in a fresh session: the row may have changed while we waited.
    with manager.storage.session_scope() as session:
        record = session.query(Worker).filter_by(worker_key=worker_key).first()
        if record:
            record.status = WorkerStatus.STOPPED
            record.pid = None
            record.last_stopped_at = func.now()

    logger.info(f'Worker {worker_key} stopped.')
    return True, 'Worker stopped'
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import psutil
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger('crazy_workers')
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RecoveryLock:
    """File-based lock whose file stores the PID of the holding process.

    The lock is taken by creating ``path`` exclusively. An existing lock is
    broken when its content is empty, is not an integer, or names a PID that
    ``psutil.pid_exists`` reports as gone.

    NOTE(review): the file is created first and the PID written afterwards,
    so a second process reading in between sees an empty file and breaks a
    live lock. Confirm whether callers can tolerate that window.
    """

    def __init__(self, path):
        # Location of the lock file.
        self.path = path

    def acquire(self):
        """Try to take the lock without blocking.

        Returns:
            bool: True when this process now holds the lock, False when a
            live process holds it or the existing lock could not be
            inspected or replaced.
        """
        if self._try_create():
            return True
        return self._handle_existing_lock()

    def release(self):
        """Remove the lock file, ignoring any OS error (e.g. already gone).

        NOTE(review): this does not check that the current process is the
        holder; callers should only release after a successful acquire.
        """
        try:
            os.remove(self.path)
        except OSError:
            pass

    def _try_create(self):
        """Create the lock file exclusively and write our PID into it.

        Returns:
            bool: True when the file was created, False when it already
            exists.
        """
        try:
            # O_EXCL makes creation fail if another process got there first.
            fd = os.open(self.path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            return False
        with os.fdopen(fd, 'w') as f:
            f.write(str(os.getpid()))
        return True

    def _handle_existing_lock(self):
        """Inspect an existing lock file and take it over when it is stale.

        Returns:
            bool: True when the stale lock was replaced by ours, False
            otherwise.
        """
        try:
            with open(self.path, 'r') as f:
                old_pid_str = f.read().strip()
        except OSError:
            return False

        if not old_pid_str:
            logger.warning('Found empty recovery lock. Breaking lock.')
            return self._break_and_reacquire()

        try:
            old_pid = int(old_pid_str)
        except ValueError:
            logger.warning(f'Found invalid recovery lock content: "{old_pid_str}". Breaking lock.')
            return self._break_and_reacquire()

        try:
            alive = psutil.pid_exists(old_pid)
        except OSError:
            return False

        if not alive:
            logger.warning(f'Found stale recovery lock from dead PID {old_pid}. Breaking lock.')
            return self._break_and_reacquire()

        return False

    def _break_and_reacquire(self):
        """Delete the existing lock file and try once to create our own.

        Returns:
            bool: True when the lock is now ours, False when the file could
            not be removed or another process re-created it first.
        """
        try:
            os.remove(self.path)
        except OSError:
            return False
        # Re-acquire once — if another process grabbed the lock in the meantime, give up.
        return self._try_create()
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import enum
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from sqlalchemy import JSON, Column, DateTime, Enum, Integer, String, func
|
|
4
|
+
from sqlalchemy.orm import DeclarativeBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@enum.unique
class WorkerStatus(enum.Enum):
    """Lifecycle states recorded for a worker; each value equals its name."""

    NEVER_STARTED = 'NEVER_STARTED'
    STARTING = 'STARTING'
    RUNNING = 'RUNNING'
    STOPPED = 'STOPPED'
    CRASHED = 'CRASHED'
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Base(DeclarativeBase):
    """Declarative base class for the ORM models defined in this module."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Worker(Base):
    """ORM model for one managed worker, stored in the ``workers`` table."""

    __tablename__ = 'workers'

    id = Column(Integer, primary_key=True)
    worker_key = Column(String(255), unique=True, nullable=False)
    worker_type = Column(String(255), nullable=False)  # Name of the .py file
    # A callable default builds a fresh dict for every row; a literal ``{}``
    # would be a single object shared by all rows relying on the default.
    parameters = Column(JSON, nullable=False, default=dict)
    pid = Column(Integer, nullable=True)
    status = Column(Enum(WorkerStatus), default=WorkerStatus.STOPPED)
    last_started_at: datetime = Column(DateTime, nullable=True)
    last_stopped_at: datetime = Column(DateTime, nullable=True)
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, onupdate=func.now())

    def to_dict(self):
        """Return a JSON-friendly snapshot of the worker.

        Returns:
            dict: ``worker_key``, ``worker_type``, ``parameters``, ``pid``,
            ``status`` (the enum value, or None when no status is set yet)
            and the two timestamps as ISO-8601 strings or None.
        """
        # The column default is only applied on insert, so an instance that
        # has not been flushed yet can still have ``status`` set to None.
        status = self.status
        return {
            'worker_key': self.worker_key,
            'worker_type': self.worker_type,
            'parameters': self.parameters,
            'pid': self.pid,
            'status': status.value if status is not None else None,
            'last_started_at': self.last_started_at.isoformat() if self.last_started_at else None,
            'last_stopped_at': self.last_stopped_at.isoformat() if self.last_stopped_at else None,
        }
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from sqlalchemy import create_engine, event
|
|
4
|
+
from sqlalchemy.orm import sessionmaker
|
|
5
|
+
|
|
6
|
+
from .schema import Base
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger('crazy_workers')
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Storage:
    """SQLite-backed storage owning the SQLAlchemy engine and session factory."""

    def __init__(self, db_path):
        """Open the database at *db_path* and make sure the tables exist."""
        # sqlite:///path/to/db
        self.db_path = db_path
        self.engine = create_engine(f'sqlite:///{db_path}', connect_args={'timeout': 30})

        def _enable_wal(dbapi_connection, connection_record):
            # Runs for every new DBAPI connection.
            cursor = dbapi_connection.cursor()
            cursor.execute('PRAGMA journal_mode=WAL')
            cursor.close()

        def _begin_immediate(conn):
            # Runs whenever a transaction begins on a connection.
            conn.exec_driver_sql('BEGIN IMMEDIATE')

        event.listen(self.engine, 'connect', _enable_wal)
        event.listen(self.engine, 'begin', _begin_immediate)

        self.Session = sessionmaker(bind=self.engine)
        self._ensure_tables()

    def _ensure_tables(self):
        """Initializes the database schema."""
        self._create_tables()

    def _create_tables(self):
        """Issue ``create_all`` for every model registered on ``Base``."""
        logger.info(f'Creating tables for database at {self.db_path}')
        Base.metadata.create_all(self.engine)

    def get_session(self):
        """Return a new session from the session factory."""
        return self.Session()

    @contextmanager
    def session_scope(self):
        """Provides a transactional scope around a series of operations.

        Commits when the body finishes normally, rolls back and re-raises
        on any ``Exception``, and always closes the session.
        """
        db_session = self.get_session()
        try:
            yield db_session
            # Kept inside the ``try`` so a failing commit is rolled back too.
            db_session.commit()
        except Exception:
            db_session.rollback()
            raise
        finally:
            db_session.close()

    def dispose(self):
        """Dispose of the engine."""
        self.engine.dispose()
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: crazy-workers
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python library for managing background worker processes with persistent state, automatic recovery, and a CLI.
|
|
5
|
+
Author: GioVanni Colasanto
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Vanni-broUser/crazy-workers
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/Vanni-broUser/crazy-workers/issues
|
|
9
|
+
Project-URL: Source, https://github.com/Vanni-broUser/crazy-workers
|
|
10
|
+
Keywords: workers,background,processes,process-manager,task-runner,cli,psutil
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Topic :: System :: Systems Administration
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
27
|
+
Requires-Dist: psutil>=5.9.0
|
|
28
|
+
Requires-Dist: rich>=13.0.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: ruff; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
32
|
+
Requires-Dist: coverage; extra == "dev"
|
|
33
|
+
Requires-Dist: flask; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# Crazy Workers
|
|
37
|
+
|
|
38
|
+
A Python library for managing background worker processes with persistent state, automatic crash recovery, and a built-in CLI.
|
|
39
|
+
|
|
40
|
+
[Python 3.10+](https://www.python.org/)
|
|
41
|
+
[License: MIT](LICENSE)
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- **Persistent State** — SQLite database tracks worker status, PIDs, and parameters across restarts.
|
|
46
|
+
- **Process Management** — Start, stop, and monitor background Python scripts as independent OS processes.
|
|
47
|
+
- **Automatic Recovery** — Detects crashed workers and restarts them on application boot.
|
|
48
|
+
- **Child Process Control** — On stop, terminates unmanaged subprocesses while preserving independently-managed nested workers.
|
|
49
|
+
- **CLI Interface** — Manage workers from the terminal with interactive prompts and auto-discovery (see [CLI.md](CLI.md)).
|
|
50
|
+
- **Security** — Built-in protection against path traversal in worker type and key names.
|
|
51
|
+
- **Observability** — Per-worker file logging; all service files (DB, lock, logs) live in a `.service/` folder inside your workers directory.
|
|
52
|
+
- **Zombie Protection** — Distinguishes active processes from zombies using `psutil`.
|
|
53
|
+
- **Gunicorn-safe** — File-based lock prevents concurrent recovery runs across multiple workers.
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install crazy-workers
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Or from source:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
git clone https://github.com/Vanni-broUser/crazy-workers
|
|
65
|
+
cd crazy-workers
|
|
66
|
+
pip install .
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
### 1. Create a worker script
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
# workers/my_worker.py
|
|
75
|
+
import json, sys, time
|
|
76
|
+
|
|
77
|
+
params = json.loads(sys.argv[1]) if len(sys.argv) > 1 else {}
|
|
78
|
+
duration = params.get('duration', 60)
|
|
79
|
+
|
|
80
|
+
for _ in range(duration):
|
|
81
|
+
time.sleep(1)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### 2. Manage it from Python
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from crazy_workers import WorkerManager
|
|
88
|
+
|
|
89
|
+
manager = WorkerManager('workers')
|
|
90
|
+
|
|
91
|
+
# Start
|
|
92
|
+
success, result = manager.start_worker(
|
|
93
|
+
'my_worker',
|
|
94
|
+
worker_key='job_1',
|
|
95
|
+
parameters={'duration': 30},
|
|
96
|
+
)
|
|
97
|
+
print(result['pid']) # OS process ID
|
|
98
|
+
print(result['status']) # 'RUNNING'
|
|
99
|
+
|
|
100
|
+
# List
|
|
101
|
+
for w in manager.list_workers():
|
|
102
|
+
print(w['worker_key'], w['status'])
|
|
103
|
+
|
|
104
|
+
# Stop
|
|
105
|
+
manager.stop_worker('job_1')
|
|
106
|
+
|
|
107
|
+
# Recover crashed workers (call on app startup)
|
|
108
|
+
restarted = manager.recover_workers()
|
|
109
|
+
|
|
110
|
+
manager.dispose() # releases DB connection; does NOT kill workers
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### 3. Or from the CLI
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
crazy-workers list
|
|
117
|
+
crazy-workers start my_worker --key job_1 --params '{"duration": 30}'
|
|
118
|
+
crazy-workers stop job_1
|
|
119
|
+
crazy-workers restore
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
See [CLI.md](CLI.md) for full CLI documentation.
|
|
123
|
+
|
|
124
|
+
## API Reference
|
|
125
|
+
|
|
126
|
+
### `WorkerManager(workers_dir, create_dir=True)`
|
|
127
|
+
|
|
128
|
+
| Parameter | Type | Default | Description |
|
|
129
|
+
|-----------|------|---------|-------------|
|
|
130
|
+
| `workers_dir` | `str` | `'workers'` | Directory containing worker `.py` scripts |
|
|
131
|
+
| `create_dir` | `bool` | `True` | Create `workers_dir` and `.service/` if they don't exist |
|
|
132
|
+
|
|
133
|
+
### `start_worker(worker_type, worker_key=None, parameters=None, env=None)`
|
|
134
|
+
|
|
135
|
+
| Parameter | Type | Default | Description |
|
|
136
|
+
|-----------|------|---------|-------------|
|
|
137
|
+
| `worker_type` | `str` | — | Filename (without `.py`) of the worker script |
|
|
138
|
+
| `worker_key` | `str` | `worker_type` | Unique identifier; allows multiple instances of the same type |
|
|
139
|
+
| `parameters` | `dict` | `{}` | JSON-serializable dict passed as `sys.argv[1]` to the worker |
|
|
140
|
+
| `env` | `dict` | `None` | Extra environment variables injected into the worker process |
|
|
141
|
+
|
|
142
|
+
Returns `(bool, dict | str)` — `(True, worker_dict)` on success, `(False, error_message)` on failure.
|
|
143
|
+
|
|
144
|
+
### `stop_worker(worker_key)`
|
|
145
|
+
|
|
146
|
+
Gracefully terminates the worker (SIGTERM → SIGKILL after timeout). Returns `(bool, str)`.
|
|
147
|
+
|
|
148
|
+
### `list_workers()`
|
|
149
|
+
|
|
150
|
+
Returns a list of worker dicts including RUNNING, STOPPED, CRASHED, and NEVER_STARTED (filesystem-discovered) workers.
|
|
151
|
+
|
|
152
|
+
### `recover_workers()`
|
|
153
|
+
|
|
154
|
+
Restarts any worker whose DB status is RUNNING but whose process is dead. Uses a file lock to prevent concurrent recovery. Returns a list of restarted keys.
|
|
155
|
+
|
|
156
|
+
### `dispose()`
|
|
157
|
+
|
|
158
|
+
Closes the database connection and clears internal process references. Does **not** kill background workers — they continue running independently.
|
|
159
|
+
|
|
160
|
+
## Worker Script Contract
|
|
161
|
+
|
|
162
|
+
A worker receives its parameters as a JSON string in `sys.argv[1]`:
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
import json, sys
|
|
166
|
+
|
|
167
|
+
params = json.loads(sys.argv[1]) if len(sys.argv) > 1 else {}
|
|
168
|
+
# ... do work ...
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Project Structure
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
crazy_workers/ # Library package
|
|
175
|
+
core/ # WorkerManager, process engine, recovery lock
|
|
176
|
+
cli/ # CLI entry point, commands, discovery
|
|
177
|
+
database/ # SQLAlchemy schema and SQLite storage
|
|
178
|
+
example_app/ # Flask demo application
|
|
179
|
+
app.py
|
|
180
|
+
workers/ # Example worker scripts
|
|
181
|
+
tests/
|
|
182
|
+
core/ # Unit tests for core modules
|
|
183
|
+
cli/ # Unit tests for CLI modules
|
|
184
|
+
database/ # Unit tests for storage layer
|
|
185
|
+
integration/ # Full-stack integration tests (real processes)
|
|
186
|
+
app/ # Tests for the example Flask app
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Flask Integration
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from crazy_workers import WorkerManager
|
|
193
|
+
|
|
194
|
+
def create_app():
|
|
195
|
+
app = Flask(__name__)
|
|
196
|
+
manager = WorkerManager('workers')
|
|
197
|
+
|
|
198
|
+
@app.route('/workers/start', methods=['POST'])
|
|
199
|
+
def start():
|
|
200
|
+
data = request.json
|
|
201
|
+
success, result = manager.start_worker(
|
|
202
|
+
data['worker_type'],
|
|
203
|
+
worker_key=data.get('worker_key'),
|
|
204
|
+
parameters=data.get('parameters', {}),
|
|
205
|
+
)
|
|
206
|
+
return (jsonify(result), 200) if success else (jsonify({'error': result}), 400)
|
|
207
|
+
|
|
208
|
+
manager.recover_workers() # restart any crashed workers on boot
|
|
209
|
+
return app
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
See `example_app/app.py` for a complete example.
|
|
213
|
+
|
|
214
|
+
## Gunicorn / Multi-Process Servers
|
|
215
|
+
|
|
216
|
+
When using a pre-fork server like Gunicorn:
|
|
217
|
+
|
|
218
|
+
- **Recovery is atomic** — a file lock (`.service/workers.db.recovery.lock`) ensures `recover_workers()` runs once even when multiple workers boot simultaneously.
|
|
219
|
+
- **Workers outlive their parent** — if a Gunicorn worker is recycled, background processes keep running. The next recovery cycle leaves workers that are still alive untouched and restarts those whose process has died.
|
|
220
|
+
|
|
221
|
+
## Development
|
|
222
|
+
|
|
223
|
+
### Setup
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
git clone https://github.com/Vanni-broUser/crazy-workers
|
|
227
|
+
cd crazy-workers
|
|
228
|
+
pip install -e .[dev]
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Commands
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
# Lint and format
|
|
235
|
+
ruff check . --fix && ruff format .
|
|
236
|
+
|
|
237
|
+
# Run tests
|
|
238
|
+
pytest
|
|
239
|
+
|
|
240
|
+
# Run tests with coverage
|
|
241
|
+
coverage run -m pytest && coverage report
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Standards
|
|
245
|
+
|
|
246
|
+
See [AI.md](AI.md) for the full coding and testing standards used in this project.
|
|
247
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
crazy_workers/__init__.py,sha256=E-P1e5BIOUQBNtFy3kJ329eglZhMmD-qS6RTpipOHUs,128
|
|
2
|
+
crazy_workers/_bootstrap.py,sha256=jWG6fvnQ6InfzfrqbyDJsqUaNwOAw9jFuSeWswccnVk,753
|
|
3
|
+
crazy_workers/cli/__init__.py,sha256=lySqDljKE4Rt0heEH8gh7Kd3x2MYlwm1hMil8HiUuCo,44
|
|
4
|
+
crazy_workers/cli/discovery.py,sha256=utpaJnsvHK9wZsve4bTOsmAIjFcufkwkBoc93b5tgMI,2653
|
|
5
|
+
crazy_workers/cli/main.py,sha256=woOdKkQE0qJvv8oUMiqBOxQ3Sh7JYF8yrtZ5Ri45yV0,2942
|
|
6
|
+
crazy_workers/cli/ui.py,sha256=-6s-O1LNPOGeLyFZl9-XsA2w6fdzdbgf8yf_7ykA600,142
|
|
7
|
+
crazy_workers/cli/commands/__init__.py,sha256=owAZyn-BXrJB_FzAMTnSpwA0JDxkiXfrq14JpcfOBok,264
|
|
8
|
+
crazy_workers/cli/commands/lister.py,sha256=mAmf2z8OoOlSHFU4eEnn1RAXJF0GQzFTsDpm8OxMHY0,1913
|
|
9
|
+
crazy_workers/cli/commands/params.py,sha256=DtAz0tdVj1USJMcEr3jSZEqpeeRE2vmedcYhYh0F5iY,1391
|
|
10
|
+
crazy_workers/cli/commands/restorer.py,sha256=OWo0r-DeUBEIAjCaYUDNrWtgV-Mrr9BlBsNK737QdDk,384
|
|
11
|
+
crazy_workers/cli/commands/starter.py,sha256=Nvxmzm20XN33QKPK0ZEGlWNHfzWBPHBKlZLK7HBxuFI,1315
|
|
12
|
+
crazy_workers/cli/commands/stopper.py,sha256=kS4w9yOXG5-qr58p36WcnmpY3iqt-BcKw7Of1_vRFjg,1011
|
|
13
|
+
crazy_workers/core/__init__.py,sha256=22cwXOaOtqyo2YFofcBI6rXnVcO98W2SOt_qZBrmBVI,65
|
|
14
|
+
crazy_workers/core/engine.py,sha256=tGUwqiyTOHm4pvD1oXofoEss8isrrAgoJErWFTQeXR8,2659
|
|
15
|
+
crazy_workers/core/recovery.py,sha256=o7GHWtUCTtOt7L-hVJA75BED93oVNKuXOvae04BnvsE,1665
|
|
16
|
+
crazy_workers/core/manager/__init__.py,sha256=XVGXygXs0X1uj0V-HEe7QUh9b15IBBkkfwxH61XcV9Y,2792
|
|
17
|
+
crazy_workers/core/manager/lister.py,sha256=dLpONofDpZT5BmlpkV48Mqvq1BpPhkZylvCz4LJtFM4,2091
|
|
18
|
+
crazy_workers/core/manager/recoverer.py,sha256=wFdl6K1qCjLWFrQXhdE143esIqLOvZhi-3o0hsySRHI,745
|
|
19
|
+
crazy_workers/core/manager/starter.py,sha256=spUBtLUN_v3HVJfZMknt5aHrDaJLgG0-4KhAXEe1H2g,4651
|
|
20
|
+
crazy_workers/core/manager/stopper.py,sha256=gYnJT_VHon2FM9EU0ZW3AzSnDWThVeP8wrU2-PXuJFk,1879
|
|
21
|
+
crazy_workers/database/__init__.py,sha256=hggJ9S84uhCofJm3YNYJJzGpp6LAr9v6y6bLnFV_vF0,120
|
|
22
|
+
crazy_workers/database/schema.py,sha256=S4vrjdSaKF_NkqLcMJtNWUtTA12kaYv6Gq5_DLZFOgY,1384
|
|
23
|
+
crazy_workers/database/storage.py,sha256=0qd3EFqCxTe9KDEscsvYw84KZyhUE5ZhjRsIyp78L70,1445
|
|
24
|
+
crazy_workers-0.1.0.dist-info/licenses/LICENSE,sha256=_rtyiMzX9GiLWkxcFJALIyiQ2SkCNKyjFakhn1z1m_U,1062
|
|
25
|
+
crazy_workers-0.1.0.dist-info/METADATA,sha256=tKb5fNr0ntZPmxeWSLT1eb47Dbd99Ku-BdxrbFyazlI,7984
|
|
26
|
+
crazy_workers-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
27
|
+
crazy_workers-0.1.0.dist-info/entry_points.txt,sha256=UzmFuEpQGyNASJY1q1GRsW2mvPJumngWSPKWsJFzAYA,57
|
|
28
|
+
crazy_workers-0.1.0.dist-info/top_level.txt,sha256=0J_jqq2xxlzP1cEmhpvJYCkVTI2OnoZEzLDPZjFn_cc,14
|
|
29
|
+
crazy_workers-0.1.0.dist-info/RECORD,,
|