crazy-workers 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crazy_workers/__init__.py +5 -0
- crazy_workers/_bootstrap.py +33 -0
- crazy_workers/cli/__init__.py +4 -0
- crazy_workers/cli/commands/__init__.py +8 -0
- crazy_workers/cli/commands/lister.py +57 -0
- crazy_workers/cli/commands/params.py +37 -0
- crazy_workers/cli/commands/restorer.py +14 -0
- crazy_workers/cli/commands/starter.py +36 -0
- crazy_workers/cli/commands/stopper.py +30 -0
- crazy_workers/cli/discovery.py +93 -0
- crazy_workers/cli/main.py +85 -0
- crazy_workers/cli/ui.py +9 -0
- crazy_workers/core/__init__.py +4 -0
- crazy_workers/core/engine.py +90 -0
- crazy_workers/core/manager/__init__.py +89 -0
- crazy_workers/core/manager/lister.py +67 -0
- crazy_workers/core/manager/recoverer.py +25 -0
- crazy_workers/core/manager/starter.py +137 -0
- crazy_workers/core/manager/stopper.py +58 -0
- crazy_workers/core/recovery.py +68 -0
- crazy_workers/database/__init__.py +5 -0
- crazy_workers/database/schema.py +42 -0
- crazy_workers/database/storage.py +56 -0
- crazy_workers-0.1.0.dist-info/METADATA +247 -0
- crazy_workers-0.1.0.dist-info/RECORD +29 -0
- crazy_workers-0.1.0.dist-info/WHEEL +5 -0
- crazy_workers-0.1.0.dist-info/entry_points.txt +2 -0
- crazy_workers-0.1.0.dist-info/licenses/LICENSE +19 -0
- crazy_workers-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Thin launcher invoked by WorkerManager for every worker subprocess.
|
|
3
|
+
Configures logging once so individual worker scripts don't have to.
|
|
4
|
+
|
|
5
|
+
Invocation (managed internally by WorkerManager):
|
|
6
|
+
python -m crazy_workers._bootstrap <worker_path> <json_params>
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import runpy
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main():
    """Configure logging, then execute a worker script as ``__main__``.

    Expects ``sys.argv`` to be ``[<launcher>, <worker_path>, ...]``. The
    launcher entry is dropped so the worker finds its own path in
    ``sys.argv[0]`` followed by the remaining arguments, and the directory
    containing the worker script is placed first on ``sys.path``.

    Raises:
        SystemExit: If no worker path was supplied on the command line.
    """
    # Validate before touching anything: a bare invocation used to fail with
    # an IndexError on sys.argv[0] after argv had already been rewritten.
    if len(sys.argv) < 2:
        sys.exit('usage: python -m crazy_workers._bootstrap <worker_path> <json_params>')

    # force=True replaces any handlers already attached to the root logger.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        stream=sys.stderr,
        force=True,
    )

    # Restore sys.argv so the worker sees [worker_path, json_params]
    sys.argv = sys.argv[1:]

    worker_path = sys.argv[0]
    sys.path.insert(0, os.path.dirname(os.path.abspath(worker_path)))

    runpy.run_path(worker_path, run_name='__main__')


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from .lister import list_workers
|
|
2
|
+
from .params import show_params
|
|
3
|
+
from .restorer import restore_workers
|
|
4
|
+
from .starter import start_worker
|
|
5
|
+
from .stopper import stop_worker
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
__all__ = ['list_workers', 'show_params', 'start_worker', 'stop_worker', 'restore_workers']
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from rich.table import Table
|
|
4
|
+
|
|
5
|
+
from ..ui import console
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def list_workers(manager):
    """Print a table of the workers reported by *manager*.

    Returns an empty list (after printing a notice) when the manager reports
    no workers; otherwise prints the table and returns the worker dicts
    exactly as ``manager.list_workers()`` produced them.
    """
    entries = manager.list_workers()
    if not entries:
        console().print('[yellow]No workers found in database.[/yellow]')
        return []

    # Rich style per status; any status not listed here is shown in yellow.
    status_styles = {
        'RUNNING': 'green',
        'CRASHED': 'bold red',
        'FAILED': 'bold red',
        'STOPPED': 'dim',
        'NEVER_STARTED': 'cyan',
    }

    table = Table(
        title='[bold cyan]Active & Registered Workers[/bold cyan]', border_style='cyan', header_style='bold magenta'
    )
    column_specs = (
        ('#', {'justify': 'right', 'style': 'dim'}),
        ('Key', {'style': 'bold'}),
        ('Type', {}),
        ('Status', {'justify': 'center'}),
        ('PID', {'justify': 'right', 'style': 'green'}),
        ('Last Action', {'justify': 'center'}),
        ('Params', {'overflow': 'ellipsis'}),
    )
    for heading, options in column_specs:
        table.add_column(heading, **options)

    for position, entry in enumerate(entries, 1):
        state = entry['status']
        style = status_styles.get(state, 'yellow')

        # A running worker shows its start time; otherwise fall back to the
        # last stop time when one is recorded.
        if state == 'RUNNING' and entry.get('last_started_at'):
            moment = datetime.fromisoformat(entry['last_started_at'])
            last_action = f'[green]Started {moment.strftime("%H:%M:%S")}[/green]'
        elif entry.get('last_stopped_at'):
            moment = datetime.fromisoformat(entry['last_stopped_at'])
            last_action = f'[dim]Stopped {moment.strftime("%H:%M:%S")}[/dim]'
        else:
            last_action = '-'

        raw_params = entry['parameters']
        summary = json.dumps(raw_params) if raw_params else '-'
        if len(summary) > 30:
            # Keep the cell short: 27 characters plus a literal ellipsis.
            summary = summary[:27] + '...'

        pid = entry['pid']
        table.add_row(
            str(position),
            entry['worker_key'] or '-',
            entry['worker_type'],
            f'[{style}]{state}[/{style}]',
            str(pid) if pid else '-',
            last_action,
            summary,
        )

    console().print(table)
    return entries
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from rich.prompt import IntPrompt
|
|
3
|
+
|
|
4
|
+
from ..ui import console, err_console
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def show_params(manager, worker_key):
    """Print the stored parameters of one worker as JSON.

    With a falsy *worker_key* the user is asked to pick among the workers
    that have a key; otherwise the worker whose key equals *worker_key* is
    looked up. Returns True when parameters were printed, False when there
    was nothing to show or the key was not found.
    """
    known = manager.list_workers()
    if not known:
        console().print('[yellow]No workers found.[/yellow]')
        return False

    if worker_key:
        target = None
        for candidate in known:
            if candidate['worker_key'] == worker_key:
                target = candidate
                break
        if not target:
            err_console().print(f'[bold red]Error:[/bold red] Worker {worker_key} not found')
            return False
    else:
        # Interactive mode: only entries that carry a key can be selected.
        selectable = [entry for entry in known if entry['worker_key'] is not None]
        if not selectable:
            console().print('[yellow]No registered workers to show parameters for.[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker to show parameters:[/bold cyan]')
        for number, entry in enumerate(selectable, 1):
            if entry['status'] == 'RUNNING':
                status_style = 'green'
            else:
                status_style = 'dim'
            console().print(
                f' [bold]{number})[/bold] {entry["worker_key"]} [{status_style}]({entry["status"]})[/{status_style}]'
            )

        valid_choices = [str(number) for number in range(1, len(selectable) + 1)]
        picked = IntPrompt.ask('Enter the number', choices=valid_choices)
        target = selectable[picked - 1]

    console().print(f'\n[bold cyan]Parameters for worker:[/bold cyan] {target["worker_key"]}')
    console().print_json(json.dumps(target['parameters']))
    return True
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from ..ui import console
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def restore_workers(manager):
    """Run worker recovery on *manager* and report the outcome.

    Prints either the list of restored keys or a notice that nothing needed
    restoring. Returns True in both cases.
    """
    recovered = manager.recover_workers()

    if not recovered:
        console().print('[yellow]No workers needed restoration.[/yellow]')
        return True

    console().print(f'[bold green]Successfully restored {len(recovered)} workers:[/bold green]')
    for restored_key in recovered:
        console().print(f' - {restored_key}')
    return True
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.prompt import IntPrompt
|
|
3
|
+
|
|
4
|
+
from ..ui import console, err_console
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def start_worker(manager, worker_type, worker_key=None, parameters=None):
    """Start a worker through *manager*, prompting for the type if needed.

    Args:
        manager: Object exposing ``workers_dir`` and ``start_worker``.
        worker_type: Worker script name without ``.py``; when falsy the user
            picks one interactively from the scripts in ``workers_dir``.
        worker_key: Optional key forwarded to ``manager.start_worker``.
        parameters: Optional parameters forwarded to ``manager.start_worker``.

    Returns:
        True when the manager reports success, False on any failure
        (unreadable directory, no scripts available, or a start error).
    """
    if not worker_type:
        # Interactive mode: list .py files in workers_dir
        try:
            # '__init__.py' is package plumbing, not a worker; skipping it
            # keeps this menu in line with the manager's own worker listing.
            # Sorting makes the numbering stable, since os.listdir returns
            # entries in arbitrary order.
            files = sorted(
                name[:-3]
                for name in os.listdir(manager.workers_dir)
                if name.endswith('.py') and name != '__init__.py'
            )
        except Exception as e:
            # Deliberately broad: any failure to read the directory is
            # reported to the user instead of surfacing as a traceback.
            err_console().print(f'[bold red]Error reading workers directory:[/bold red] {e}')
            return False

        if not files:
            console().print(f'[yellow]No worker scripts found in {manager.workers_dir}[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker type to start:[/bold cyan]')
        for i, f in enumerate(files, 1):
            console().print(f' [bold]{i})[/bold] {f}')

        choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(files) + 1)])
        worker_type = files[choice - 1]

    # On success `result` is indexed by 'worker_key' and 'pid'; on failure it
    # is printed as the error text.
    success, result = manager.start_worker(worker_type, worker_key=worker_key, parameters=parameters)
    if not success:
        err_console().print(f'[bold red]Error:[/bold red] {result}')
        return False

    console().print('[bold green]Success:[/bold green] Worker started')
    console().print(f' [bold]Key:[/bold] {result["worker_key"]}')
    console().print(f' [bold]PID:[/bold] {result["pid"]}')
    return True
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from rich.prompt import IntPrompt
|
|
2
|
+
|
|
3
|
+
from ..ui import console, err_console
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def stop_worker(manager, worker_key):
    """Stop a worker through *manager*, prompting for the key if needed.

    With a falsy *worker_key* the user chooses among the workers whose status
    is ``RUNNING``. Returns True when the manager reports success, False when
    nothing is running or the manager reports an error.
    """
    if not worker_key:
        # Interactive mode
        candidates = [entry for entry in manager.list_workers() if entry['status'] == 'RUNNING']
        if not candidates:
            console().print('[yellow]No running workers to stop.[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker to stop:[/bold cyan]')
        for number, entry in enumerate(candidates, 1):
            console().print(f' [bold]{number})[/bold] {entry["worker_key"]} [dim]({entry["worker_type"]})[/dim]')

        valid_choices = [str(number) for number in range(1, len(candidates) + 1)]
        picked = IntPrompt.ask('Enter the number', choices=valid_choices)
        worker_key = candidates[picked - 1]['worker_key']

    success, message = manager.stop_worker(worker_key)
    if not success:
        err_console().print(f'[bold red]Error:[/bold red] {message}')
        return False

    console().print(f'[bold green]Success:[/bold green] {message}')
    return True
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from rich.prompt import Prompt
|
|
4
|
+
|
|
5
|
+
from .ui import console, err_console
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_env():
    """Load ``KEY=VALUE`` pairs from ``./.env`` into ``os.environ``.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines
    with an empty key are skipped. Whitespace around key and value is
    removed, as are surrounding double and single quotes on the value.
    Variables already present in the environment are left untouched. A
    missing ``.env`` file is not an error.
    """
    # Open directly rather than checking existence first, so a file that
    # disappears between the check and the open cannot raise.
    try:
        env_file = open('.env', 'r')
    except FileNotFoundError:
        return

    with env_file:
        for line in env_file:
            line = line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, value = line.split('=', 1)
            key = key.strip()
            if not key:
                # A line such as '=value' has no variable name; os.environ
                # rejects an empty name, which would abort the whole load.
                continue
            os.environ.setdefault(key, value.strip().strip('"').strip("'"))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def save_to_env(key, value):
    """Write ``key=value`` to ``./.env``, replacing an existing entry.

    The first non-comment line whose name (the text before ``=``, with
    surrounding whitespace ignored) equals *key* is replaced; if there is
    none, the pair is appended. The new content is written to ``.env.tmp``
    and then moved over ``.env`` with ``os.replace``.

    Raises:
        OSError: If the temporary file cannot be written or moved; the
            temporary file is removed before the error propagates.
    """
    try:
        with open('.env', 'r') as f:
            lines = f.readlines()
    except FileNotFoundError:
        lines = []

    new_line = f'{key}={value}\n'
    for i, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith('#') or '=' not in stripped:
            continue
        # Compare the parsed name instead of a raw 'KEY=' prefix so that
        # 'KEY = old' is updated in place. Appending a second entry would be
        # pointless because the loader keeps the first value it sees.
        if stripped.split('=', 1)[0].strip() == key:
            lines[i] = new_line
            break
    else:
        # No existing entry: make sure the file ends with a newline first.
        if lines and not lines[-1].endswith('\n'):
            lines.append('\n')
        lines.append(new_line)

    tmp = '.env.tmp'
    try:
        with open(tmp, 'w') as f:
            f.writelines(lines)
        os.replace(tmp, '.env')
    except BaseException:
        # Do not leave a stray temp file behind when the write or move fails.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def resolve_workers_dir(flag_dir):
    """Work out which directory holds the worker scripts.

    Sources are tried in order: the *flag_dir* argument, the
    ``CRAZY_WORKERS_DIR`` environment variable (after loading ``.env``), an
    interactive prompt when stdin is a terminal, and finally a ``workers``
    directory relative to the current directory. An explicitly given path
    that is not a directory, or the absence of any usable directory, prints
    an error and exits with status 1.
    """
    load_env()

    # 1. Flag priority
    if flag_dir:
        if not os.path.isdir(flag_dir):
            err_console().print(f'[bold red]Error:[/bold red] Directory "{flag_dir}" does not exist.')
            sys.exit(1)
        return flag_dir

    # 2. Environment Variable
    from_env = os.environ.get('CRAZY_WORKERS_DIR')
    if from_env:
        if not os.path.isdir(from_env):
            err_console().print(
                f'[bold red]Error:[/bold red] Directory "{from_env}" (from CRAZY_WORKERS_DIR) does not exist.'
            )
            sys.exit(1)
        return from_env

    # 3. Interactive Prompt
    if sys.stdin.isatty():
        console().print('[bold yellow]CRAZY_WORKERS_DIR not set in environment.[/bold yellow]')
        answer = Prompt.ask('Please enter the path to your workers directory')
        if answer:
            if not os.path.isdir(answer):
                err_console().print(f'[bold red]Error:[/bold red] "{answer}" is not a valid directory.')
                sys.exit(1)
            chosen = os.path.abspath(answer)
            # Persisting the choice is best effort: a failure is reported
            # but the resolved path is still returned.
            try:
                save_to_env('CRAZY_WORKERS_DIR', chosen)
                console().print(f'[bold green]Saved CRAZY_WORKERS_DIR={chosen} to .env[/bold green]')
            except Exception as e:
                err_console().print(f'[bold red]Failed to save configuration:[/bold red] {e}')
            return chosen

    # 4. Fallback
    if os.path.isdir('workers'):
        return 'workers'

    err_console().print(
        '[bold red]Error:[/bold red] Workers directory not found. '
        'Please provide it via --workers-dir or set CRAZY_WORKERS_DIR.'
    )
    sys.exit(1)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from rich.panel import Panel
|
|
4
|
+
|
|
5
|
+
from ..core.manager import WorkerManager
|
|
6
|
+
from .commands import list_workers, restore_workers, show_params, start_worker, stop_worker
|
|
7
|
+
from .discovery import resolve_workers_dir
|
|
8
|
+
from .ui import console, err_console
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main():
    """Parse the command line and run the requested worker command.

    Without a sub-command a banner and the help text are printed and the
    process exits with status 1. Commands that report failure, invalid JSON
    passed to ``--params``, and a ``ValueError`` raised while the manager is
    in use also exit with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Crazy Workers CLI',
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=32),
    )
    parser.add_argument('--workers-dir', help='Directory containing worker scripts')

    commands = parser.add_subparsers(dest='command', help='Commands')

    # List command
    commands.add_parser('list', help='List all workers and their status')

    # Start command
    start_cmd = commands.add_parser('start', help='Start a worker (interactive if type missing)')
    start_cmd.add_argument('worker_type', nargs='?', help='The type (filename) of worker to start')
    start_cmd.add_argument('--key', help='Optional custom key for the worker')
    start_cmd.add_argument('--params', help='JSON string of parameters for the worker')

    # Stop command
    stop_cmd = commands.add_parser('stop', help='Stop a worker (interactive if key missing)')
    stop_cmd.add_argument('worker_key', nargs='?', help='The key of the worker to stop')

    # Params command
    params_cmd = commands.add_parser('params', help='Show parameters for a worker')
    params_cmd.add_argument('worker_key', nargs='?', help='The key of the worker')

    # Restore command
    commands.add_parser('restore', help='Restore workers that should be running')

    args = parser.parse_args()

    if not args.command:
        banner = Panel.fit(
            '[bold cyan]Crazy Workers CLI[/bold cyan]\n[dim]Manage your background processes with ease[/dim]',
            border_style='cyan',
        )
        console().print(banner)
        parser.print_help()
        sys.exit(1)

    workers_dir = resolve_workers_dir(args.workers_dir)
    try:
        with WorkerManager(workers_dir, create_dir=False) as manager:
            command = args.command
            # Stays True for commands whose result does not affect the exit
            # status (list, restore).
            succeeded = True

            if command == 'list':
                list_workers(manager)
            elif command == 'start':
                import json

                params = None
                if args.params:
                    try:
                        params = json.loads(args.params)
                    except json.JSONDecodeError:
                        err_console().print('[bold red]Error:[/bold red] Invalid JSON in --params')
                        sys.exit(1)

                succeeded = start_worker(manager, args.worker_type, worker_key=args.key, parameters=params)
            elif command == 'stop':
                succeeded = stop_worker(manager, args.worker_key)
            elif command == 'params':
                succeeded = show_params(manager, args.worker_key)
            elif command == 'restore':
                restore_workers(manager)

            if not succeeded:
                sys.exit(1)
    except ValueError as e:
        err_console().print(f'[bold red]Error:[/bold red] {e}')
        sys.exit(1)


if __name__ == '__main__':
    main()
|
crazy_workers/cli/ui.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import psutil
|
|
3
|
+
import subprocess
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger('crazy_workers')
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_running_process(pid):
    """Return a ``psutil.Process`` for *pid* if it is alive and not a zombie.

    Returns None when *pid* is None, when psutil reports the process as
    missing or inaccessible, or when the process is not running or is a
    zombie.
    """
    if pid is None:
        return None

    try:
        candidate = psutil.Process(pid)
        alive = candidate.is_running() and candidate.status() != psutil.STATUS_ZOMBIE
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        return None

    return candidate if alive else None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_process_running(pid):
    """Report whether *pid* refers to a live, non-zombie process.

    Any psutil or OS error raised while checking is treated as "not running".
    """
    try:
        found = get_running_process(pid)
    except (psutil.Error, OSError):
        return False
    return found is not None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def terminate_process(pid, timeout=5, popen_process=None, exclude_pids=None):
    """Gracefully terminates a process and its non-managed children.

    Children whose PIDs appear in exclude_pids are left alive — they are
    independently managed workers that should outlive their parent.
    Any other child process (raw subprocesses, shell helpers, etc.) is
    terminated alongside the parent.

    The parent and its non-excluded descendants are asked to terminate and
    share one grace period of *timeout* seconds. Whatever is still alive when
    that period ends is killed.

    Args:
        pid: PID of the process to stop.
        timeout: Grace period in seconds; None waits without limit.
        popen_process: Optional ``subprocess.Popen`` handle for the same
            process; when given it is used for waiting and killing.
        exclude_pids: PIDs (and, implicitly, their descendants) to spare.

    Returns:
        True, including when the process was already gone.

    Raises:
        Exception: Any unexpected error is logged and re-raised.
    """
    import time  # local import: only needed for the shared grace deadline

    proc = get_running_process(pid)
    if not proc:
        return True

    # Build the full exclusion set: each managed PID and all its descendants.
    # This is necessary on platforms where a single logical worker spans more
    # than one OS process (e.g. the Python launcher on Windows spawns the
    # actual interpreter as a child).
    excluded: set[int] = set(exclude_pids or [])
    for mpid in list(excluded):
        try:
            for desc in psutil.Process(mpid).children(recursive=True):
                excluded.add(desc.pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass

    try:
        # Snapshot descendant PIDs before killing the parent; the list becomes
        # unavailable once the parent exits.
        try:
            children = [c for c in proc.children(recursive=True) if c.pid not in excluded]
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            children = []

        deadline = None if timeout is None else time.monotonic() + timeout

        try:
            proc.terminate()
        except psutil.NoSuchProcess:
            # The process exited between the liveness check and the signal;
            # that is the outcome we want, not an error.
            pass
        for child in children:
            try:
                child.terminate()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

        try:
            if popen_process:
                popen_process.wait(timeout=timeout)
            else:
                proc.wait(timeout=timeout)
        except (psutil.TimeoutExpired, subprocess.TimeoutExpired):
            if popen_process:
                popen_process.kill()
                popen_process.wait()
            else:
                try:
                    proc.kill()
                except psutil.NoSuchProcess:
                    pass

        # Children get whatever is left of the grace period, then are killed.
        # Previously this escalation only ran when the parent timed out, so a
        # child ignoring the terminate request survived a well-behaved parent.
        remaining = None if deadline is None else max(0.0, deadline - time.monotonic())
        _, survivors = psutil.wait_procs(children, timeout=remaining)
        for child in survivors:
            try:
                child.kill()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

        return True
    except Exception as e:
        logger.error(f'Unexpected error terminating process {pid}: {e}')
        raise
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from ...database.storage import Storage
|
|
5
|
+
from ..engine import is_process_running
|
|
6
|
+
from ..recovery import RecoveryLock
|
|
7
|
+
from .lister import list_workers
|
|
8
|
+
from .recoverer import recover_workers
|
|
9
|
+
from .starter import start_worker
|
|
10
|
+
from .stopper import stop_worker
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger('crazy_workers')
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WorkerManager:
    """Facade over the start/stop/list/recover operations for one workers directory.

    Service files live under ``<workers_dir>/.service`` (``logs`` directory
    and ``workers.db``). Usable as a context manager; leaving the ``with``
    block calls :meth:`dispose`.
    """

    def __init__(self, workers_dir='workers', create_dir=True):
        """Validate *workers_dir*, derive the service paths and open storage.

        Raises:
            ValueError: If *workers_dir* is missing and *create_dir* is false.
        """
        self.workers_dir = workers_dir
        self._validate_workers_dir(create_dir)

        service_dir = os.path.join(self.workers_dir, '.service')
        self.service_dir = service_dir
        self.logs_dir = os.path.join(service_dir, 'logs')
        self.db_path = os.path.join(service_dir, 'workers.db')

        self._initialize_storage(create_dir)
        self._active_processes = {}  # worker_key -> Popen object

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.dispose()

    def _validate_workers_dir(self, create_dir):
        """Ensure the workers directory exists, creating it only if allowed."""
        if os.path.isdir(self.workers_dir):
            return
        if not create_dir:
            raise ValueError(f'Workers directory "{self.workers_dir}" does not exist.')
        os.makedirs(self.workers_dir, exist_ok=True)

    def _initialize_storage(self, create_dir):
        """Open the worker database, creating service directories if allowed.

        When creation is not allowed and no database file exists yet,
        ``self.storage`` is set to None.
        """
        if create_dir:
            for directory in (self.service_dir, self.logs_dir):
                os.makedirs(directory, exist_ok=True)
        elif not os.path.exists(self.db_path):
            # Not allowed to create anything and nothing to open.
            self.storage = None
            return
        self.storage = Storage(self.db_path)

    def _is_process_running(self, pid):
        """Delegate the liveness check to the engine helper."""
        return is_process_running(pid)

    def start_worker(self, worker_type, worker_key=None, parameters=None, env=None):
        return start_worker(self, worker_type, worker_key, parameters, env)

    def stop_worker(self, worker_key):
        return stop_worker(self, worker_key)

    def list_workers(self):
        return list_workers(self)

    def recover_workers(self):
        """Run worker recovery under the recovery lock.

        Returns an empty list when the service directory does not exist or
        when the lock could not be acquired; otherwise returns whatever the
        recovery routine returns.
        """
        if not os.path.exists(self.service_dir):
            return []

        lock = RecoveryLock(f'{self.db_path}.recovery.lock')
        if not lock.acquire():
            logger.debug('Recovery lock held by another process. Skipping.')
            return []

        try:
            logger.info('Starting worker recovery process.')
            return recover_workers(self)
        finally:
            lock.release()

    def dispose(self):
        """Clean up resources like database connections. Does NOT kill background processes."""
        self._active_processes.clear()
        if self.storage:
            self.storage.dispose()
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from ...database.schema import Worker, WorkerStatus
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger('crazy_workers')
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def list_workers(manager):
    """Return dicts for every registered worker plus never-started scripts.

    Worker types are the ``.py`` files in ``manager.workers_dir`` other than
    ``__init__.py``; a directory that cannot be read yields no types. Without
    storage, each type is returned as a placeholder entry. With storage, each
    database row is returned via ``to_dict()`` (rows marked RUNNING whose PID
    is no longer alive are first switched to STOPPED with no PID), followed
    by placeholder entries for types that have no row at all.
    """
    # 1. Get all .py files from workers_dir
    try:
        available_types = {
            name[:-3]
            for name in os.listdir(manager.workers_dir)
            if name.endswith('.py') and name != '__init__.py'
        }
    except Exception:
        available_types = set()

    ordered_types = sorted(available_types)

    if not manager.storage:
        # If no storage, return virtual workers for all found files
        return [_virtual_worker(w_type) for w_type in ordered_types]

    with manager.storage.session_scope() as session:
        # 2. Get registered workers from DB
        results = []
        for record in session.query(Worker).all():
            # Update status if dead
            stale = record.status == WorkerStatus.RUNNING and not manager._is_process_running(record.pid)
            if stale:
                logger.warning(
                    f'Worker {record.worker_key} found in RUNNING state but PID {record.pid} is dead. Updating status.'
                )
                record.status = WorkerStatus.STOPPED
                record.pid = None
            results.append(record.to_dict())

        # 3. Add virtual workers for files not in DB (using filename as key)
        # Note: A file might be in DB multiple times with different keys,
        # but here we only want to show types that have NEVER been started at all.
        registered_types = {entry['worker_type'] for entry in results}
        results.extend(_virtual_worker(w_type) for w_type in ordered_types if w_type not in registered_types)

        return results


def _virtual_worker(w_type):
    """Build the placeholder entry for a worker type with no database row."""
    return {
        'worker_key': None,
        'worker_type': w_type,
        'parameters': {},
        'pid': None,
        'status': WorkerStatus.NEVER_STARTED.value,
        'last_started_at': None,
        'last_stopped_at': None,
    }
|