crazy-workers 1.3.0__tar.gz → 1.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/PKG-INFO +1 -1
- crazy_workers-1.4.1/crazy_workers/__init__.py +6 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/cli/commands/params.py +5 -12
- crazy_workers-1.4.1/crazy_workers/cli/commands/starter.py +36 -0
- crazy_workers-1.4.1/crazy_workers/cli/commands/status.py +116 -0
- crazy_workers-1.4.1/crazy_workers/cli/commands/stopper.py +28 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/cli/discovery.py +23 -6
- crazy_workers-1.4.1/crazy_workers/cli/main.py +121 -0
- crazy_workers-1.4.1/crazy_workers/client.py +69 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/manager/__init__.py +7 -4
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/manager/starter.py +14 -2
- crazy_workers-1.4.1/crazy_workers/daemon/__init__.py +16 -0
- crazy_workers-1.4.1/crazy_workers/daemon/__main__.py +7 -0
- crazy_workers-1.4.1/crazy_workers/daemon/reconciler.py +118 -0
- crazy_workers-1.4.1/crazy_workers/daemon/runner.py +78 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/database/schema.py +30 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/database/storage.py +6 -1
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers.egg-info/PKG-INFO +1 -1
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers.egg-info/SOURCES.txt +5 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/pyproject.toml +1 -1
- crazy_workers-1.3.0/crazy_workers/__init__.py +0 -5
- crazy_workers-1.3.0/crazy_workers/cli/commands/starter.py +0 -36
- crazy_workers-1.3.0/crazy_workers/cli/commands/status.py +0 -82
- crazy_workers-1.3.0/crazy_workers/cli/commands/stopper.py +0 -30
- crazy_workers-1.3.0/crazy_workers/cli/main.py +0 -80
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/LICENSE +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/README.md +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/_bootstrap.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/__init__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/__main__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/base.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/detect.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/entry.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/orchestrator.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/systemd.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/boot/windows.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/cli/__init__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/cli/commands/__init__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/cli/ui.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/__init__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/backend.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/engine.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/manager/lister.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/manager/recoverer.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/manager/stopper.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/core/recovery.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/database/__init__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/testing/__init__.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/testing/backends.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers/testing/polling.py +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers.egg-info/dependency_links.txt +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers.egg-info/entry_points.txt +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers.egg-info/requires.txt +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/crazy_workers.egg-info/top_level.txt +0 -0
- {crazy_workers-1.3.0 → crazy_workers-1.4.1}/setup.cfg +0 -0
|
@@ -4,28 +4,21 @@ from rich.prompt import IntPrompt
|
|
|
4
4
|
from ..ui import console, err_console
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def show_params(
|
|
8
|
-
|
|
9
|
-
workers = manager.list_workers()
|
|
7
|
+
def show_params(client, worker_key):
|
|
8
|
+
workers = client.list()
|
|
10
9
|
if not workers:
|
|
11
10
|
console().print('[yellow]No workers found.[/yellow]')
|
|
12
11
|
return False
|
|
13
12
|
|
|
14
13
|
if not worker_key:
|
|
15
14
|
# Interactive mode
|
|
16
|
-
active_workers = [w for w in workers if w['worker_key'] is not None]
|
|
17
|
-
|
|
18
|
-
if not active_workers:
|
|
19
|
-
console().print('[yellow]No registered workers to show parameters for.[/yellow]')
|
|
20
|
-
return False
|
|
21
|
-
|
|
22
15
|
console().print('\n[bold cyan]Select a worker to show parameters:[/bold cyan]')
|
|
23
|
-
for i, w in enumerate(
|
|
16
|
+
for i, w in enumerate(workers, 1):
|
|
24
17
|
status_style = 'green' if w['status'] == 'RUNNING' else 'dim'
|
|
25
18
|
console().print(f' [bold]{i})[/bold] {w["worker_key"]} [{status_style}]({w["status"]})[/{status_style}]')
|
|
26
19
|
|
|
27
|
-
choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(
|
|
28
|
-
selected_worker =
|
|
20
|
+
choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(workers) + 1)])
|
|
21
|
+
selected_worker = workers[choice - 1]
|
|
29
22
|
else:
|
|
30
23
|
selected_worker = next((w for w in workers if w['worker_key'] == worker_key), None)
|
|
31
24
|
if not selected_worker:
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.prompt import IntPrompt
|
|
3
|
+
|
|
4
|
+
from ..ui import console, err_console
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def start_worker(client, workers_dir, worker_type, worker_key=None, parameters=None):
    """Request a worker to run. The daemon performs the actual spawn.

    Args:
        client: Control-plane client; only ``request_start`` is called on it.
        workers_dir: Directory that holds the worker ``.py`` scripts.
        worker_type: Script name without the ``.py`` suffix. When falsy, the
            user picks one interactively from the scripts in ``workers_dir``.
        worker_key: Optional key forwarded to ``client.request_start``.
        parameters: Optional parameters forwarded to ``client.request_start``.

    Returns:
        True once the request has been recorded; False when the directory
        cannot be read, holds no scripts, or the requested script is missing.
    """
    if not worker_type:
        # Interactive mode: list .py files in workers_dir
        try:
            # os.listdir yields entries in arbitrary order; sort them so the
            # menu numbering is stable between invocations.
            files = sorted(f[:-3] for f in os.listdir(workers_dir) if f.endswith('.py') and f != '__init__.py')
        except OSError as e:
            err_console().print(f'[bold red]Error reading workers directory:[/bold red] {e}')
            return False

        if not files:
            console().print(f'[yellow]No worker scripts found in {workers_dir}[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker type to start:[/bold cyan]')
        for i, f in enumerate(files, 1):
            console().print(f'  [bold]{i})[/bold] {f}')

        choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(files) + 1)])
        worker_type = files[choice - 1]

    # Surface a typo here rather than as a daemon CRASHED/retry loop later.
    if not os.path.exists(os.path.join(workers_dir, f'{worker_type}.py')):
        err_console().print(f'[bold red]Error:[/bold red] Worker file {worker_type}.py not found in {workers_dir}')
        return False

    key = client.request_start(worker_type, worker_key=worker_key, parameters=parameters)
    console().print('[bold green]Requested:[/bold green] worker set to RUNNING (the daemon will start it)')
    console().print(f'  [bold]Key:[/bold] {key}')
    return True
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
|
|
8
|
+
from ..ui import console
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def show_status(client, workers_dir):
    """Print the status header followed by the worker table, and return the rows.

    The rows are the records from ``client.list()`` merged with the scripts
    found in ``workers_dir``. When there are none, a notice is printed instead
    of the table. The (possibly empty) row list is returned either way.
    """
    console().print(_build_header(workers_dir))

    rows = _merge_with_filesystem(client.list(), workers_dir)
    if rows:
        console().print(_build_table(rows))
    else:
        console().print('[yellow]No workers found.[/yellow]')
    return rows
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _build_header(workers_dir):
    """Build the panel naming the workers directory and the state store in use.

    The state store is reported as the shared DB (with its URL redacted) when
    ``CRAZY_WORKERS_DB_URL`` is set, and as the self-contained SQLite file
    otherwise.
    """
    shared_url = os.environ.get('CRAZY_WORKERS_DB_URL')
    if not shared_url:
        store = '[dim]self-contained SQLite (.service/workers.db)[/dim]'
    else:
        store = f'[green]shared DB[/green] [dim]({_redact(shared_url)})[/dim]'

    shown_dir = workers_dir or '[dim](not set — scripts not listed)[/dim]'
    lines = (
        f'[bold]Workers dir:[/bold] {shown_dir}',
        f'[bold]State store:[/bold] {store}',
    )
    return Panel.fit('\n'.join(lines), border_style='cyan', title='[bold cyan]Crazy Workers status[/bold cyan]')
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _redact(db_url):
|
|
36
|
+
"""Hide the password in a SQLAlchemy URL for display."""
|
|
37
|
+
return re.sub(r'://([^:/@]+):[^@]*@', r'://\1:***@', db_url)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _merge_with_filesystem(db_workers, workers_dir):
|
|
41
|
+
"""Append NEVER_STARTED rows for worker scripts that have no DB record yet."""
|
|
42
|
+
results = list(db_workers)
|
|
43
|
+
if not workers_dir:
|
|
44
|
+
# No dir resolved (shared-DB mode without CRAZY_WORKERS_DIR): nothing to scan.
|
|
45
|
+
return results
|
|
46
|
+
registered_types = {w['worker_type'] for w in results}
|
|
47
|
+
try:
|
|
48
|
+
available = sorted({f[:-3] for f in os.listdir(workers_dir) if f.endswith('.py') and f != '__init__.py'})
|
|
49
|
+
except OSError:
|
|
50
|
+
available = []
|
|
51
|
+
for worker_type in available:
|
|
52
|
+
if worker_type not in registered_types:
|
|
53
|
+
results.append(
|
|
54
|
+
{
|
|
55
|
+
'worker_key': None,
|
|
56
|
+
'worker_type': worker_type,
|
|
57
|
+
'parameters': {},
|
|
58
|
+
'desired_status': None,
|
|
59
|
+
'pid': None,
|
|
60
|
+
'status': 'NEVER_STARTED',
|
|
61
|
+
'last_started_at': None,
|
|
62
|
+
'last_stopped_at': None,
|
|
63
|
+
}
|
|
64
|
+
)
|
|
65
|
+
return results
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _build_table(workers):
    """Render the worker rows as a table comparing desired and observed state.

    Each row shows its 1-based position, key, type, desired status, observed
    status, PID, the most recent start/stop time and a parameter preview that
    is truncated to 30 characters.
    """
    grid = Table(
        title='[bold cyan]Workers — desired vs actual[/bold cyan]', border_style='cyan', header_style='bold magenta'
    )
    column_specs = (
        ('#', {'justify': 'right', 'style': 'dim'}),
        ('Key', {'style': 'bold'}),
        ('Type', {}),
        ('Desired', {'justify': 'center'}),
        ('Status', {'justify': 'center'}),
        ('PID', {'justify': 'right', 'style': 'green'}),
        ('Last Action', {'justify': 'center'}),
        ('Params', {'overflow': 'ellipsis'}),
    )
    for header, options in column_specs:
        grid.add_column(header, **options)

    # Observed-status colours; anything not listed falls back to yellow.
    status_styles = (
        (('RUNNING',), 'green'),
        (('CRASHED', 'FAILED'), 'bold red'),
        (('STOPPED',), 'dim'),
        (('NEVER_STARTED',), 'cyan'),
    )

    for position, row in enumerate(workers, 1):
        state = row['status']
        state_style = next((style for names, style in status_styles if state in names), 'yellow')

        wanted = row.get('desired_status') or '-'
        wanted_style = 'green' if wanted == 'RUNNING' else 'dim'

        # A running worker shows its start time; otherwise show the last stop, if any.
        if state == 'RUNNING' and row.get('last_started_at'):
            stamp = datetime.fromisoformat(row['last_started_at'])
            last_action = f'[green]Started {stamp.strftime("%H:%M:%S")}[/green]'
        elif row.get('last_stopped_at'):
            stamp = datetime.fromisoformat(row['last_stopped_at'])
            last_action = f'[dim]Stopped {stamp.strftime("%H:%M:%S")}[/dim]'
        else:
            last_action = '-'

        params_text = json.dumps(row['parameters']) if row['parameters'] else '-'
        if len(params_text) > 30:
            params_text = params_text[:27] + '...'

        grid.add_row(
            str(position),
            row['worker_key'] or '-',
            row['worker_type'],
            f'[{wanted_style}]{wanted}[/{wanted_style}]',
            f'[{state_style}]{state}[/{state_style}]',
            str(row['pid']) if row['pid'] else '-',
            last_action,
            params_text,
        )
    return grid
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from rich.prompt import IntPrompt
|
|
2
|
+
|
|
3
|
+
from ..ui import console, err_console
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def stop_worker(client, worker_key):
    """Request a worker to stop. The daemon performs the actual termination.

    When ``worker_key`` is falsy, the user picks interactively among the
    workers whose desired status is RUNNING. Returns True once the stop request
    is recorded, and False when there is nothing to stop or the key is unknown
    to the client.
    """
    if not worker_key:
        # Interactive mode: offer the workers a stop is meaningful for.
        running = [row for row in client.list() if row['desired_status'] == 'RUNNING']
        if not running:
            console().print('[yellow]No workers desired RUNNING to stop.[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker to stop:[/bold cyan]')
        for number, row in enumerate(running, 1):
            console().print(
                f'  [bold]{number})[/bold] {row["worker_key"]} [dim]({row["worker_type"]}, {row["status"]})[/dim]'
            )

        valid_answers = [str(number) for number in range(1, len(running) + 1)]
        picked = IntPrompt.ask('Enter the number', choices=valid_answers)
        worker_key = running[picked - 1]['worker_key']

    if not client.request_stop(worker_key):
        err_console().print(f'[bold red]Error:[/bold red] Worker {worker_key} not found')
        return False

    console().print(f'[bold green]Requested:[/bold green] worker {worker_key} set to STOPPED (the daemon will stop it)')
    return True
|
|
@@ -45,7 +45,15 @@ def save_to_env(key, value):
|
|
|
45
45
|
os.replace(tmp, '.env')
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
def resolve_workers_dir(flag_dir):
|
|
48
|
+
def resolve_workers_dir(flag_dir, required=True):
|
|
49
|
+
"""Locate the workers directory (where the ``.py`` scripts live).
|
|
50
|
+
|
|
51
|
+
``required=False`` is used by commands that only talk to a shared DB (e.g.
|
|
52
|
+
``status``/``stop``/``params`` with ``CRAZY_WORKERS_DB_URL`` set): they do not
|
|
53
|
+
need the scripts, so we must never block on the interactive prompt. In that
|
|
54
|
+
mode the dir is best-effort — returned from the flag/env/``workers`` fallback
|
|
55
|
+
if available, otherwise ``None``.
|
|
56
|
+
"""
|
|
49
57
|
load_env()
|
|
50
58
|
|
|
51
59
|
# 1. Flag priority
|
|
@@ -65,6 +73,10 @@ def resolve_workers_dir(flag_dir):
|
|
|
65
73
|
err_console().print(f'[bold red]Error:[/bold red] Directory "{env_dir}" (from CRAZY_WORKERS_DIR) does not exist.')
|
|
66
74
|
sys.exit(1)
|
|
67
75
|
|
|
76
|
+
# When the dir is not needed, never prompt or exit — report it as unknown.
|
|
77
|
+
if not required:
|
|
78
|
+
return 'workers' if os.path.isdir('workers') else None
|
|
79
|
+
|
|
68
80
|
# 3. Interactive Prompt
|
|
69
81
|
if sys.stdin.isatty():
|
|
70
82
|
console().print('[bold yellow]CRAZY_WORKERS_DIR not set in environment.[/bold yellow]')
|
|
@@ -72,11 +84,16 @@ def resolve_workers_dir(flag_dir):
|
|
|
72
84
|
if user_input:
|
|
73
85
|
if os.path.isdir(user_input):
|
|
74
86
|
abs_path = os.path.abspath(user_input)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
87
|
+
# When pointed at a shared DB the cwd is the consumer app's, not ours —
|
|
88
|
+
# don't rewrite its .env (see T2). The env var lives only for this run.
|
|
89
|
+
if os.environ.get('CRAZY_WORKERS_DB_URL'):
|
|
90
|
+
os.environ.setdefault('CRAZY_WORKERS_DIR', abs_path)
|
|
91
|
+
else:
|
|
92
|
+
try:
|
|
93
|
+
save_to_env('CRAZY_WORKERS_DIR', abs_path)
|
|
94
|
+
console().print(f'[bold green]Saved CRAZY_WORKERS_DIR={abs_path} to .env[/bold green]')
|
|
95
|
+
except Exception as e:
|
|
96
|
+
err_console().print(f'[bold red]Failed to save configuration:[/bold red] {e}')
|
|
80
97
|
return abs_path
|
|
81
98
|
else:
|
|
82
99
|
err_console().print(f'[bold red]Error:[/bold red] "{user_input}" is not a valid directory.')
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
|
|
7
|
+
from ..client import WorkerClient
|
|
8
|
+
from .commands import show_params, show_status, start_worker, stop_worker
|
|
9
|
+
from .discovery import resolve_workers_dir
|
|
10
|
+
from .ui import console, err_console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _db_url():
|
|
14
|
+
return os.environ.get('CRAZY_WORKERS_DB_URL')
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _build_client(workers_dir):
    """Create the control-plane client for the shared DB or the local SQLite file.

    With CRAZY_WORKERS_DB_URL set, the client is pointed at that URL and is told
    not to create tables. Otherwise the ``.service`` directory under
    ``workers_dir`` is created if needed and the client uses the
    ``workers.db`` SQLite file inside it, creating the tables.
    """
    shared_url = _db_url()
    if not shared_url:
        # Self-contained mode: the state lives next to the worker scripts.
        service_dir = os.path.join(workers_dir, '.service')
        os.makedirs(service_dir, exist_ok=True)
        local_db = os.path.join(service_dir, 'workers.db')
        return WorkerClient(db_url=f'sqlite:///{local_db}', create_tables=True)
    return WorkerClient(db_url=shared_url, create_tables=False)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _build_parser():
|
|
34
|
+
def formatter(prog):
|
|
35
|
+
return argparse.HelpFormatter(prog, max_help_position=32)
|
|
36
|
+
|
|
37
|
+
parser = argparse.ArgumentParser(description='Crazy Workers CLI', formatter_class=formatter)
|
|
38
|
+
parser.add_argument('--workers-dir', help='Directory containing worker scripts')
|
|
39
|
+
|
|
40
|
+
subparsers = parser.add_subparsers(dest='command', help='Commands')
|
|
41
|
+
|
|
42
|
+
subparsers.add_parser('status', help='Show workers (desired vs actual) and the target DB')
|
|
43
|
+
|
|
44
|
+
start_parser = subparsers.add_parser('start', help='Request a worker to run (interactive if type missing)')
|
|
45
|
+
start_parser.add_argument('worker_type', nargs='?', help='The type (filename) of worker to start')
|
|
46
|
+
start_parser.add_argument('--key', help='Optional custom key for the worker')
|
|
47
|
+
start_parser.add_argument('--params', help='JSON string of parameters for the worker')
|
|
48
|
+
|
|
49
|
+
stop_parser = subparsers.add_parser('stop', help='Request a worker to stop (interactive if key missing)')
|
|
50
|
+
stop_parser.add_argument('worker_key', nargs='?', help='The key of the worker to stop')
|
|
51
|
+
|
|
52
|
+
params_parser = subparsers.add_parser('params', help='Show parameters for a worker')
|
|
53
|
+
params_parser.add_argument('worker_key', nargs='?', help='The key of the worker')
|
|
54
|
+
|
|
55
|
+
daemon_parser = subparsers.add_parser('daemon', help='Run the reconcile loop (owns the worker processes)')
|
|
56
|
+
daemon_parser.add_argument('--interval', type=float, default=2.0, help='Seconds between reconcile passes')
|
|
57
|
+
|
|
58
|
+
return parser
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def main():
    """CLI entry point: parse the arguments and run the selected subcommand.

    Exits with status 1 when no subcommand is given, when ``start``, ``stop``
    or ``params`` reports failure, or when a ValueError is raised while a
    client command runs. The ``daemon`` subcommand hands over to the daemon
    runner and exits with its return value.
    """
    parser = _build_parser()
    args = parser.parse_args()
    command = args.command

    if not command:
        console().print(
            Panel.fit(
                '[bold cyan]Crazy Workers CLI[/bold cyan]\n[dim]Manage your background processes with ease[/dim]',
                border_style='cyan',
            )
        )
        parser.print_help()
        sys.exit(1)

    # Only `start` (lists/validates the worker scripts) and the daemon (owns them)
    # truly need the workers dir. With a shared DB, status/stop/params work without
    # it, so we must not block on the interactive prompt (see CRAZY_WORKERS_DB_URL).
    dir_required = command in ('start', 'daemon') or _db_url() is None
    workers_dir = resolve_workers_dir(args.workers_dir, required=dir_required)

    # The daemon is the process owner, not a client — it builds its own manager.
    if command == 'daemon':
        from ..daemon.runner import main as daemon_main

        daemon_argv = ['--workers-dir', workers_dir, '--interval', str(args.interval)]
        shared_url = _db_url()
        if shared_url:
            daemon_argv.extend(['--db-url', shared_url])
        sys.exit(daemon_main(daemon_argv))

    try:
        with _build_client(workers_dir) as client:
            if command == 'status':
                show_status(client, workers_dir)
                succeeded = True
            elif command == 'start':
                succeeded = start_worker(
                    client,
                    workers_dir,
                    args.worker_type,
                    worker_key=args.key,
                    parameters=_parse_params(args.params),
                )
            elif command == 'stop':
                succeeded = stop_worker(client, args.worker_key)
            elif command == 'params':
                succeeded = show_params(client, args.worker_key)
            else:
                succeeded = True

            if not succeeded:
                sys.exit(1)
    except ValueError as error:
        err_console().print(f'[bold red]Error:[/bold red] {error}')
        sys.exit(1)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _parse_params(raw):
|
|
111
|
+
if not raw:
|
|
112
|
+
return None
|
|
113
|
+
try:
|
|
114
|
+
return json.loads(raw)
|
|
115
|
+
except json.JSONDecodeError:
|
|
116
|
+
err_console().print('[bold red]Error:[/bold red] Invalid JSON in --params')
|
|
117
|
+
sys.exit(1)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
if __name__ == '__main__':
|
|
121
|
+
main()
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Control-plane client: writes desired state only, never spawns processes.
|
|
2
|
+
|
|
3
|
+
Used by anything that is NOT the daemon (HTTP API, CLI, scripts). It shares the
|
|
4
|
+
daemon's database; the daemon reconciles desired -> actual. A client touches the
|
|
5
|
+
``workers`` table and nothing else — no OS processes, no boot hooks, no recovery.
|
|
6
|
+
|
|
7
|
+
Three ways to point it at a database, mirroring :class:`Storage`:
|
|
8
|
+
|
|
9
|
+
- ``engine``: reuse an existing SQLAlchemy engine (e.g. the host backend's).
|
|
10
|
+
- ``db_url``: any SQLAlchemy URL.
|
|
11
|
+
- neither: the caller must pass one — unlike WorkerManager, the client has no
|
|
12
|
+
workers_dir and therefore no implicit SQLite location.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .database.schema import DesiredStatus, Worker
|
|
16
|
+
from .database.storage import Storage
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class WorkerClient:
    """Control-plane client that records desired worker state in the database.

    It only reads and writes ``Worker`` rows through a ``Storage`` instance; it
    does not start or stop processes itself. Usable as a context manager, in
    which case the storage is disposed on exit.
    """

    def __init__(self, db_url=None, engine=None, create_tables=False):
        self.storage = Storage(db_url=db_url, engine=engine, create_tables=create_tables)

    @staticmethod
    def _find(session, worker_key):
        """Return the first Worker row with this key, or None when there is none."""
        return session.query(Worker).filter_by(worker_key=worker_key).first()

    def request_start(self, worker_type, worker_key=None, parameters=None):
        """Declare that a worker should be RUNNING, creating or updating its row.

        The key defaults to ``worker_type`` when ``worker_key`` is falsy. The
        row's type and parameters are overwritten with the given values
        (parameters default to an empty dict). Returns the resolved key. The
        worker is not started here; the daemon notices the desired state and
        starts it.
        """
        resolved_key = worker_key or worker_type
        with self.storage.session_scope() as session:
            record = self._find(session, resolved_key)
            if not record:
                record = Worker(worker_key=resolved_key, worker_type=worker_type)
                session.add(record)
            record.worker_type = worker_type
            record.parameters = parameters or {}
            record.desired_status = DesiredStatus.RUNNING
        return resolved_key

    def request_stop(self, worker_key):
        """Declare that ``worker_key`` should be STOPPED.

        Returns False if no such worker exists, True otherwise. The actual stop
        (and last_stopped_at) is performed by the daemon when it reconciles.
        """
        with self.storage.session_scope() as session:
            record = self._find(session, worker_key)
            if record:
                record.desired_status = DesiredStatus.STOPPED
                return True
            return False

    def list(self):
        """Return every worker row as a dict."""
        with self.storage.session_scope() as session:
            records = session.query(Worker).all()
            return [record.to_dict() for record in records]

    def get(self, worker_key):
        """Return the worker row for ``worker_key`` as a dict, or None if absent."""
        with self.storage.session_scope() as session:
            record = self._find(session, worker_key)
            if not record:
                return None
            return record.to_dict()

    def dispose(self):
        """Dispose of the underlying storage."""
        self.storage.dispose()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.dispose()
|
|
@@ -19,7 +19,7 @@ class WorkerManager:
|
|
|
19
19
|
workers_dir='workers',
|
|
20
20
|
create_dir=True,
|
|
21
21
|
backend=None,
|
|
22
|
-
auto_boot=
|
|
22
|
+
auto_boot=False,
|
|
23
23
|
boot_provider=None,
|
|
24
24
|
db_url=None,
|
|
25
25
|
engine=None,
|
|
@@ -38,9 +38,12 @@ class WorkerManager:
|
|
|
38
38
|
# The backend is the only component that touches OS processes. The default
|
|
39
39
|
# spawns real subprocesses; tests can swap in a fake one (see for_testing).
|
|
40
40
|
self.backend = backend or SubprocessBackend()
|
|
41
|
-
#
|
|
42
|
-
# boot-restore hook (see crazy_workers.boot).
|
|
43
|
-
#
|
|
41
|
+
# Opt-in legacy behaviour: when True, starting a worker transparently
|
|
42
|
+
# installs the per-user OS boot-restore hook (see crazy_workers.boot). The
|
|
43
|
+
# default is now False — in the reconciler model, surviving a reboot is the
|
|
44
|
+
# deployment's job (a systemd unit / container that runs the daemon), not a
|
|
45
|
+
# per-worker hook. boot_provider is an injection seam for tests; None lets
|
|
46
|
+
# the platform default be auto-detected.
|
|
44
47
|
self.auto_boot = auto_boot
|
|
45
48
|
self._boot_provider = boot_provider
|
|
46
49
|
# Environment variables injected into every spawned worker — e.g. the host
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
import re
|
|
4
|
+
from datetime import datetime, timezone
|
|
4
5
|
from sqlalchemy import func
|
|
5
6
|
from sqlalchemy.exc import IntegrityError
|
|
6
7
|
|
|
@@ -60,7 +61,7 @@ def _check_already_running(manager, worker, session):
|
|
|
60
61
|
return True
|
|
61
62
|
else:
|
|
62
63
|
logger.warning(f'Worker {worker.worker_key} found in RUNNING state but PID {worker.pid} is dead. Cleaning up.')
|
|
63
|
-
worker
|
|
64
|
+
_mark_crashed(worker)
|
|
64
65
|
session.commit()
|
|
65
66
|
return False
|
|
66
67
|
|
|
@@ -123,7 +124,7 @@ def _spawn_worker_process(manager, worker, worker_path, parameters, env, session
|
|
|
123
124
|
)
|
|
124
125
|
|
|
125
126
|
if handle is None:
|
|
126
|
-
worker
|
|
127
|
+
_mark_crashed(worker)
|
|
127
128
|
worker.pid = None
|
|
128
129
|
session.commit()
|
|
129
130
|
return False, 'Worker process failed to start'
|
|
@@ -131,6 +132,8 @@ def _spawn_worker_process(manager, worker, worker_path, parameters, env, session
|
|
|
131
132
|
worker.pid = handle.pid
|
|
132
133
|
worker.status = WorkerStatus.RUNNING
|
|
133
134
|
worker.last_started_at = func.now()
|
|
135
|
+
# A clean start clears the crash backoff so the next failure starts over.
|
|
136
|
+
worker.restart_count = 0
|
|
134
137
|
session.commit()
|
|
135
138
|
|
|
136
139
|
manager._active_processes[worker.worker_key] = handle
|
|
@@ -138,6 +141,15 @@ def _spawn_worker_process(manager, worker, worker_path, parameters, env, session
|
|
|
138
141
|
return True, worker.to_dict()
|
|
139
142
|
|
|
140
143
|
|
|
144
|
+
def _mark_crashed(worker):
    """Record a worker death for crash backoff.

    Sets the status to CRASHED, stamps the exit time and increments the restart
    counter (a missing counter is treated as 0). The caller is responsible for
    committing the change.
    """
    restarts_so_far = worker.restart_count or 0
    worker.status = WorkerStatus.CRASHED
    # The exit time is taken in UTC on the Python side, so the reconciler's
    # backoff math does not depend on the DB dialect's now()/timezone semantics.
    worker.last_exit_at = datetime.now(tz=timezone.utc)
    worker.restart_count = restarts_so_far + 1
|
|
151
|
+
|
|
152
|
+
|
|
141
153
|
def _ensure_boot_restore(manager):
|
|
142
154
|
# Best-effort and never raising: a freshly started worker transparently
|
|
143
155
|
# registers the OS boot-restore hook so it survives a reboot.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""The reconcile daemon: the single owner of worker processes for a context.
|
|
2
|
+
|
|
3
|
+
Clients (HTTP API, CLI, scripts) only write desired state to the shared DB; the
|
|
4
|
+
daemon runs a loop that drives the observed state toward it — starting,
|
|
5
|
+
stopping and crash-restarting processes. Exactly one daemon owns a given
|
|
6
|
+
workers_dir/DB at a time (enforced by a lock in ``crazy_workers.daemon.runner``).
|
|
7
|
+
|
|
8
|
+
Run it with ``python -m crazy_workers.daemon`` (a thin ``__main__`` shim over
|
|
9
|
+
:func:`crazy_workers.daemon.runner.main`).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .reconciler import Reconciler
|
|
13
|
+
from .runner import main
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__all__ = ['Reconciler', 'main']
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
|
+
|
|
5
|
+
from ..database.schema import DesiredStatus, Worker, WorkerStatus
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger('crazy_workers')
|
|
9
|
+
|
|
10
|
+
_BACKOFF_BASE_SECONDS = 1
|
|
11
|
+
_BACKOFF_MAX_SECONDS = 60
|
|
12
|
+
# Cap the exponent so a long-crashed worker doesn't compute an astronomically
|
|
13
|
+
# large intermediate before min() clamps it.
|
|
14
|
+
_BACKOFF_MAX_EXPONENT = 16
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Reconciler:
    """Single-owner loop: drives actual worker state toward desired state.

    Owns every worker process for one workers_dir/DB. Clients never spawn; they
    only set desired_status in the shared DB and this loop makes it so.

    | desired  | alive | action                              |
    |----------|-------|-------------------------------------|
    | RUNNING  | no    | start (skipped while in backoff)    |
    | RUNNING  | yes   | noop (reconcile observed status)    |
    | STOPPED  | yes   | stop                                |
    | STOPPED  | no    | noop                                |
    """

    def __init__(self, manager, interval=2.0):
        """Bind the loop to a manager.

        Args:
            manager: Object exposing ``storage``, ``backend``, ``start_worker``
                and ``stop_worker``; every DB and process operation goes
                through it.
            interval: Seconds to wait between two reconcile passes.
        """
        self.manager = manager
        self.interval = interval
        # Set by stop(); polled by the main loop and by the sliced sleep.
        self._stop = False

    def run_forever(self):
        """Run reconcile passes until stop() is called.

        An exception raised by a pass is logged and the loop continues with
        the next pass after the usual interval.
        """
        logger.info('Reconciler started (interval=%ss)', self.interval)
        while not self._stop:
            try:
                self.reconcile_once()
            except Exception:
                logger.exception('Reconcile pass failed; continuing.')
            # Sleep in small slices so a SIGTERM-triggered stop is honoured promptly
            # instead of after a full interval.
            self._interruptible_sleep(self.interval)
        logger.info('Reconciler stopped.')

    def stop(self):
        """Request the loop to exit; takes effect at the next flag check."""
        self._stop = True

    def _interruptible_sleep(self, seconds):
        """Sleep up to ``seconds``, waking early once stop() has been called.

        Sleeps in slices of at most 0.2s. Returns immediately when ``seconds``
        is zero or negative, or when the stop flag is already set.
        """
        deadline = time.monotonic() + seconds
        while not self._stop:
            # Read the clock once per slice. Re-reading it inside the sleep()
            # call could produce a slightly negative remainder when the deadline
            # passes between the two reads, and time.sleep() raises ValueError
            # for negative values.
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(0.2, remaining))

    def reconcile_once(self):
        """One pass over every worker. Returns the actions taken (for tests/observability).

        Returns:
            A list of ``(worker_key, action)`` tuples, one per worker for which
            an action was taken; workers needing no action are omitted.
        """
        actions = []
        for row in self._load_snapshot():
            action = self._reconcile_worker(row)
            if action:
                actions.append((row['worker_key'], action))
        return actions

    def _load_snapshot(self):
        """Return every Worker row as a plain dict, read in a single session."""
        # Read everything we need into plain dicts and release the session before
        # touching processes — start/stop open their own short-lived sessions.
        with self.manager.storage.session_scope() as session:
            return [
                {
                    'worker_key': w.worker_key,
                    'worker_type': w.worker_type,
                    'parameters': w.parameters,
                    'pid': w.pid,
                    'desired': w.desired_status,
                    'status': w.status,
                    'restart_count': w.restart_count,
                    'last_exit_at': w.last_exit_at,
                }
                for w in session.query(Worker).all()
            ]

    def _reconcile_worker(self, row):
        """Apply the decision table to one snapshot row.

        Args:
            row: One dict produced by ``_load_snapshot``.

        Returns:
            ``'start'``, ``'stop'`` or ``'mark_running'`` when an action was
            taken, ``None`` otherwise (including while in crash backoff).
        """
        alive = self.manager.backend.is_alive(pid=row['pid'], worker_key=row['worker_key'])

        if row['desired'] == DesiredStatus.RUNNING and not alive:
            if self._in_backoff(row):
                return None
            logger.info('Reconcile: starting %s', row['worker_key'])
            self.manager.start_worker(row['worker_type'], row['worker_key'], row['parameters'])
            return 'start'
        if row['desired'] == DesiredStatus.STOPPED and alive:
            logger.info('Reconcile: stopping %s', row['worker_key'])
            self.manager.stop_worker(row['worker_key'])
            return 'stop'
        if row['desired'] == DesiredStatus.RUNNING and alive and row['status'] != WorkerStatus.RUNNING:
            # Process is up but the observed status drifted (e.g. left STARTING). Heal it.
            self._mark_running(row['worker_key'])
            return 'mark_running'
        return None

    def _in_backoff(self, row):
        """Tell whether a crashed worker must still wait before being restarted.

        Only rows whose status is CRASHED and that carry a ``last_exit_at``
        are subject to backoff. The delay doubles with ``restart_count``
        (exponent capped at ``_BACKOFF_MAX_EXPONENT``) and is capped at
        ``_BACKOFF_MAX_SECONDS``.
        """
        if not row['last_exit_at'] or row['status'] != WorkerStatus.CRASHED:
            return False
        exponent = min(row['restart_count'], _BACKOFF_MAX_EXPONENT)
        delay = min(_BACKOFF_BASE_SECONDS * (2**exponent), _BACKOFF_MAX_SECONDS)
        last_exit = row['last_exit_at']
        # last_exit_at is stored as UTC wall-clock; coerce naive values read back
        # from the DB to aware UTC so the comparison never mixes naive and aware.
        if last_exit.tzinfo is None:
            last_exit = last_exit.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) < last_exit + timedelta(seconds=delay)

    def _mark_running(self, worker_key):
        """Set the stored status of ``worker_key`` to RUNNING if the row exists."""
        with self.manager.storage.session_scope() as session:
            worker = session.query(Worker).filter_by(worker_key=worker_key).first()
            if worker:
                worker.status = WorkerStatus.RUNNING
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import signal
|
|
5
|
+
|
|
6
|
+
from ..core.manager import WorkerManager
|
|
7
|
+
from ..core.recovery import RecoveryLock
|
|
8
|
+
from .reconciler import Reconciler
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger('crazy_workers')
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main(argv=None):
    """Run the reconcile daemon for one workers directory.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 after the reconcile loop has ended, 1 when another daemon already
        holds the lock, 2 when the workers directory does not exist.

    Raises:
        SystemExit: With code 2 on invalid command-line arguments, including a
            non-positive ``--interval``.
    """
    parser = argparse.ArgumentParser(prog='crazy_workers.daemon', description='Run the reconcile loop.')
    parser.add_argument('--workers-dir', required=True, help='Directory containing worker scripts')
    parser.add_argument(
        '--db-url',
        default=os.environ.get('CRAZY_WORKERS_DB_URL'),
        help='Shared DB URL (defaults to $CRAZY_WORKERS_DB_URL, else the local SQLite under .service/)',
    )
    parser.add_argument('--interval', type=float, default=2.0, help='Seconds between reconcile passes')
    parser.add_argument('--log-level', default=os.environ.get('CRAZY_WORKERS_LOG_LEVEL', 'INFO'))
    args = parser.parse_args(argv)

    # A zero, negative or NaN interval would make the loop spin without pause
    # (the sleep returns immediately), hammering the DB. `not x > 0` also
    # rejects NaN, which `x <= 0` would let through.
    if not args.interval > 0:
        parser.error('--interval must be a positive number of seconds')

    logging.basicConfig(
        level=getattr(logging, args.log_level.upper(), logging.INFO),
        format='%(asctime)s %(levelname)s %(name)s: %(message)s',
    )

    # Fail loudly on a misconfigured path rather than silently creating an empty
    # workers dir (and then never finding any worker script in it).
    if not os.path.isdir(args.workers_dir):
        logger.error('Workers directory %s does not exist', args.workers_dir)
        return 2

    # The daemon owns the host-local runtime area (.service, logs, and the SQLite
    # file in self-contained mode), so it materialises them (create_dir=True).
    # auto_recover is off: a reconcile pass already restarts dead RUNNING workers.
    manager = WorkerManager(
        args.workers_dir,
        create_dir=True,
        auto_boot=False,
        auto_recover=False,
        db_url=args.db_url,
        # Tables are created only when no DB URL was supplied.
        create_tables=args.db_url is None,
    )

    # From here on the manager is always disposed, whichever way we leave.
    try:
        lock = RecoveryLock(os.path.join(manager.service_dir, 'daemon.lock'))
        if not lock.acquire():
            logger.error('Another crazy_workers daemon already owns %s; exiting.', args.workers_dir)
            return 1

        # The lock is held: release it even if setup below fails before the
        # loop starts.
        try:
            reconciler = Reconciler(manager, interval=args.interval)
            _install_signal_handlers(reconciler)
            reconciler.run_forever()
        finally:
            lock.release()
    finally:
        manager.dispose()
    return 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _install_signal_handlers(reconciler):
|
|
67
|
+
def _handle(signum, _frame):
|
|
68
|
+
logger.info('Received signal %s; shutting down.', signum)
|
|
69
|
+
reconciler.stop()
|
|
70
|
+
|
|
71
|
+
for signame in ('SIGTERM', 'SIGINT'):
|
|
72
|
+
sig = getattr(signal, signame, None)
|
|
73
|
+
if sig is not None:
|
|
74
|
+
try:
|
|
75
|
+
signal.signal(sig, _handle)
|
|
76
|
+
except (ValueError, OSError):
|
|
77
|
+
# Not the main thread, or unsupported on this platform — best effort.
|
|
78
|
+
pass
|
|
@@ -5,6 +5,12 @@ from sqlalchemy.orm import DeclarativeBase
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class WorkerStatus(enum.Enum):
|
|
8
|
+
"""Observed status of a worker, owned by whoever runs the processes.
|
|
9
|
+
|
|
10
|
+
In the reconciler model this is daemon-owned; in embedded mode the
|
|
11
|
+
WorkerManager writes it directly.
|
|
12
|
+
"""
|
|
13
|
+
|
|
8
14
|
NEVER_STARTED = 'NEVER_STARTED'
|
|
9
15
|
STARTING = 'STARTING'
|
|
10
16
|
RUNNING = 'RUNNING'
|
|
@@ -12,6 +18,17 @@ class WorkerStatus(enum.Enum):
|
|
|
12
18
|
CRASHED = 'CRASHED'
|
|
13
19
|
|
|
14
20
|
|
|
21
|
+
class DesiredStatus(enum.Enum):
    """What a client wants a worker to be doing. Client-owned.

    Clients (HTTP API, CLI, scripts) only ever write this; the daemon reconciles
    the observed ``status`` toward it.
    """

    # The worker should have a live process: the reconciler starts it when it
    # is not alive.
    RUNNING = 'RUNNING'
    # The worker should have no live process: the reconciler stops it when it
    # is alive. This is the column default for Worker.desired_status.
    STOPPED = 'STOPPED'
|
|
30
|
+
|
|
31
|
+
|
|
15
32
|
class Base(DeclarativeBase):
|
|
16
33
|
pass
|
|
17
34
|
|
|
@@ -23,8 +40,18 @@ class Worker(Base):
|
|
|
23
40
|
worker_key = Column(String(255), unique=True, nullable=False)
|
|
24
41
|
worker_type = Column(String(255), nullable=False) # Name of the .py file
|
|
25
42
|
parameters = Column(JSON, nullable=False, default={})
|
|
43
|
+
|
|
44
|
+
# Desired state — written by clients (control plane).
|
|
45
|
+
desired_status = Column(Enum(DesiredStatus), nullable=False, default=DesiredStatus.STOPPED)
|
|
46
|
+
|
|
47
|
+
# Observed state — written by whoever owns the processes (the daemon, or the
|
|
48
|
+
# WorkerManager in embedded mode).
|
|
26
49
|
pid = Column(Integer, nullable=True)
|
|
27
50
|
status = Column(Enum(WorkerStatus), default=WorkerStatus.STOPPED)
|
|
51
|
+
# Crash backoff bookkeeping: restart_count grows on each failed/crashed spawn
|
|
52
|
+
# and resets on a successful start; last_exit_at timestamps the latest death.
|
|
53
|
+
restart_count = Column(Integer, nullable=False, default=0)
|
|
54
|
+
last_exit_at = Column(DateTime, nullable=True)
|
|
28
55
|
last_started_at: datetime = Column(DateTime, nullable=True)
|
|
29
56
|
last_stopped_at: datetime = Column(DateTime, nullable=True)
|
|
30
57
|
created_at = Column(DateTime, server_default=func.now())
|
|
@@ -35,8 +62,11 @@ class Worker(Base):
|
|
|
35
62
|
'worker_key': self.worker_key,
|
|
36
63
|
'worker_type': self.worker_type,
|
|
37
64
|
'parameters': self.parameters,
|
|
65
|
+
'desired_status': self.desired_status.value if self.desired_status else None,
|
|
38
66
|
'pid': self.pid,
|
|
39
67
|
'status': self.status.value,
|
|
68
|
+
'restart_count': self.restart_count,
|
|
69
|
+
'last_exit_at': self.last_exit_at.isoformat() if self.last_exit_at else None,
|
|
40
70
|
'last_started_at': self.last_started_at.isoformat() if self.last_started_at else None,
|
|
41
71
|
'last_stopped_at': self.last_stopped_at.isoformat() if self.last_stopped_at else None,
|
|
42
72
|
}
|
|
@@ -39,8 +39,13 @@ class Storage:
|
|
|
39
39
|
self.engine = create_engine(url, connect_args=connect_args)
|
|
40
40
|
self._owns_engine = True
|
|
41
41
|
|
|
42
|
-
|
|
42
|
+
# The tuning below registers engine-level listeners; on a shared engine more
|
|
43
|
+
# than one Storage may wrap it, so install exactly once per engine — a second
|
|
44
|
+
# 'begin' listener would emit a second BEGIN IMMEDIATE (transaction within a
|
|
45
|
+
# transaction) and fail.
|
|
46
|
+
if self.engine.dialect.name == 'sqlite' and not getattr(self.engine, '_cw_sqlite_tuned', False):
|
|
43
47
|
self._install_sqlite_tuning()
|
|
48
|
+
self.engine._cw_sqlite_tuned = True
|
|
44
49
|
|
|
45
50
|
self.Session = sessionmaker(bind=self.engine)
|
|
46
51
|
if create_tables:
|
|
@@ -3,6 +3,7 @@ README.md
|
|
|
3
3
|
pyproject.toml
|
|
4
4
|
crazy_workers/__init__.py
|
|
5
5
|
crazy_workers/_bootstrap.py
|
|
6
|
+
crazy_workers/client.py
|
|
6
7
|
crazy_workers.egg-info/PKG-INFO
|
|
7
8
|
crazy_workers.egg-info/SOURCES.txt
|
|
8
9
|
crazy_workers.egg-info/dependency_links.txt
|
|
@@ -35,6 +36,10 @@ crazy_workers/core/manager/lister.py
|
|
|
35
36
|
crazy_workers/core/manager/recoverer.py
|
|
36
37
|
crazy_workers/core/manager/starter.py
|
|
37
38
|
crazy_workers/core/manager/stopper.py
|
|
39
|
+
crazy_workers/daemon/__init__.py
|
|
40
|
+
crazy_workers/daemon/__main__.py
|
|
41
|
+
crazy_workers/daemon/reconciler.py
|
|
42
|
+
crazy_workers/daemon/runner.py
|
|
38
43
|
crazy_workers/database/__init__.py
|
|
39
44
|
crazy_workers/database/schema.py
|
|
40
45
|
crazy_workers/database/storage.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "crazy-workers"
|
|
3
|
-
version = "1.3.0"
|
|
3
|
+
version = "1.4.1"
|
|
4
4
|
description = "A Python library for managing background worker processes with persistent state, automatic recovery, and a CLI."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [{ name = "GioVanni Colasanto" }]
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from rich.prompt import IntPrompt
|
|
3
|
-
|
|
4
|
-
from ..ui import console, err_console
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def start_worker(manager, worker_type, worker_key=None, parameters=None):
    """Start a worker, prompting for its type when none was supplied.

    With an empty ``worker_type`` the ``.py`` files of ``manager.workers_dir``
    are listed and the user picks one by number.

    Returns:
        True when the manager reports a successful start, False otherwise
        (including an unreadable or empty workers directory).
    """
    if not worker_type:
        # Interactive mode: offer every .py file found in workers_dir.
        try:
            candidates = []
            for entry in os.listdir(manager.workers_dir):
                if entry.endswith('.py'):
                    candidates.append(entry[:-3])
        except Exception as exc:
            err_console().print(f'[bold red]Error reading workers directory:[/bold red] {exc}')
            return False

        if len(candidates) == 0:
            console().print(f'[yellow]No worker scripts found in {manager.workers_dir}[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker type to start:[/bold cyan]')
        for number, name in enumerate(candidates, 1):
            console().print(f' [bold]{number})[/bold] {name}')

        valid_answers = list(map(str, range(1, len(candidates) + 1)))
        picked = IntPrompt.ask('Enter the number', choices=valid_answers)
        worker_type = candidates[picked - 1]

    started, outcome = manager.start_worker(worker_type, worker_key=worker_key, parameters=parameters)
    if not started:
        # On failure the second element is the error to report.
        err_console().print(f'[bold red]Error:[/bold red] {outcome}')
        return False

    console().print('[bold green]Success:[/bold green] Worker started')
    console().print(f' [bold]Key:[/bold] {outcome["worker_key"]}')
    console().print(f' [bold]PID:[/bold] {outcome["pid"]}')
    return True
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from rich.panel import Panel
|
|
4
|
-
from rich.table import Table
|
|
5
|
-
|
|
6
|
-
from ...boot import boot_state
|
|
7
|
-
from ..ui import console
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def show_status(manager):
    """Observability hub: print the boot-restore header, then the worker table.

    Returns:
        Whatever ``manager.list_workers()`` returned; a notice replaces the
        table when that value is empty.
    """
    console().print(_build_header(manager))

    workers = manager.list_workers()
    if workers:
        console().print(_build_table(workers))
    else:
        console().print('[yellow]No workers found in database.[/yellow]')
    return workers
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def _build_header(manager):
    """Build the status panel: workers directory plus one boot-restore line."""

    def _boot_line(state):
        # Checked in priority order: disabled, unsupported, installed, missing.
        if state.mechanism == 'disabled':
            return '[dim]boot-restore: disabled[/dim]'
        if not state.supported:
            return '[dim]boot-restore: not supported on this platform[/dim]'
        if state.installed:
            return f'[green]boot-restore: enabled[/green] [dim]({state.mechanism}, {state.detail})[/dim]'
        reason = ''
        if state.detail:
            reason = f' — {state.detail}'
        return f'[yellow]boot-restore: not installed[/yellow][dim]{reason}[/dim]'

    boot_line = _boot_line(boot_state(manager.workers_dir, provider=manager._boot_provider))
    body = f'[bold]Workers dir:[/bold] {manager.workers_dir}\n{boot_line}'
    return Panel.fit(body, border_style='cyan', title='[bold cyan]Crazy Workers status[/bold cyan]')
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def _build_table(workers):
    """Render worker dicts as a rich table, one numbered row per worker.

    Each worker dict is read for ``status``, ``worker_key``, ``worker_type``,
    ``pid``, ``parameters`` and the optional ``last_started_at`` /
    ``last_stopped_at`` ISO timestamps.
    """
    table = Table(
        title='[bold cyan]Active & Registered Workers[/bold cyan]', border_style='cyan', header_style='bold magenta'
    )
    column_specs = (
        ('#', {'justify': 'right', 'style': 'dim'}),
        ('Key', {'style': 'bold'}),
        ('Type', {}),
        ('Status', {'justify': 'center'}),
        ('PID', {'justify': 'right', 'style': 'green'}),
        ('Last Action', {'justify': 'center'}),
        ('Params', {'overflow': 'ellipsis'}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    # Any status not listed here is shown in yellow.
    status_styles = {
        'RUNNING': 'green',
        'CRASHED': 'bold red',
        'FAILED': 'bold red',
        'STOPPED': 'dim',
        'NEVER_STARTED': 'cyan',
    }

    for position, worker in enumerate(workers, 1):
        status = worker['status']
        status_style = status_styles.get(status, 'yellow')

        # A running worker shows its start time; otherwise the last stop, if any.
        if status == 'RUNNING' and worker.get('last_started_at'):
            stamp = datetime.fromisoformat(worker['last_started_at']).strftime('%H:%M:%S')
            last_action = f'[green]Started {stamp}[/green]'
        elif worker.get('last_stopped_at'):
            stamp = datetime.fromisoformat(worker['last_stopped_at']).strftime('%H:%M:%S')
            last_action = f'[dim]Stopped {stamp}[/dim]'
        else:
            last_action = '-'

        params_str = '-'
        if worker['parameters']:
            params_str = json.dumps(worker['parameters'])
        if len(params_str) > 30:
            # Keep the cell at 30 characters: 27 of content plus the ellipsis.
            params_str = params_str[:27] + '...'

        cells = [
            str(position),
            worker['worker_key'] or '-',
            worker['worker_type'],
            f'[{status_style}]{status}[/{status_style}]',
            str(worker['pid']) if worker['pid'] else '-',
            last_action,
            params_str,
        ]
        table.add_row(*cells)
    return table
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from rich.prompt import IntPrompt
|
|
2
|
-
|
|
3
|
-
from ..ui import console, err_console
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def stop_worker(manager, worker_key):
    """Stop a worker, prompting for which one when no key was supplied.

    With an empty ``worker_key`` the workers whose status is ``'RUNNING'`` are
    listed and the user picks one by number.

    Returns:
        True when the manager reports a successful stop, False otherwise
        (including when there is nothing running to choose from).
    """
    if not worker_key:
        # Interactive mode: only running workers can be chosen.
        running = []
        for candidate in manager.list_workers():
            if candidate['status'] == 'RUNNING':
                running.append(candidate)

        if len(running) == 0:
            console().print('[yellow]No running workers to stop.[/yellow]')
            return False

        console().print('\n[bold cyan]Select a worker to stop:[/bold cyan]')
        for number, entry in enumerate(running, 1):
            console().print(f' [bold]{number})[/bold] {entry["worker_key"]} [dim]({entry["worker_type"]})[/dim]')

        valid_answers = list(map(str, range(1, len(running) + 1)))
        picked = IntPrompt.ask('Enter the number', choices=valid_answers)
        worker_key = running[picked - 1]['worker_key']

    stopped, message = manager.stop_worker(worker_key)
    if not stopped:
        err_console().print(f'[bold red]Error:[/bold red] {message}')
        return False

    console().print(f'[bold green]Success:[/bold green] {message}')
    return True
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
import argparse
|
|
2
|
-
import sys
|
|
3
|
-
from rich.panel import Panel
|
|
4
|
-
|
|
5
|
-
from ..core.manager import WorkerManager
|
|
6
|
-
from .commands import show_params, show_status, start_worker, stop_worker
|
|
7
|
-
from .discovery import resolve_workers_dir
|
|
8
|
-
from .ui import console, err_console
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def main():
    """Entry point of the Crazy Workers CLI.

    Parses ``sys.argv``, opens a WorkerManager on the resolved workers
    directory and dispatches to the status/start/stop/params command. Exits
    with status 1 when no command is given, when a command reports failure,
    on invalid ``--params`` JSON, or on a ValueError raised while the manager
    is in use.
    """
    parser = argparse.ArgumentParser(
        description='Crazy Workers CLI',
        # Widen the help column so option names and help text share a line.
        formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=32),
    )
    parser.add_argument('--workers-dir', help='Directory containing worker scripts')

    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # status
    subparsers.add_parser('status', help='Show workers and boot-restore status')

    # start
    start_parser = subparsers.add_parser('start', help='Start a worker (interactive if type missing)')
    start_parser.add_argument('worker_type', nargs='?', help='The type (filename) of worker to start')
    start_parser.add_argument('--key', help='Optional custom key for the worker')
    start_parser.add_argument('--params', help='JSON string of parameters for the worker')

    # stop
    stop_parser = subparsers.add_parser('stop', help='Stop a worker (interactive if key missing)')
    stop_parser.add_argument('worker_key', nargs='?', help='The key of the worker to stop')

    # params
    params_parser = subparsers.add_parser('params', help='Show parameters for a worker')
    params_parser.add_argument('worker_key', nargs='?', help='The key of the worker')

    args = parser.parse_args()

    if not args.command:
        banner = Panel.fit(
            '[bold cyan]Crazy Workers CLI[/bold cyan]\n[dim]Manage your background processes with ease[/dim]',
            border_style='cyan',
        )
        console().print(banner)
        parser.print_help()
        sys.exit(1)

    workers_dir = resolve_workers_dir(args.workers_dir)
    try:
        with WorkerManager(workers_dir, create_dir=False, auto_recover=False) as manager:
            command = args.command
            # `succeeded` stays True for commands that have no failure signal.
            succeeded = True
            if command == 'status':
                show_status(manager)
            elif command == 'start':
                import json

                params = None
                if args.params:
                    try:
                        params = json.loads(args.params)
                    except json.JSONDecodeError:
                        err_console().print('[bold red]Error:[/bold red] Invalid JSON in --params')
                        sys.exit(1)

                succeeded = start_worker(manager, args.worker_type, worker_key=args.key, parameters=params)
            elif command == 'stop':
                succeeded = stop_worker(manager, args.worker_key)
            elif command == 'params':
                succeeded = show_params(manager, args.worker_key)

            if not succeeded:
                sys.exit(1)
    except ValueError as exc:
        err_console().print(f'[bold red]Error:[/bold red] {exc}')
        sys.exit(1)


if __name__ == '__main__':
    main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|