crazy-workers 1.2.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/PKG-INFO +12 -2
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/README.md +11 -1
- crazy_workers-1.4.0/crazy_workers/__init__.py +6 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/cli/commands/params.py +5 -12
- crazy_workers-1.4.0/crazy_workers/cli/commands/starter.py +36 -0
- crazy_workers-1.4.0/crazy_workers/cli/commands/status.py +112 -0
- crazy_workers-1.4.0/crazy_workers/cli/commands/stopper.py +28 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/cli/discovery.py +10 -5
- crazy_workers-1.4.0/crazy_workers/cli/main.py +117 -0
- crazy_workers-1.4.0/crazy_workers/client.py +69 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/manager/__init__.py +15 -10
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/manager/starter.py +14 -2
- crazy_workers-1.4.0/crazy_workers/daemon/__init__.py +16 -0
- crazy_workers-1.4.0/crazy_workers/daemon/__main__.py +7 -0
- crazy_workers-1.4.0/crazy_workers/daemon/reconciler.py +118 -0
- crazy_workers-1.4.0/crazy_workers/daemon/runner.py +78 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/database/schema.py +30 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/database/storage.py +15 -3
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers.egg-info/PKG-INFO +12 -2
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers.egg-info/SOURCES.txt +5 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/pyproject.toml +1 -1
- crazy_workers-1.2.0/crazy_workers/__init__.py +0 -5
- crazy_workers-1.2.0/crazy_workers/cli/commands/starter.py +0 -36
- crazy_workers-1.2.0/crazy_workers/cli/commands/status.py +0 -82
- crazy_workers-1.2.0/crazy_workers/cli/commands/stopper.py +0 -30
- crazy_workers-1.2.0/crazy_workers/cli/main.py +0 -80
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/LICENSE +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/_bootstrap.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/__init__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/__main__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/base.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/detect.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/entry.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/orchestrator.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/systemd.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/boot/windows.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/cli/__init__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/cli/commands/__init__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/cli/ui.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/__init__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/backend.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/engine.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/manager/lister.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/manager/recoverer.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/manager/stopper.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/core/recovery.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/database/__init__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/testing/__init__.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/testing/backends.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers/testing/polling.py +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers.egg-info/dependency_links.txt +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers.egg-info/entry_points.txt +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers.egg-info/requires.txt +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/crazy_workers.egg-info/top_level.txt +0 -0
- {crazy_workers-1.2.0 → crazy_workers-1.4.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: crazy-workers
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: A Python library for managing background worker processes with persistent state, automatic recovery, and a CLI.
|
|
5
5
|
Author: GioVanni Colasanto
|
|
6
6
|
License: MIT
|
|
@@ -139,8 +139,9 @@ See [CLI.md](https://github.com/Vanni-broUser/crazy-workers/blob/main/CLI.md) fo
|
|
|
139
139
|
| `engine` | `Engine` | `None` | Reuse an existing SQLAlchemy engine so the tables live in your database; **not** disposed by crazy_workers |
|
|
140
140
|
| `worker_env` | `dict` | `None` | Environment variables injected into **every** spawned worker (e.g. `DATABASE_URL`) |
|
|
141
141
|
| `auto_recover` | `bool` | `True` | Recover dead-but-`RUNNING` workers when the manager is constructed |
|
|
142
|
+
| `create_tables` | `bool` | `True` | Create crazy_workers' own tables on init; set `False` when the host owns the schema via its migrations |
|
|
142
143
|
|
|
143
|
-
See [Backend integration](#backend-integration) for `db_url` / `engine` / `worker_env` / `auto_recover`.
|
|
144
|
+
See [Backend integration](#backend-integration) for `db_url` / `engine` / `worker_env` / `auto_recover` / `create_tables`.
|
|
144
145
|
|
|
145
146
|
### `start_worker(worker_type, worker_key=None, parameters=None, env=None)`
|
|
146
147
|
|
|
@@ -338,6 +339,15 @@ the project instead of living off to the side:
|
|
|
338
339
|
`workers` table inside your database and inherits its persistence and backups
|
|
339
340
|
— so state survives even if the process/container is recreated. A shared
|
|
340
341
|
engine is never disposed by crazy_workers; its owner manages it.
|
|
342
|
+
- **Let your migrations own the schema.** If your project tracks its schema with
|
|
343
|
+
a migration tool (Alembic, etc.), pass `create_tables=False` so crazy_workers
|
|
344
|
+
issues no DDL: the `workers` table becomes a normal migration in your history,
|
|
345
|
+
with a single source of truth and no create-on-import side effect. You own the
|
|
346
|
+
ordering — the table must exist before the manager queries it, so build the
|
|
347
|
+
manager *after* your migrations run (and keep `auto_recover=False` until then,
|
|
348
|
+
since recovery reads that table). See the `workers` schema in
|
|
349
|
+
[`crazy_workers/database/schema.py`](https://github.com/Vanni-broUser/crazy-workers/blob/main/crazy_workers/database/schema.py)
|
|
350
|
+
for the columns your migration must create.
|
|
341
351
|
- **Give workers the connection they need.** A worker is a separate process, so
|
|
342
352
|
it can't receive a live DB connection — pass the *configuration* instead.
|
|
343
353
|
`worker_env={'DATABASE_URL': ...}` is injected into every spawned worker
|
|
@@ -104,8 +104,9 @@ See [CLI.md](https://github.com/Vanni-broUser/crazy-workers/blob/main/CLI.md) fo
|
|
|
104
104
|
| `engine` | `Engine` | `None` | Reuse an existing SQLAlchemy engine so the tables live in your database; **not** disposed by crazy_workers |
|
|
105
105
|
| `worker_env` | `dict` | `None` | Environment variables injected into **every** spawned worker (e.g. `DATABASE_URL`) |
|
|
106
106
|
| `auto_recover` | `bool` | `True` | Recover dead-but-`RUNNING` workers when the manager is constructed |
|
|
107
|
+
| `create_tables` | `bool` | `True` | Create crazy_workers' own tables on init; set `False` when the host owns the schema via its migrations |
|
|
107
108
|
|
|
108
|
-
See [Backend integration](#backend-integration) for `db_url` / `engine` / `worker_env` / `auto_recover`.
|
|
109
|
+
See [Backend integration](#backend-integration) for `db_url` / `engine` / `worker_env` / `auto_recover` / `create_tables`.
|
|
109
110
|
|
|
110
111
|
### `start_worker(worker_type, worker_key=None, parameters=None, env=None)`
|
|
111
112
|
|
|
@@ -303,6 +304,15 @@ the project instead of living off to the side:
|
|
|
303
304
|
`workers` table inside your database and inherits its persistence and backups
|
|
304
305
|
— so state survives even if the process/container is recreated. A shared
|
|
305
306
|
engine is never disposed by crazy_workers; its owner manages it.
|
|
307
|
+
- **Let your migrations own the schema.** If your project tracks its schema with
|
|
308
|
+
a migration tool (Alembic, etc.), pass `create_tables=False` so crazy_workers
|
|
309
|
+
issues no DDL: the `workers` table becomes a normal migration in your history,
|
|
310
|
+
with a single source of truth and no create-on-import side effect. You own the
|
|
311
|
+
ordering — the table must exist before the manager queries it, so build the
|
|
312
|
+
manager *after* your migrations run (and keep `auto_recover=False` until then,
|
|
313
|
+
since recovery reads that table). See the `workers` schema in
|
|
314
|
+
[`crazy_workers/database/schema.py`](https://github.com/Vanni-broUser/crazy-workers/blob/main/crazy_workers/database/schema.py)
|
|
315
|
+
for the columns your migration must create.
|
|
306
316
|
- **Give workers the connection they need.** A worker is a separate process, so
|
|
307
317
|
it can't receive a live DB connection — pass the *configuration* instead.
|
|
308
318
|
`worker_env={'DATABASE_URL': ...}` is injected into every spawned worker
|
|
@@ -4,28 +4,21 @@ from rich.prompt import IntPrompt
|
|
|
4
4
|
from ..ui import console, err_console
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def show_params(
|
|
8
|
-
|
|
9
|
-
workers = manager.list_workers()
|
|
7
|
+
def show_params(client, worker_key):
|
|
8
|
+
workers = client.list()
|
|
10
9
|
if not workers:
|
|
11
10
|
console().print('[yellow]No workers found.[/yellow]')
|
|
12
11
|
return False
|
|
13
12
|
|
|
14
13
|
if not worker_key:
|
|
15
14
|
# Interactive mode
|
|
16
|
-
active_workers = [w for w in workers if w['worker_key'] is not None]
|
|
17
|
-
|
|
18
|
-
if not active_workers:
|
|
19
|
-
console().print('[yellow]No registered workers to show parameters for.[/yellow]')
|
|
20
|
-
return False
|
|
21
|
-
|
|
22
15
|
console().print('\n[bold cyan]Select a worker to show parameters:[/bold cyan]')
|
|
23
|
-
for i, w in enumerate(active_workers, 1):
|
|
16
|
+
for i, w in enumerate(workers, 1):
|
|
24
17
|
status_style = 'green' if w['status'] == 'RUNNING' else 'dim'
|
|
25
18
|
console().print(f' [bold]{i})[/bold] {w["worker_key"]} [{status_style}]({w["status"]})[/{status_style}]')
|
|
26
19
|
|
|
27
|
-
choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(active_workers) + 1)])
|
|
28
|
-
selected_worker = active_workers[choice - 1]
|
|
20
|
+
choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(workers) + 1)])
|
|
21
|
+
selected_worker = workers[choice - 1]
|
|
29
22
|
else:
|
|
30
23
|
selected_worker = next((w for w in workers if w['worker_key'] == worker_key), None)
|
|
31
24
|
if not selected_worker:
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from rich.prompt import IntPrompt
|
|
3
|
+
|
|
4
|
+
from ..ui import console, err_console
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def start_worker(client, workers_dir, worker_type, worker_key=None, parameters=None):
|
|
8
|
+
"""Request a worker to run. The daemon performs the actual spawn."""
|
|
9
|
+
if not worker_type:
|
|
10
|
+
# Interactive mode: list .py files in workers_dir
|
|
11
|
+
try:
|
|
12
|
+
files = [f[:-3] for f in os.listdir(workers_dir) if f.endswith('.py') and f != '__init__.py']
|
|
13
|
+
except Exception as e:
|
|
14
|
+
err_console().print(f'[bold red]Error reading workers directory:[/bold red] {e}')
|
|
15
|
+
return False
|
|
16
|
+
|
|
17
|
+
if not files:
|
|
18
|
+
console().print(f'[yellow]No worker scripts found in {workers_dir}[/yellow]')
|
|
19
|
+
return False
|
|
20
|
+
|
|
21
|
+
console().print('\n[bold cyan]Select a worker type to start:[/bold cyan]')
|
|
22
|
+
for i, f in enumerate(files, 1):
|
|
23
|
+
console().print(f' [bold]{i})[/bold] {f}')
|
|
24
|
+
|
|
25
|
+
choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(files) + 1)])
|
|
26
|
+
worker_type = files[choice - 1]
|
|
27
|
+
|
|
28
|
+
# Surface a typo here rather than as a daemon CRASHED/retry loop later.
|
|
29
|
+
if not os.path.exists(os.path.join(workers_dir, f'{worker_type}.py')):
|
|
30
|
+
err_console().print(f'[bold red]Error:[/bold red] Worker file {worker_type}.py not found in {workers_dir}')
|
|
31
|
+
return False
|
|
32
|
+
|
|
33
|
+
key = client.request_start(worker_type, worker_key=worker_key, parameters=parameters)
|
|
34
|
+
console().print('[bold green]Requested:[/bold green] worker set to RUNNING (the daemon will start it)')
|
|
35
|
+
console().print(f' [bold]Key:[/bold] {key}')
|
|
36
|
+
return True
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
|
|
8
|
+
from ..ui import console
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def show_status(client, workers_dir):
|
|
12
|
+
"""Observability hub: the target state store plus the worker table (desired vs actual)."""
|
|
13
|
+
console().print(_build_header(workers_dir))
|
|
14
|
+
|
|
15
|
+
workers = _merge_with_filesystem(client.list(), workers_dir)
|
|
16
|
+
if not workers:
|
|
17
|
+
console().print('[yellow]No workers found.[/yellow]')
|
|
18
|
+
return workers
|
|
19
|
+
|
|
20
|
+
console().print(_build_table(workers))
|
|
21
|
+
return workers
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _build_header(workers_dir):
|
|
25
|
+
db_url = os.environ.get('CRAZY_WORKERS_DB_URL')
|
|
26
|
+
if db_url:
|
|
27
|
+
target = f'[green]shared DB[/green] [dim]({_redact(db_url)})[/dim]'
|
|
28
|
+
else:
|
|
29
|
+
target = '[dim]self-contained SQLite (.service/workers.db)[/dim]'
|
|
30
|
+
body = f'[bold]Workers dir:[/bold] {workers_dir}\n[bold]State store:[/bold] {target}'
|
|
31
|
+
return Panel.fit(body, border_style='cyan', title='[bold cyan]Crazy Workers status[/bold cyan]')
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _redact(db_url):
|
|
35
|
+
"""Hide the password in a SQLAlchemy URL for display."""
|
|
36
|
+
return re.sub(r'://([^:/@]+):[^@]*@', r'://\1:***@', db_url)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _merge_with_filesystem(db_workers, workers_dir):
|
|
40
|
+
"""Append NEVER_STARTED rows for worker scripts that have no DB record yet."""
|
|
41
|
+
results = list(db_workers)
|
|
42
|
+
registered_types = {w['worker_type'] for w in results}
|
|
43
|
+
try:
|
|
44
|
+
available = sorted({f[:-3] for f in os.listdir(workers_dir) if f.endswith('.py') and f != '__init__.py'})
|
|
45
|
+
except OSError:
|
|
46
|
+
available = []
|
|
47
|
+
for worker_type in available:
|
|
48
|
+
if worker_type not in registered_types:
|
|
49
|
+
results.append(
|
|
50
|
+
{
|
|
51
|
+
'worker_key': None,
|
|
52
|
+
'worker_type': worker_type,
|
|
53
|
+
'parameters': {},
|
|
54
|
+
'desired_status': None,
|
|
55
|
+
'pid': None,
|
|
56
|
+
'status': 'NEVER_STARTED',
|
|
57
|
+
'last_started_at': None,
|
|
58
|
+
'last_stopped_at': None,
|
|
59
|
+
}
|
|
60
|
+
)
|
|
61
|
+
return results
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _build_table(workers):
|
|
65
|
+
table = Table(
|
|
66
|
+
title='[bold cyan]Workers — desired vs actual[/bold cyan]', border_style='cyan', header_style='bold magenta'
|
|
67
|
+
)
|
|
68
|
+
table.add_column('#', justify='right', style='dim')
|
|
69
|
+
table.add_column('Key', style='bold')
|
|
70
|
+
table.add_column('Type')
|
|
71
|
+
table.add_column('Desired', justify='center')
|
|
72
|
+
table.add_column('Status', justify='center')
|
|
73
|
+
table.add_column('PID', justify='right', style='green')
|
|
74
|
+
table.add_column('Last Action', justify='center')
|
|
75
|
+
table.add_column('Params', overflow='ellipsis')
|
|
76
|
+
|
|
77
|
+
for i, w in enumerate(workers, 1):
|
|
78
|
+
status = w['status']
|
|
79
|
+
status_style = 'green' if status == 'RUNNING' else 'yellow'
|
|
80
|
+
if status in ('CRASHED', 'FAILED'):
|
|
81
|
+
status_style = 'bold red'
|
|
82
|
+
elif status == 'STOPPED':
|
|
83
|
+
status_style = 'dim'
|
|
84
|
+
elif status == 'NEVER_STARTED':
|
|
85
|
+
status_style = 'cyan'
|
|
86
|
+
|
|
87
|
+
desired = w.get('desired_status') or '-'
|
|
88
|
+
desired_style = 'green' if desired == 'RUNNING' else 'dim'
|
|
89
|
+
|
|
90
|
+
last_action = '-'
|
|
91
|
+
if status == 'RUNNING' and w.get('last_started_at'):
|
|
92
|
+
dt = datetime.fromisoformat(w['last_started_at'])
|
|
93
|
+
last_action = f'[green]Started {dt.strftime("%H:%M:%S")}[/green]'
|
|
94
|
+
elif w.get('last_stopped_at'):
|
|
95
|
+
dt = datetime.fromisoformat(w['last_stopped_at'])
|
|
96
|
+
last_action = f'[dim]Stopped {dt.strftime("%H:%M:%S")}[/dim]'
|
|
97
|
+
|
|
98
|
+
params_str = json.dumps(w['parameters']) if w['parameters'] else '-'
|
|
99
|
+
if len(params_str) > 30:
|
|
100
|
+
params_str = params_str[:27] + '...'
|
|
101
|
+
|
|
102
|
+
table.add_row(
|
|
103
|
+
str(i),
|
|
104
|
+
w['worker_key'] or '-',
|
|
105
|
+
w['worker_type'],
|
|
106
|
+
f'[{desired_style}]{desired}[/{desired_style}]',
|
|
107
|
+
f'[{status_style}]{status}[/{status_style}]',
|
|
108
|
+
str(w['pid']) if w['pid'] else '-',
|
|
109
|
+
last_action,
|
|
110
|
+
params_str,
|
|
111
|
+
)
|
|
112
|
+
return table
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from rich.prompt import IntPrompt
|
|
2
|
+
|
|
3
|
+
from ..ui import console, err_console
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def stop_worker(client, worker_key):
|
|
7
|
+
"""Request a worker to stop. The daemon performs the actual termination."""
|
|
8
|
+
if not worker_key:
|
|
9
|
+
# Interactive mode: offer the workers a stop is meaningful for.
|
|
10
|
+
candidates = [w for w in client.list() if w['desired_status'] == 'RUNNING']
|
|
11
|
+
|
|
12
|
+
if not candidates:
|
|
13
|
+
console().print('[yellow]No workers desired RUNNING to stop.[/yellow]')
|
|
14
|
+
return False
|
|
15
|
+
|
|
16
|
+
console().print('\n[bold cyan]Select a worker to stop:[/bold cyan]')
|
|
17
|
+
for i, w in enumerate(candidates, 1):
|
|
18
|
+
console().print(f' [bold]{i})[/bold] {w["worker_key"]} [dim]({w["worker_type"]}, {w["status"]})[/dim]')
|
|
19
|
+
|
|
20
|
+
choice = IntPrompt.ask('Enter the number', choices=[str(i) for i in range(1, len(candidates) + 1)])
|
|
21
|
+
worker_key = candidates[choice - 1]['worker_key']
|
|
22
|
+
|
|
23
|
+
if client.request_stop(worker_key):
|
|
24
|
+
console().print(f'[bold green]Requested:[/bold green] worker {worker_key} set to STOPPED (the daemon will stop it)')
|
|
25
|
+
return True
|
|
26
|
+
|
|
27
|
+
err_console().print(f'[bold red]Error:[/bold red] Worker {worker_key} not found')
|
|
28
|
+
return False
|
|
@@ -72,11 +72,16 @@ def resolve_workers_dir(flag_dir):
|
|
|
72
72
|
if user_input:
|
|
73
73
|
if os.path.isdir(user_input):
|
|
74
74
|
abs_path = os.path.abspath(user_input)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
75
|
+
# When pointed at a shared DB the cwd is the consumer app's, not ours —
|
|
76
|
+
# don't rewrite its .env (see T2). The env var lives only for this run.
|
|
77
|
+
if os.environ.get('CRAZY_WORKERS_DB_URL'):
|
|
78
|
+
os.environ.setdefault('CRAZY_WORKERS_DIR', abs_path)
|
|
79
|
+
else:
|
|
80
|
+
try:
|
|
81
|
+
save_to_env('CRAZY_WORKERS_DIR', abs_path)
|
|
82
|
+
console().print(f'[bold green]Saved CRAZY_WORKERS_DIR={abs_path} to .env[/bold green]')
|
|
83
|
+
except Exception as e:
|
|
84
|
+
err_console().print(f'[bold red]Failed to save configuration:[/bold red] {e}')
|
|
80
85
|
return abs_path
|
|
81
86
|
else:
|
|
82
87
|
err_console().print(f'[bold red]Error:[/bold red] "{user_input}" is not a valid directory.')
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
|
|
7
|
+
from ..client import WorkerClient
|
|
8
|
+
from .commands import show_params, show_status, start_worker, stop_worker
|
|
9
|
+
from .discovery import resolve_workers_dir
|
|
10
|
+
from .ui import console, err_console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _db_url():
|
|
14
|
+
return os.environ.get('CRAZY_WORKERS_DB_URL')
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _build_client(workers_dir):
|
|
18
|
+
"""A control-plane client over the shared DB, or the local self-contained SQLite.
|
|
19
|
+
|
|
20
|
+
With CRAZY_WORKERS_DB_URL set the CLI talks to the same DB as the daemon and
|
|
21
|
+
issues no DDL (the daemon or host owns the schema). Otherwise it falls back to
|
|
22
|
+
the local ``.service/workers.db``, the self-contained mode.
|
|
23
|
+
"""
|
|
24
|
+
db_url = _db_url()
|
|
25
|
+
if db_url:
|
|
26
|
+
return WorkerClient(db_url=db_url, create_tables=False)
|
|
27
|
+
service_dir = os.path.join(workers_dir, '.service')
|
|
28
|
+
os.makedirs(service_dir, exist_ok=True)
|
|
29
|
+
sqlite_path = os.path.join(service_dir, 'workers.db')
|
|
30
|
+
return WorkerClient(db_url=f'sqlite:///{sqlite_path}', create_tables=True)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _build_parser():
|
|
34
|
+
def formatter(prog):
|
|
35
|
+
return argparse.HelpFormatter(prog, max_help_position=32)
|
|
36
|
+
|
|
37
|
+
parser = argparse.ArgumentParser(description='Crazy Workers CLI', formatter_class=formatter)
|
|
38
|
+
parser.add_argument('--workers-dir', help='Directory containing worker scripts')
|
|
39
|
+
|
|
40
|
+
subparsers = parser.add_subparsers(dest='command', help='Commands')
|
|
41
|
+
|
|
42
|
+
subparsers.add_parser('status', help='Show workers (desired vs actual) and the target DB')
|
|
43
|
+
|
|
44
|
+
start_parser = subparsers.add_parser('start', help='Request a worker to run (interactive if type missing)')
|
|
45
|
+
start_parser.add_argument('worker_type', nargs='?', help='The type (filename) of worker to start')
|
|
46
|
+
start_parser.add_argument('--key', help='Optional custom key for the worker')
|
|
47
|
+
start_parser.add_argument('--params', help='JSON string of parameters for the worker')
|
|
48
|
+
|
|
49
|
+
stop_parser = subparsers.add_parser('stop', help='Request a worker to stop (interactive if key missing)')
|
|
50
|
+
stop_parser.add_argument('worker_key', nargs='?', help='The key of the worker to stop')
|
|
51
|
+
|
|
52
|
+
params_parser = subparsers.add_parser('params', help='Show parameters for a worker')
|
|
53
|
+
params_parser.add_argument('worker_key', nargs='?', help='The key of the worker')
|
|
54
|
+
|
|
55
|
+
daemon_parser = subparsers.add_parser('daemon', help='Run the reconcile loop (owns the worker processes)')
|
|
56
|
+
daemon_parser.add_argument('--interval', type=float, default=2.0, help='Seconds between reconcile passes')
|
|
57
|
+
|
|
58
|
+
return parser
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def main():
|
|
62
|
+
parser = _build_parser()
|
|
63
|
+
args = parser.parse_args()
|
|
64
|
+
|
|
65
|
+
if not args.command:
|
|
66
|
+
console().print(
|
|
67
|
+
Panel.fit(
|
|
68
|
+
'[bold cyan]Crazy Workers CLI[/bold cyan]\n[dim]Manage your background processes with ease[/dim]',
|
|
69
|
+
border_style='cyan',
|
|
70
|
+
)
|
|
71
|
+
)
|
|
72
|
+
parser.print_help()
|
|
73
|
+
sys.exit(1)
|
|
74
|
+
|
|
75
|
+
workers_dir = resolve_workers_dir(args.workers_dir)
|
|
76
|
+
|
|
77
|
+
# The daemon is the process owner, not a client — it builds its own manager.
|
|
78
|
+
if args.command == 'daemon':
|
|
79
|
+
from ..daemon.runner import main as daemon_main
|
|
80
|
+
|
|
81
|
+
argv = ['--workers-dir', workers_dir, '--interval', str(args.interval)]
|
|
82
|
+
db_url = _db_url()
|
|
83
|
+
if db_url:
|
|
84
|
+
argv += ['--db-url', db_url]
|
|
85
|
+
sys.exit(daemon_main(argv))
|
|
86
|
+
|
|
87
|
+
try:
|
|
88
|
+
with _build_client(workers_dir) as client:
|
|
89
|
+
if args.command == 'status':
|
|
90
|
+
show_status(client, workers_dir)
|
|
91
|
+
elif args.command == 'start':
|
|
92
|
+
params = _parse_params(args.params)
|
|
93
|
+
if not start_worker(client, workers_dir, args.worker_type, worker_key=args.key, parameters=params):
|
|
94
|
+
sys.exit(1)
|
|
95
|
+
elif args.command == 'stop':
|
|
96
|
+
if not stop_worker(client, args.worker_key):
|
|
97
|
+
sys.exit(1)
|
|
98
|
+
elif args.command == 'params':
|
|
99
|
+
if not show_params(client, args.worker_key):
|
|
100
|
+
sys.exit(1)
|
|
101
|
+
except ValueError as e:
|
|
102
|
+
err_console().print(f'[bold red]Error:[/bold red] {e}')
|
|
103
|
+
sys.exit(1)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _parse_params(raw):
|
|
107
|
+
if not raw:
|
|
108
|
+
return None
|
|
109
|
+
try:
|
|
110
|
+
return json.loads(raw)
|
|
111
|
+
except json.JSONDecodeError:
|
|
112
|
+
err_console().print('[bold red]Error:[/bold red] Invalid JSON in --params')
|
|
113
|
+
sys.exit(1)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
if __name__ == '__main__':
|
|
117
|
+
main()
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Control-plane client: writes desired state only, never spawns processes.
|
|
2
|
+
|
|
3
|
+
Used by anything that is NOT the daemon (HTTP API, CLI, scripts). It shares the
|
|
4
|
+
daemon's database; the daemon reconciles desired -> actual. A client touches the
|
|
5
|
+
``workers`` table and nothing else — no OS processes, no boot hooks, no recovery.
|
|
6
|
+
|
|
7
|
+
Three ways to point it at a database, mirroring :class:`Storage`:
|
|
8
|
+
|
|
9
|
+
- ``engine``: reuse an existing SQLAlchemy engine (e.g. the host backend's).
|
|
10
|
+
- ``db_url``: any SQLAlchemy URL.
|
|
11
|
+
- neither: the caller must pass one — unlike WorkerManager, the client has no
|
|
12
|
+
workers_dir and therefore no implicit SQLite location.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .database.schema import DesiredStatus, Worker
|
|
16
|
+
from .database.storage import Storage
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class WorkerClient:
|
|
20
|
+
def __init__(self, db_url=None, engine=None, create_tables=False):
|
|
21
|
+
self.storage = Storage(db_url=db_url, engine=engine, create_tables=create_tables)
|
|
22
|
+
|
|
23
|
+
def request_start(self, worker_type, worker_key=None, parameters=None):
|
|
24
|
+
"""Declare that ``worker_key`` should be RUNNING (upserting its spec).
|
|
25
|
+
|
|
26
|
+
Returns the resolved worker_key. The worker is not started here; the daemon
|
|
27
|
+
notices the desired state and starts it.
|
|
28
|
+
"""
|
|
29
|
+
worker_key = worker_key or worker_type
|
|
30
|
+
with self.storage.session_scope() as session:
|
|
31
|
+
worker = session.query(Worker).filter_by(worker_key=worker_key).first()
|
|
32
|
+
if not worker:
|
|
33
|
+
worker = Worker(worker_key=worker_key, worker_type=worker_type)
|
|
34
|
+
session.add(worker)
|
|
35
|
+
worker.worker_type = worker_type
|
|
36
|
+
worker.parameters = parameters or {}
|
|
37
|
+
worker.desired_status = DesiredStatus.RUNNING
|
|
38
|
+
return worker_key
|
|
39
|
+
|
|
40
|
+
def request_stop(self, worker_key):
|
|
41
|
+
"""Declare that ``worker_key`` should be STOPPED.
|
|
42
|
+
|
|
43
|
+
Returns False if no such worker exists. The actual stop (and last_stopped_at)
|
|
44
|
+
is performed by the daemon when it reconciles.
|
|
45
|
+
"""
|
|
46
|
+
with self.storage.session_scope() as session:
|
|
47
|
+
worker = session.query(Worker).filter_by(worker_key=worker_key).first()
|
|
48
|
+
if not worker:
|
|
49
|
+
return False
|
|
50
|
+
worker.desired_status = DesiredStatus.STOPPED
|
|
51
|
+
return True
|
|
52
|
+
|
|
53
|
+
def list(self):
|
|
54
|
+
with self.storage.session_scope() as session:
|
|
55
|
+
return [w.to_dict() for w in session.query(Worker).all()]
|
|
56
|
+
|
|
57
|
+
def get(self, worker_key):
|
|
58
|
+
with self.storage.session_scope() as session:
|
|
59
|
+
worker = session.query(Worker).filter_by(worker_key=worker_key).first()
|
|
60
|
+
return worker.to_dict() if worker else None
|
|
61
|
+
|
|
62
|
+
def dispose(self):
|
|
63
|
+
self.storage.dispose()
|
|
64
|
+
|
|
65
|
+
def __enter__(self):
|
|
66
|
+
return self
|
|
67
|
+
|
|
68
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
69
|
+
self.dispose()
|
|
@@ -19,12 +19,13 @@ class WorkerManager:
|
|
|
19
19
|
workers_dir='workers',
|
|
20
20
|
create_dir=True,
|
|
21
21
|
backend=None,
|
|
22
|
-
auto_boot=True,
|
|
22
|
+
auto_boot=False,
|
|
23
23
|
boot_provider=None,
|
|
24
24
|
db_url=None,
|
|
25
25
|
engine=None,
|
|
26
26
|
worker_env=None,
|
|
27
27
|
auto_recover=True,
|
|
28
|
+
create_tables=True,
|
|
28
29
|
):
|
|
29
30
|
self.workers_dir = workers_dir
|
|
30
31
|
self._validate_workers_dir(create_dir)
|
|
@@ -33,13 +34,16 @@ class WorkerManager:
|
|
|
33
34
|
self.logs_dir = os.path.join(self.service_dir, 'logs')
|
|
34
35
|
self.db_path = os.path.join(self.service_dir, 'workers.db')
|
|
35
36
|
|
|
36
|
-
self._initialize_storage(create_dir, db_url, engine)
|
|
37
|
+
self._initialize_storage(create_dir, db_url, engine, create_tables)
|
|
37
38
|
# The backend is the only component that touches OS processes. The default
|
|
38
39
|
# spawns real subprocesses; tests can swap in a fake one (see for_testing).
|
|
39
40
|
self.backend = backend or SubprocessBackend()
|
|
40
|
-
#
|
|
41
|
-
# boot-restore hook (see crazy_workers.boot).
|
|
42
|
-
#
|
|
41
|
+
# Opt-in legacy behaviour: when True, starting a worker transparently
|
|
42
|
+
# installs the per-user OS boot-restore hook (see crazy_workers.boot). The
|
|
43
|
+
# default is now False — in the reconciler model, surviving a reboot is the
|
|
44
|
+
# deployment's job (a systemd unit / container that runs the daemon), not a
|
|
45
|
+
# per-worker hook. boot_provider is an injection seam for tests; None lets
|
|
46
|
+
# the platform default be auto-detected.
|
|
43
47
|
self.auto_boot = auto_boot
|
|
44
48
|
self._boot_provider = boot_provider
|
|
45
49
|
# Environment variables injected into every spawned worker — e.g. the host
|
|
@@ -86,7 +90,7 @@ class WorkerManager:
|
|
|
86
90
|
else:
|
|
87
91
|
raise ValueError(f'Workers directory "{self.workers_dir}" does not exist.')
|
|
88
92
|
|
|
89
|
-
def _initialize_storage(self, create_dir, db_url, engine):
|
|
93
|
+
def _initialize_storage(self, create_dir, db_url, engine, create_tables):
|
|
90
94
|
"""Sets up service directories and storage if allowed or if they already exist."""
|
|
91
95
|
if create_dir:
|
|
92
96
|
os.makedirs(self.service_dir, exist_ok=True)
|
|
@@ -94,11 +98,12 @@ class WorkerManager:
|
|
|
94
98
|
|
|
95
99
|
if engine is not None or db_url is not None:
|
|
96
100
|
# External/shared database (e.g. the host backend's). crazy_workers' tables
|
|
97
|
-
# are created there
|
|
98
|
-
# recovery lock and the
|
|
99
|
-
|
|
101
|
+
# are created there unless the host owns the schema (create_tables=False);
|
|
102
|
+
# the local .service dir is still used for logs, the recovery lock and the
|
|
103
|
+
# boot marker.
|
|
104
|
+
self.storage = Storage(db_url=db_url, engine=engine, create_tables=create_tables)
|
|
100
105
|
elif create_dir or os.path.exists(self.db_path):
|
|
101
|
-
self.storage = Storage(self.db_path)
|
|
106
|
+
self.storage = Storage(self.db_path, create_tables=create_tables)
|
|
102
107
|
else:
|
|
103
108
|
self.storage = None
|
|
104
109
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
import re
|
|
4
|
+
from datetime import datetime, timezone
|
|
4
5
|
from sqlalchemy import func
|
|
5
6
|
from sqlalchemy.exc import IntegrityError
|
|
6
7
|
|
|
@@ -60,7 +61,7 @@ def _check_already_running(manager, worker, session):
|
|
|
60
61
|
return True
|
|
61
62
|
else:
|
|
62
63
|
logger.warning(f'Worker {worker.worker_key} found in RUNNING state but PID {worker.pid} is dead. Cleaning up.')
|
|
63
|
-
worker
|
|
64
|
+
_mark_crashed(worker)
|
|
64
65
|
session.commit()
|
|
65
66
|
return False
|
|
66
67
|
|
|
@@ -123,7 +124,7 @@ def _spawn_worker_process(manager, worker, worker_path, parameters, env, session
|
|
|
123
124
|
)
|
|
124
125
|
|
|
125
126
|
if handle is None:
|
|
126
|
-
worker
|
|
127
|
+
_mark_crashed(worker)
|
|
127
128
|
worker.pid = None
|
|
128
129
|
session.commit()
|
|
129
130
|
return False, 'Worker process failed to start'
|
|
@@ -131,6 +132,8 @@ def _spawn_worker_process(manager, worker, worker_path, parameters, env, session
|
|
|
131
132
|
worker.pid = handle.pid
|
|
132
133
|
worker.status = WorkerStatus.RUNNING
|
|
133
134
|
worker.last_started_at = func.now()
|
|
135
|
+
# A clean start clears the crash backoff so the next failure starts over.
|
|
136
|
+
worker.restart_count = 0
|
|
134
137
|
session.commit()
|
|
135
138
|
|
|
136
139
|
manager._active_processes[worker.worker_key] = handle
|
|
@@ -138,6 +141,15 @@ def _spawn_worker_process(manager, worker, worker_path, parameters, env, session
|
|
|
138
141
|
return True, worker.to_dict()
|
|
139
142
|
|
|
140
143
|
|
|
144
|
+
def _mark_crashed(worker):
|
|
145
|
+
# Record a death for crash backoff: bump the restart counter and stamp the
|
|
146
|
+
# exit time in UTC (Python-side, so the reconciler's backoff math does not
|
|
147
|
+
# depend on the DB dialect's now()/timezone semantics).
|
|
148
|
+
worker.status = WorkerStatus.CRASHED
|
|
149
|
+
worker.last_exit_at = datetime.now(timezone.utc)
|
|
150
|
+
worker.restart_count = (worker.restart_count or 0) + 1
|
|
151
|
+
|
|
152
|
+
|
|
141
153
|
def _ensure_boot_restore(manager):
|
|
142
154
|
# Best-effort and never raising: a freshly started worker transparently
|
|
143
155
|
# registers the OS boot-restore hook so it survives a reboot.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""The reconcile daemon: the single owner of worker processes for a context.
|
|
2
|
+
|
|
3
|
+
Clients (HTTP API, CLI, scripts) only write desired state to the shared DB; the
|
|
4
|
+
daemon runs a loop that drives the observed state toward it — starting,
|
|
5
|
+
stopping and crash-restarting processes. Exactly one daemon owns a given
|
|
6
|
+
workers_dir/DB at a time (enforced by a lock in ``crazy_workers.daemon.runner``).
|
|
7
|
+
|
|
8
|
+
Run it with ``python -m crazy_workers.daemon`` (a thin ``__main__`` shim over
|
|
9
|
+
:func:`crazy_workers.daemon.runner.main`).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .reconciler import Reconciler
|
|
13
|
+
from .runner import main
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
__all__ = ['Reconciler', 'main']
|