@c4t4/heyamigo 0.9.2 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,6 +5,7 @@ import { attachIncoming } from './gateway/incoming.js';
5
5
  import { handleReply } from './gateway/outgoing.js';
6
6
  import { logger } from './logger.js';
7
7
  import { startScheduler } from './memory/scheduler.js';
8
+ import { requestShutdown, startOrchestrator, stopOrchestrator, } from './queue/orchestrator.js';
8
9
  import { replayPending } from './queue/queue.js';
9
10
  import { startSenderWorker, stopSenderWorker } from './queue/sender-worker.js';
10
11
  import { startSocket } from './wa/socket.js';
@@ -14,6 +15,16 @@ async function main() {
14
15
  initDb();
15
16
  // Derived view: populate persons + identities from access.json.
16
17
  syncIdentitiesFromAccess();
18
+ // Orchestrator handles cross-cutting bookkeeping: control table
19
+ // signals, stuck-claim reclaim, dead-worker detection, cron polling
20
+ // (Phase 2.2+). Starts before workers so it can see them register.
21
+ startOrchestrator({
22
+ onShutdownDrained: () => {
23
+ stopSenderWorker();
24
+ stopOrchestrator();
25
+ closeDb();
26
+ },
27
+ });
17
28
  // Sender worker drains outbound queue → channel adapters. Started
18
29
  // before the socket so it's ready when handleReply enqueues rows.
19
30
  startSenderWorker();
@@ -30,17 +41,17 @@ async function main() {
30
41
  await handleReply(job, result, {});
31
42
  }).catch((err) => logger.error({ err }, 'replay failed'));
32
43
  }
// Graceful shutdown wiring. Each handled signal logs its message and
// then calls requestShutdown with the signal name; per the notes in
// this file the orchestrator then drains in-flight work, runs
// onShutdownDrained (stops workers, closes DB) and exits, with a 30s
// force-exit timer inside the orchestrator as a backstop.
const shutdownSignals = [
    ['SIGINT', 'SIGINT received, requesting graceful shutdown'],
    ['SIGTERM', 'SIGTERM received, requesting graceful shutdown'],
];
for (const [signalName, message] of shutdownSignals) {
    process.on(signalName, () => {
        logger.info(message);
        requestShutdown(signalName);
    });
}
45
56
  main().catch((err) => {
46
57
  logger.error({ err }, 'fatal error during boot');
@@ -0,0 +1,40 @@
1
+ // Helpers for the control table — the bot's runtime signalling
2
+ // channel. SIGTERM, /shutdown chat command, or external trigger all
3
+ // insert a control row; the orchestrator picks it up on its next tick
4
+ // and acts.
5
+ //
6
+ // Single-row-per-key: PK on `key` gives natural upsert semantics.
7
+ import { eq } from 'drizzle-orm';
8
+ import { getDb } from '../db/index.js';
9
+ import { control } from '../db/schema.js';
/**
 * Upsert a control row for `key`.
 *
 * Inserts a row into the control table; when a row with the same key
 * already exists, its value, requester and request time are overwritten.
 *
 * @param {string} key - Control key (conflict target of the upsert).
 * @param {*} [value=null] - Value stored with the signal.
 * @param {*} [requestedBy=null] - Who or what raised the signal.
 * @returns {void}
 */
export function requestControl(key, value = null, requestedBy = null) {
    // Timestamps are stored as whole seconds since the epoch.
    const requestedAt = Math.floor(Date.now() / 1000);
    const insertRow = { key, value, requestedBy, requestedAt };
    const overwrite = { value, requestedBy, requestedAt };
    getDb()
        .insert(control)
        .values(insertRow)
        .onConflictDoUpdate({ target: control.key, set: overwrite })
        .run();
}
/**
 * Look up the control row stored under `key`.
 *
 * @param {string} key - Control key to read.
 * @returns {{value: *, requestedBy: *, requestedAt: *} | null} The
 *   row's value, requester and request time, or null when no row
 *   exists for the key.
 */
export function readControl(key) {
    const found = getDb()
        .select()
        .from(control)
        .where(eq(control.key, key))
        .get();
    if (!found) {
        return null;
    }
    // Expose only the signal fields, not the whole row.
    const { value, requestedBy, requestedAt } = found;
    return { value, requestedBy, requestedAt };
}
/**
 * Delete the control row stored under `key`.
 *
 * @param {string} key - Control key to remove.
 * @returns {boolean} True when at least one row was deleted.
 */
export function clearControl(key) {
    const removedRows = getDb()
        .delete(control)
        .where(eq(control.key, key))
        .returning({ key: control.key })
        .all();
    return removedRows.length > 0;
}
@@ -0,0 +1,158 @@
1
+ // Bot-wide orchestrator. One process-wide instance. Polls every
2
+ // ~500ms and does the cross-cutting work no single worker should
3
+ // own:
4
+ // - Read control table → apply shutdown/pause/reload signals.
5
+ // - Reclaim stuck claims on outbound (and later: async, browser,
6
+ // memory_writes).
7
+ // - Mark dead workers (last_seen past threshold).
8
+ // - Poll the cron table → enqueue due jobs (Phase 2.2; not yet).
9
+ // - Log queue depths to a metrics buffer (Phase 7; not yet).
10
+ //
11
+ // Distinct from the sender worker: sender pulls from outbound and
12
+ // sends. Orchestrator pulls signals and metadata; it dispatches but
13
+ // doesn't do per-row work itself.
14
+ import { hostname } from 'os';
15
+ import { and, eq, lt, ne } from 'drizzle-orm';
16
+ import { getDb } from '../db/index.js';
17
+ import { workers } from '../db/schema.js';
18
+ import { logger } from '../logger.js';
19
+ import { reclaimStuckOutbound } from './outbound.js';
20
+ import { clearControl, readControl, requestControl } from './control.js';
// --- Timing knobs ---------------------------------------------------
// Interval between orchestrator ticks.
const TICK_INTERVAL_MS = 500;
// Interval between refreshes of the orchestrator's own last-seen stamp.
const HEARTBEAT_INTERVAL_MS = 5 * 1000;
// A worker whose last-seen stamp is older than this is marked dead.
const WORKER_DEAD_AFTER_SECONDS = 30;
// Total drain window before force-exit.
const SHUTDOWN_GRACE_MS = 30 * 1000;

// --- Module state (one orchestrator per process) --------------------
// Id of the running orchestrator; null until startOrchestrator runs.
let workerId = null;
// Set once stopOrchestrator has been called.
let stopping = false;
// Set once a shutdown control row has been observed by a tick.
let draining = false;
// Interval handles, kept so stopOrchestrator can clear them.
let tickTimer = null;
let heartbeatTimer = null;
// Optional callback run after draining, just before the process exits.
let exitHook = null;
/**
 * Build the orchestrator's worker id from the host name and process
 * id, in the form `<hostname>-<pid>-orchestrator-0`.
 *
 * @returns {string} The worker id.
 */
function newOrchestratorId() {
    const parts = [hostname(), process.pid, 'orchestrator', 0];
    return parts.join('-');
}
/**
 * Insert (or reset) the orchestrator's own row in the workers table.
 *
 * On an id conflict the existing row is reset to idle with no current
 * job and fresh last-seen / started-at stamps.
 *
 * @param {string} id - Worker id to register.
 * @returns {void}
 */
function registerSelf(id) {
    // Timestamps are stored as whole seconds since the epoch.
    const nowSeconds = Math.floor(Date.now() / 1000);
    const freshState = {
        status: 'idle',
        currentJob: null,
        lastSeen: nowSeconds,
        startedAt: nowSeconds,
    };
    getDb()
        .insert(workers)
        .values({ id, kind: 'orchestrator', ...freshState })
        .onConflictDoUpdate({ target: workers.id, set: { ...freshState } })
        .run();
}
/**
 * Refresh the last-seen stamp of the worker row with the given id.
 *
 * @param {string} id - Worker id whose row is updated.
 * @returns {void}
 */
function heartbeat(id) {
    const lastSeen = Math.floor(Date.now() / 1000);
    getDb()
        .update(workers)
        .set({ lastSeen })
        .where(eq(workers.id, id))
        .run();
}
/**
 * Flag every worker whose last-seen stamp is older than
 * WORKER_DEAD_AFTER_SECONDS as 'dead', logging one warning per worker.
 * Rows already marked dead are left untouched so they are not
 * re-reported on every tick.
 *
 * The dead status is intended as a liveness signal for observability
 * queries and, eventually, for reclaiming claimed jobs.
 *
 * @returns {number} How many workers were newly marked dead.
 */
function markDeadWorkers() {
    const staleBefore = Math.floor(Date.now() / 1000) - WORKER_DEAD_AFTER_SECONDS;
    const isStale = lt(workers.lastSeen, staleBefore);
    const notYetDead = ne(workers.status, 'dead');
    const newlyDead = getDb()
        .update(workers)
        .set({ status: 'dead' })
        .where(and(isStale, notYetDead))
        .returning({ id: workers.id, kind: workers.kind })
        .all();
    newlyDead.forEach(({ id, kind }) => {
        logger.warn({ id, kind }, 'worker marked dead (no heartbeat)');
    });
    return newlyDead.length;
}
/**
 * Count the workers whose status is currently 'busy'.
 *
 * @returns {number} Number of busy worker rows.
 */
function busyWorkerCount() {
    const { length: busyCount } = getDb()
        .select({ id: workers.id })
        .from(workers)
        .where(eq(workers.status, 'busy'))
        .all();
    return busyCount;
}
// Set once the final exit sequence has begun, so that an interval tick
// firing while an async exit hook is still pending cannot re-enter it.
let exiting = false;

/**
 * One orchestrator pass.
 *
 * 1. Reads the 'shutdown' control row; on first sight switches to
 *    draining and marks this worker's row as 'draining'.
 * 2. Runs cross-queue housekeeping: reclaims stuck outbound rows and
 *    marks workers without a recent heartbeat as dead.
 * 3. While draining, once no worker is 'busy': clears the control
 *    row, runs the exit hook (if any) and exits the process.
 *
 * Errors in steps 1-3 before the exit sequence are logged and the
 * tick is retried on the next interval. An error thrown by the exit
 * hook is logged and the process still exits, with code 1, instead of
 * lingering with its timers already stopped.
 *
 * @param {string} id - Worker id of this orchestrator.
 * @returns {Promise<void>}
 */
async function tick(id) {
    if (exiting) {
        return;
    }
    try {
        const ctl = readControl('shutdown');
        if (ctl && !draining) {
            logger.info({ requestedBy: ctl.requestedBy }, 'shutdown requested via control table');
            draining = true;
            // Mark ourselves draining so observability shows it.
            getDb()
                .update(workers)
                .set({ status: 'draining' })
                .where(eq(workers.id, id))
                .run();
        }
        // Cross-queue housekeeping. More queues land in later phases.
        const reclaimed = reclaimStuckOutbound();
        if (reclaimed > 0) {
            logger.info({ reclaimed }, 'reclaimed stuck outbound rows');
        }
        markDeadWorkers();
        if (!draining || busyWorkerCount() !== 0) {
            return;
        }
        logger.info('all workers idle, exiting cleanly');
        clearControl('shutdown');
    }
    catch (err) {
        logger.error({ err }, 'orchestrator tick error');
        return;
    }
    // Point of no return: from here on the process always exits.
    exiting = true;
    let exitCode = 0;
    try {
        if (exitHook) {
            await exitHook();
        }
    }
    catch (err) {
        // The hook may already have stopped timers and workers; a
        // failure here must not leave a half-stopped process alive.
        exitCode = 1;
        logger.error({ err }, 'shutdown hook failed, exiting anyway');
    }
    process.exit(exitCode);
}
/**
 * Start the process-wide orchestrator: register its worker row, then
 * begin the heartbeat and tick intervals. A second call while an id
 * is already set logs a warning and does nothing.
 *
 * @param {{onShutdownDrained?: Function}} [opts] - Options.
 *   `onShutdownDrained` is stored as the hook that a draining tick
 *   runs just before exiting the process.
 * @returns {void}
 */
export function startOrchestrator(opts = {}) {
    if (workerId) {
        logger.warn('orchestrator already started; ignoring');
        return;
    }
    const id = newOrchestratorId();
    workerId = id;
    exitHook = opts.onShutdownDrained ?? null;
    registerSelf(id);
    heartbeatTimer = setInterval(() => {
        if (workerId) {
            heartbeat(workerId);
        }
    }, HEARTBEAT_INTERVAL_MS);
    tickTimer = setInterval(() => {
        // tick() catches and logs its own errors, so the promise is
        // deliberately not awaited.
        void tick(id);
    }, TICK_INTERVAL_MS);
    logger.info({ workerId }, 'orchestrator started');
}
/**
 * Stop the orchestrator's tick and heartbeat intervals and record
 * that the orchestrator is stopping (which also tells the force-exit
 * timer in requestShutdown not to fire).
 *
 * @returns {void}
 */
export function stopOrchestrator() {
    stopping = true;
    for (const timer of [tickTimer, heartbeatTimer]) {
        if (timer) {
            clearInterval(timer);
        }
    }
}
// Handle of the armed force-exit timer; null until the first shutdown
// request. Ensures repeated requests share one timer.
let forceExitTimer = null;

/**
 * Public entry point for "begin graceful shutdown."
 *
 * Arms a single force-exit timer (SHUTDOWN_GRACE_MS, measured from the
 * first request) and then writes the 'shutdown' control row for the
 * orchestrator to pick up on a later tick. The timer is armed before
 * the write so a failing write cannot leave the process without a
 * forced exit, and it is armed only once even if several signals
 * arrive before the orchestrator has switched to draining.
 *
 * Does nothing once draining has begun.
 *
 * @param {string} by - Who or what requested the shutdown (stored as
 *   the control row's requester).
 * @returns {void}
 */
export function requestShutdown(by) {
    if (draining) {
        return;
    }
    if (!forceExitTimer) {
        forceExitTimer = setTimeout(() => {
            // stopOrchestrator() sets `stopping`; if that happened the
            // clean exit path is already underway.
            if (!stopping) {
                logger.warn({ graceMs: SHUTDOWN_GRACE_MS }, 'graceful shutdown timed out, forcing exit');
                process.exit(1);
            }
        }, SHUTDOWN_GRACE_MS);
        // Do not let the safety timer itself keep the process alive.
        forceExitTimer.unref();
    }
    requestControl('shutdown', 'requested', by);
}
/**
 * Report whether the orchestrator has observed a shutdown request and
 * switched to draining.
 *
 * @returns {boolean} The current draining flag.
 */
export function isDraining() {
    const currentlyDraining = draining;
    return currentlyDraining;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@c4t4/heyamigo",
3
- "version": "0.9.2",
3
+ "version": "0.9.3",
4
4
  "description": "WhatsApp AI bot powered by Claude with long-term memory, browser control, and role-based access",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",