@c4t4/heyamigo 0.9.15 → 0.9.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/boot.js CHANGED
@@ -8,6 +8,7 @@ import { syncIdentitiesFromAccess } from './db/identity-sync.js';
8
8
  import { attachIncoming } from './gateway/incoming.js';
9
9
  import { logger } from './logger.js';
10
10
  import { startScheduler } from './memory/scheduler.js';
11
+ import { startBrowserWorkers, stopBrowserWorkers } from './queue/browser-worker.js';
11
12
  import { startChatWorkers, stopChatWorkers } from './queue/chat-worker.js';
12
13
  import { startMemoryWorker, stopMemoryWorker, } from './queue/memory-worker.js';
13
14
  import { requestShutdown, startOrchestrator, stopOrchestrator, } from './queue/orchestrator.js';
@@ -31,6 +32,7 @@ export async function bootBot() {
31
32
  startOrchestrator({
32
33
  onShutdownDrained: () => {
33
34
  stopChatWorkers();
35
+ stopBrowserWorkers();
34
36
  stopSenderWorker();
35
37
  stopMemoryWorker();
36
38
  stopOrchestrator();
@@ -42,6 +44,7 @@ export async function bootBot() {
42
44
  // No separate replay step needed.
43
45
  startSenderWorker();
44
46
  startMemoryWorker();
47
+ startBrowserWorkers();
45
48
  startChatWorkers();
46
49
  startScheduler();
47
50
  await startSocket((sock) => {
package/dist/db/schema.js CHANGED
@@ -216,3 +216,33 @@ export const memoryWrites = sqliteTable('memory_writes', {
216
216
  .on(t.idempotencyKey)
217
217
  .where(sql `${t.idempotencyKey} IS NOT NULL`),
218
218
  }));
219
// ──────────────────────────────────────────────────────────────────
// Browser tasks (Phase 4, durable)
// ──────────────────────────────────────────────────────────────────
// Durable queue for browser-driven background tasks (the
// [ASYNC-BROWSER:] markers). It replaces the in-memory fastq queue
// with SQLite-backed storage so tasks survive process crashes and
// stuck claims can be reclaimed via a TTL.
//
// The browser worker pool (config.browser.maxWorkers) drains this
// table; each task runs as a fresh agent (no persistent session —
// Phase 4) and opens its own tab on the shared Chrome.
export const browserTasks = sqliteTable('browser_tasks', {
    id: integer('id').primaryKey({ autoIncrement: true }),
    // Where the reply goes (formatted address of the originating chat).
    address: text('address').notNull(),
    actorPersonId: text('actor_person_id'),
    // Task payload handed to the browser agent.
    description: text('description').notNull(),
    originatingMessage: text('originating_message').notNull(),
    senderNumber: text('sender_number').notNull(),
    senderName: text('sender_name'),
    allowedTools: text('allowed_tools'), // JSON: 'all' | string[]
    // Queue bookkeeping; all timestamps are integer columns.
    status: text('status').notNull(),
    attempts: integer('attempts').notNull().default(0),
    nextAttemptAt: integer('next_attempt_at'),
    lastError: text('last_error'),
    claimedBy: text('claimed_by'),
    claimedAt: integer('claimed_at'),
    createdAt: integer('created_at').notNull(),
    updatedAt: integer('updated_at').notNull(),
}, (table) => ({
    // Composite index over the two columns the queue polls on.
    byStatusNext: index('btasks_by_status_next').on(table.status, table.nextAttemptAt),
}));
@@ -1,8 +1,10 @@
1
1
  import { getProvider } from '../ai/providers.js';
2
+ import { formatAddress, jidToAddress } from '../db/address.js';
2
3
  import { config } from '../config.js';
3
4
  import fastq from 'fastq';
4
5
  import { initiate } from '../gateway/outgoing.js';
5
6
  import { logger } from '../logger.js';
7
+ import { enqueueBrowserJob } from './browser-queue.js';
6
8
  // Concurrency: how many async workers can run simultaneously.
7
9
  // Start conservative — each process is expensive (Playwright, multi-minute runs).
8
10
  // Tune via config.asyncTasks.concurrency once we have real usage data.
@@ -241,36 +243,32 @@ function truncate(s, n) {
241
243
  // (the chat-track agent writes self-contained task descriptions).
242
244
  // Per-task tab isolation is enforced by the prompt instructions
243
245
  // below.
244
- // Browser pool: multiple agents share one Chrome (the logged-in
245
- // profile), each task opens its own tab. Persistent agent session is
246
- // dropped every task is fresh, with self-contained instructions
247
- // from the chat-track agent. The trade-off: no cross-task agent
248
- // memory; the win: real parallelism.
249
- const BROWSER_CONCURRENCY = Math.max(1, config.browser?.maxWorkers ?? 3);
250
- const browserQueue = fastq.promise(async (task) => {
251
- inProgress.set(task.id, task);
252
- try {
253
- await runBrowserTask(task);
254
- }
255
- catch (err) {
256
- logger.error({ err, id: task.id, jid: task.jid }, 'browser task failed unexpectedly');
257
- }
258
- finally {
259
- inProgress.delete(task.id);
260
- }
261
- }, BROWSER_CONCURRENCY);
246
+ // Browser tasks now go into the durable browser_tasks SQLite table.
247
+ // The browser worker pool (src/queue/browser-worker.ts) drains it.
248
+ // In-flight tasks survive process crashes; the orchestrator reclaims
249
+ // stuck claims via the TTL on the table.
262
250
  export function enqueueBrowserTask(input) {
251
+ // Keep AsyncTask shape exported so existing callers (worker.ts)
252
+ // don't change. The returned id is informational only — the real
253
+ // row id is the DB auto-increment.
263
254
  const task = {
264
255
  ...input,
265
256
  id: `browser-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
266
257
  startedAt: Math.floor(Date.now() / 1000),
267
258
  };
259
+ enqueueBrowserJob({
260
+ address: formatAddress(jidToAddress(task.jid)),
261
+ description: task.description,
262
+ originatingMessage: task.originatingMessage,
263
+ senderNumber: task.senderNumber,
264
+ senderName: task.senderName ?? null,
265
+ allowedTools: task.allowedTools,
266
+ });
268
267
  logger.info({
269
268
  id: task.id,
270
269
  jid: task.jid,
271
270
  description: task.description.slice(0, 200),
272
271
  }, 'browser task enqueued');
273
- browserQueue.push(task).catch((err) => logger.error({ err, id: task.id }, 'browser queue push failed'));
274
272
  return task;
275
273
  }
276
274
  function buildBrowserPrompt(task) {
@@ -324,7 +322,10 @@ function browserAddDirs() {
324
322
  config.storage.mediaDir,
325
323
  ];
326
324
  }
327
- async function runBrowserTask(task) {
325
+ // Exported so the browser worker (src/queue/browser-worker.ts) can
326
+ // invoke it for each claimed row. Body unchanged from the pre-queue
327
+ // version — just rehomed for direct invocation by the pool.
328
+ export async function runBrowserTask(task) {
328
329
  const provider = getProvider();
329
330
  // Each task is fresh (Phase 4 browser parallelism). No persistent
330
331
  // session — would force serialization on concurrent tasks.
@@ -0,0 +1,141 @@
1
+ // Browser tasks queue helpers. Producers call enqueueBrowserJob;
2
+ // the browser worker pool drains via claimNextBrowserTask. Same
3
+ // primitives as inbound/outbound — claim is atomic, retry uses
4
+ // per-task backoff, claimed_by safety check on completion.
5
+ //
6
+ // No per-address serialization: multiple browser tasks for the same
7
+ // originating chat CAN run concurrently (each opens its own tab on
8
+ // the shared Chrome). Reply order isn't a concern because each browser
9
+ // task ends with an outbound row, and the sender worker serializes
10
+ // per-address there.
11
+ import { and, asc, eq, isNull, lte, or, sql } from 'drizzle-orm';
12
+ import { getDb } from '../db/index.js';
13
+ import { browserTasks } from '../db/schema.js';
14
/**
 * Insert a new browser task row in the `pending` state.
 *
 * @param {object} input address, description, originatingMessage and
 *   senderNumber are stored as given; actorPersonId and senderName
 *   default to null; a truthy allowedTools is stored as JSON text.
 * @returns {object} The inserted row.
 */
export function enqueueBrowserJob(input) {
    const createdAt = Math.floor(Date.now() / 1000);
    const serializedTools = input.allowedTools
        ? JSON.stringify(input.allowedTools)
        : null;
    const record = {
        address: input.address,
        actorPersonId: input.actorPersonId ?? null,
        description: input.description,
        originatingMessage: input.originatingMessage,
        senderNumber: input.senderNumber,
        senderName: input.senderName ?? null,
        allowedTools: serializedTools,
        // A fresh row is unclaimed, has no failures and is due immediately.
        status: 'pending',
        attempts: 0,
        nextAttemptAt: null,
        lastError: null,
        claimedBy: null,
        claimedAt: null,
        createdAt,
        updatedAt: createdAt,
    };
    return getDb().insert(browserTasks).values(record).returning().get();
}
41
/**
 * Claim the oldest due `pending` task for a worker.
 *
 * A task is due when it is `pending` and its next_attempt_at is either
 * unset or not in the future. Selection and claim run in a single
 * transaction, and the update re-checks `status = 'pending'`.
 *
 * @param {string} workerId Id recorded in claimed_by.
 * @returns {object|null} The claimed row, or null when nothing is due.
 */
export function claimNextBrowserTask(workerId) {
    const claimTime = Math.floor(Date.now() / 1000);
    const isDue = and(
        eq(browserTasks.status, 'pending'),
        or(isNull(browserTasks.nextAttemptAt), lte(browserTasks.nextAttemptAt, claimTime)),
    );
    return getDb().transaction((tx) => {
        // Lowest id first: tasks are picked up in insertion order.
        const candidate = tx
            .select({ id: browserTasks.id })
            .from(browserTasks)
            .where(isDue)
            .orderBy(asc(browserTasks.id))
            .limit(1)
            .get();
        if (!candidate) {
            return null;
        }
        const stillPending = and(
            eq(browserTasks.id, candidate.id),
            eq(browserTasks.status, 'pending'),
        );
        const row = tx
            .update(browserTasks)
            .set({
                status: 'claimed',
                claimedBy: workerId,
                claimedAt: claimTime,
                updatedAt: claimTime,
            })
            .where(stillPending)
            .returning()
            .get();
        return row ?? null;
    });
}
68
/**
 * Mark a task `done`, but only if it is still claimed by `workerId`.
 *
 * @param {number} id Row id of the task.
 * @param {string} workerId Worker that believes it holds the claim.
 * @returns {boolean} true when a row was updated; false when the claim
 *   was no longer held by this worker (or the row does not exist).
 */
export function markBrowserTaskDone(id, workerId) {
    const ownedClaim = and(
        eq(browserTasks.id, id),
        eq(browserTasks.status, 'claimed'),
        eq(browserTasks.claimedBy, workerId),
    );
    const finishedAt = Math.floor(Date.now() / 1000);
    const updated = getDb()
        .update(browserTasks)
        .set({ status: 'done', updatedAt: finishedAt })
        .where(ownedClaim)
        .returning({ id: browserTasks.id })
        .all();
    return updated.length !== 0;
}
79
// Browser tasks are expensive (multi-minute Playwright sessions), so
// the retry schedule is sparse: the first failure waits 30s, the second
// waits 5min, and the third failure dead-letters the task. Most browser
// failures are deterministic (login wall, bot detection) and would not
// benefit from rapid retries.
const BACKOFF_SECONDS = [30, 300];
const MAX_ATTEMPTS = BACKOFF_SECONDS.length;
/**
 * Record a failed attempt: reschedule the task with backoff, or move it
 * to the dead-letter state once the retry budget is spent.
 *
 * Nothing is written unless the row is still `claimed` by `workerId`.
 *
 * @param {number} id Row id of the task.
 * @param {string} workerId Worker that held the claim.
 * @param {string} errorMessage Stored in last_error.
 * @returns {{retried: boolean, deadLettered: boolean}} Which transition
 *   happened; both false when the claim was not held by this worker.
 */
export function markBrowserTaskRetryOrDlq(id, workerId, errorMessage) {
    return getDb().transaction((tx) => {
        const current = tx
            .select()
            .from(browserTasks)
            .where(eq(browserTasks.id, id))
            .get();
        const ownsClaim = Boolean(current)
            && current.status === 'claimed'
            && current.claimedBy === workerId;
        if (!ownsClaim) {
            return { retried: false, deadLettered: false };
        }
        const failedAt = Math.floor(Date.now() / 1000);
        const attempts = current.attempts + 1;
        const exhausted = attempts > MAX_ATTEMPTS;
        // Both outcomes release the claim and record the failure.
        const released = {
            attempts,
            lastError: errorMessage,
            claimedBy: null,
            claimedAt: null,
            updatedAt: failedAt,
        };
        let patch;
        if (exhausted) {
            patch = { status: 'dlq', ...released };
        }
        else {
            // Backoff is indexed by the number of failures before this one.
            const slot = Math.min(current.attempts, BACKOFF_SECONDS.length - 1);
            patch = {
                status: 'pending',
                nextAttemptAt: failedAt + BACKOFF_SECONDS[slot],
                ...released,
            };
        }
        tx.update(browserTasks)
            .set(patch)
            .where(eq(browserTasks.id, id))
            .run();
        return { retried: !exhausted, deadLettered: exhausted };
    });
}
124
// Browser tasks take 1-15 min routinely, so the reclaim TTL is generous.
const CLAIM_TTL_SECONDS = 20 * 60;
/**
 * Return every task whose claim is older than CLAIM_TTL_SECONDS to the
 * `pending` state.
 *
 * Only the claim fields are reset: attempts and next_attempt_at are
 * left untouched, and updated_at is rewritten with its current value.
 *
 * @returns {number} How many rows were reclaimed.
 */
export function reclaimStuckBrowserTasks() {
    const staleBefore = Math.floor(Date.now() / 1000) - CLAIM_TTL_SECONDS;
    const expiredClaim = and(
        eq(browserTasks.status, 'claimed'),
        lte(browserTasks.claimedAt, staleBefore),
    );
    const released = getDb()
        .update(browserTasks)
        .set({
            status: 'pending',
            claimedBy: null,
            claimedAt: null,
            // Self-assignment: keeps the stored updated_at as it was.
            updatedAt: sql `${browserTasks.updatedAt}`,
        })
        .where(expiredClaim)
        .returning({ id: browserTasks.id })
        .all();
    return released.length;
}
@@ -0,0 +1,170 @@
1
+ // Browser worker pool. N workers (config.browser.maxWorkers, default
2
+ // 3) drain the browser_tasks SQLite table. Each task runs as a fresh
3
+ // agent with its own tab on the shared Chrome — same model as before
4
+ // the durability change, just claimable from the DB now.
5
+ //
6
+ // Differences vs in-memory fastq:
7
+ // - Tasks survive process crashes (durable rows).
8
+ // - Orchestrator reclaims stuck claims via reclaimStuckBrowserTasks.
9
+ // - Retry / DLQ semantics live in the queue helpers.
10
+ import { hostname } from 'os';
11
+ import { eq } from 'drizzle-orm';
12
+ import { config } from '../config.js';
13
+ import { getDb } from '../db/index.js';
14
+ import { parseAddress } from '../db/address.js';
15
+ import { workers } from '../db/schema.js';
16
+ import { logger } from '../logger.js';
17
+ import { claimNextBrowserTask, markBrowserTaskDone, markBrowserTaskRetryOrDlq, } from './browser-queue.js';
18
+ import { initiate } from '../gateway/outgoing.js';
19
+ import { runBrowserTask } from './async-tasks.js';
20
// How often (ms) the shared timer refreshes lastSeen for pool workers.
const HEARTBEAT_INTERVAL_MS = 5000;
// Delay (ms) before polling again when no task was claimed.
const IDLE_POLL_INTERVAL_MS = 500;
// Delay (ms) before polling again right after a task was processed.
const BUSY_POLL_INTERVAL_MS = 0;
// Ids of the workers started by this process.
const activeWorkers = [];
// Set once shutdown is requested; worker loops exit when they see it.
let stopping = false;
// Handle of the heartbeat interval, or null while none is scheduled.
let heartbeatTimer = null;
26
/**
 * Build the id for a pool worker from the host name, the process id
 * and the worker's slot number.
 *
 * @param {number} slot Index of the worker within the pool.
 * @returns {string} `<hostname>-<pid>-browser-<slot>`.
 */
function newWorkerId(slot) {
    const parts = [hostname(), String(process.pid), 'browser', String(slot)];
    return parts.join('-');
}
29
/**
 * Upsert the worker's row in the `workers` table as an idle browser
 * worker. If a row with the same id already exists it is reset to
 * idle, with no current job and fresh lastSeen/startedAt timestamps.
 *
 * @param {string} id Worker id.
 */
function registerWorker(id) {
    const startedAt = Math.floor(Date.now() / 1000);
    // The same values are used for a fresh insert and a conflict reset.
    const freshState = {
        status: 'idle',
        currentJob: null,
        lastSeen: startedAt,
        startedAt,
    };
    getDb()
        .insert(workers)
        .values({ id, kind: 'browser', ...freshState })
        .onConflictDoUpdate({ target: workers.id, set: freshState })
        .run();
}
47
/**
 * Update a worker's status and current job, refreshing lastSeen.
 *
 * @param {string} id Worker id.
 * @param {string} status New status value.
 * @param {string|null} [currentJob=null] Job reference, or null.
 */
function setWorkerStatus(id, status, currentJob = null) {
    const patch = {
        status,
        currentJob,
        lastSeen: Math.floor(Date.now() / 1000),
    };
    getDb().update(workers).set(patch).where(eq(workers.id, id)).run();
}
58
/**
 * Refresh lastSeen for every worker in the pool.
 *
 * This runs as a setInterval callback, where a thrown error would be an
 * uncaught exception. A failed heartbeat is therefore logged and
 * dropped; the next tick simply tries again.
 */
function heartbeatAll() {
    if (activeWorkers.length === 0) {
        return;
    }
    try {
        const db = getDb();
        const now = Math.floor(Date.now() / 1000);
        for (const id of activeWorkers) {
            db.update(workers)
                .set({ lastSeen: now })
                .where(eq(workers.id, id))
                .run();
        }
    }
    catch (err) {
        logger.warn({ err }, 'browser worker heartbeat failed');
    }
}
70
/**
 * Convert a browser_tasks row into the AsyncTask shape that
 * runBrowserTask expects. The `id` field is a synthetic string for log
 * lines; the real row id is used for queue bookkeeping.
 *
 * `allowedTools` is stored as JSON text. Only the two supported shapes
 * are accepted — the string 'all' or an array of strings. A missing
 * value, unparseable JSON, or JSON of any other shape (null, number,
 * object, mixed array) falls back to 'all'.
 *
 * @param {object} row A browser_tasks row.
 * @returns {object} The AsyncTask view of the row.
 */
function rowToAsyncTask(row) {
    let allowedTools = 'all';
    if (row.allowedTools) {
        try {
            const parsed = JSON.parse(row.allowedTools);
            const isToolList = Array.isArray(parsed)
                && parsed.every((tool) => typeof tool === 'string');
            if (parsed === 'all' || isToolList) {
                allowedTools = parsed;
            }
        }
        catch {
            // bad JSON → fall back to 'all'
        }
    }
    return {
        id: `browser-${row.id}`,
        jid: parseAddress(row.address).externalId,
        senderNumber: row.senderNumber,
        senderName: row.senderName ?? undefined,
        description: row.description,
        originatingMessage: row.originatingMessage,
        allowedTools,
        // Prefer the claim time; fall back to creation time if unclaimed.
        startedAt: row.claimedAt ?? row.createdAt,
    };
}
94
/**
 * Run one claimed browser task and record its outcome.
 *
 * Success marks the row done. Any failure goes through the retry/DLQ
 * bookkeeping; when the task is dead-lettered the originating chat is
 * told so it isn't left hanging. The worker is always flipped back to
 * idle afterwards.
 *
 * @param {string} workerId Worker holding the claim.
 * @param {object} row The claimed browser_tasks row.
 * @returns {Promise<void>}
 */
async function processOne(workerId, row) {
    setWorkerStatus(workerId, 'busy', `browser_tasks:${row.id}`);
    try {
        // Converting the row is part of the attempt. If it throws (for
        // example on an address that cannot be parsed) the failure must
        // be counted towards retry/DLQ; otherwise the row would stay
        // claimed until the TTL reclaim and then fail the same way again.
        const task = rowToAsyncTask(row);
        await runBrowserTask(task);
        const ok = markBrowserTaskDone(row.id, workerId);
        if (!ok) {
            logger.warn({ id: row.id, workerId }, 'browser task markDone failed (claim lost?). work already done.');
        }
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const result = markBrowserTaskRetryOrDlq(row.id, workerId, msg);
        if (result.deadLettered) {
            logger.error({ err, id: row.id, address: row.address }, 'browser task dead-lettered after max attempts');
            // User-facing failure ack so the chat isn't left hanging.
            // Best effort: a failure to notify is logged, not rethrown.
            try {
                await initiate({
                    jid: parseAddress(row.address).externalId,
                    text: `Heads up: the browser task "${row.description.slice(0, 80)}" failed. Ask me again and I'll retry.`,
                });
            }
            catch (e) {
                logger.error({ err: e, id: row.id }, 'failed to send DLQ-ack reply');
            }
        }
        else if (result.retried) {
            logger.warn({ err, id: row.id, address: row.address }, 'browser task transient fail, will retry');
        }
    }
    finally {
        setWorkerStatus(workerId, 'idle');
    }
}
128
/**
 * Main loop of one pool worker: claim a task, process it, pause, and
 * repeat until shutdown is requested. On exit the worker is marked
 * 'dead'.
 *
 * Errors raised while claiming or processing are logged and do not end
 * the loop.
 *
 * @param {string} workerId Id of the worker running this loop.
 * @returns {Promise<void>}
 */
async function loop(workerId) {
    while (!stopping) {
        let didWork = false;
        try {
            const claimed = claimNextBrowserTask(workerId);
            if (claimed) {
                await processOne(workerId, claimed);
                didWork = true;
            }
        }
        catch (err) {
            logger.error({ err, workerId }, 'browser worker loop error');
        }
        // After real work, yield once and poll again right away;
        // when idle, wait out the idle interval first.
        const waitMs = didWork ? BUSY_POLL_INTERVAL_MS : IDLE_POLL_INTERVAL_MS;
        await new Promise((resume) => {
            if (waitMs > 0) {
                setTimeout(resume, waitMs);
            }
            else {
                setImmediate(resume);
            }
        });
    }
    setWorkerStatus(workerId, 'dead');
}
151
/**
 * Start the browser worker pool and its heartbeat timer.
 *
 * Pool size is config.browser.maxWorkers (default 3, minimum 1). If
 * any workers are already recorded as active the call is ignored with
 * a warning.
 */
export function startBrowserWorkers() {
    if (activeWorkers.length > 0) {
        logger.warn('browser workers already started; ignoring');
        return;
    }
    const pool = Math.max(1, config.browser?.maxWorkers ?? 3);
    let slot = 0;
    while (slot < pool) {
        const workerId = newWorkerId(slot);
        activeWorkers.push(workerId);
        registerWorker(workerId);
        // Fire and forget: the loop runs until shutdown; a crash is logged.
        void loop(workerId).catch((err) => logger.fatal({ err, workerId }, 'browser worker loop crashed'));
        slot += 1;
    }
    heartbeatTimer = setInterval(heartbeatAll, HEARTBEAT_INTERVAL_MS);
    logger.info({ pool }, 'browser worker pool started');
}
166
/**
 * Ask the worker loops to stop and cancel the heartbeat timer.
 *
 * This only raises the `stopping` flag: each loop exits the next time
 * it checks the flag, so a task that is currently running is not
 * interrupted. The timer handle is reset to null after it is cleared,
 * so calling this more than once is a harmless no-op.
 */
export function stopBrowserWorkers() {
    stopping = true;
    if (heartbeatTimer) {
        clearInterval(heartbeatTimer);
        heartbeatTimer = null;
    }
}
@@ -16,6 +16,7 @@ import { and, eq, lt, ne } from 'drizzle-orm';
16
16
  import { getDb } from '../db/index.js';
17
17
  import { workers } from '../db/schema.js';
18
18
  import { logger } from '../logger.js';
19
+ import { reclaimStuckBrowserTasks } from './browser-queue.js';
19
20
  import { reclaimStuckInbound } from './inbound.js';
20
21
  import { reclaimStuckMemoryWrites } from './memory-writes.js';
21
22
  import { reclaimStuckOutbound } from './outbound.js';
@@ -112,6 +113,10 @@ async function tick(id) {
112
113
  if (reclaimedMemWr > 0) {
113
114
  logger.info({ reclaimed: reclaimedMemWr }, 'reclaimed stuck memory_writes rows');
114
115
  }
116
+ const reclaimedBrowser = reclaimStuckBrowserTasks();
117
+ if (reclaimedBrowser > 0) {
118
+ logger.info({ reclaimed: reclaimedBrowser }, 'reclaimed stuck browser_tasks rows');
119
+ }
115
120
  // Fire any due crons. Order: dispatch each in turn; if dispatch
116
121
  // throws (it shouldn't — dispatch swallows), the cron is NOT
117
122
  // marked fired and we'll retry on the next tick.
@@ -0,0 +1,20 @@
1
-- Durable queue of browser tasks: one row per task.
CREATE TABLE `browser_tasks` (
	-- identity
	`id`                  INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
	-- task payload
	`address`             TEXT NOT NULL,
	`actor_person_id`     TEXT,
	`description`         TEXT NOT NULL,
	`originating_message` TEXT NOT NULL,
	`sender_number`       TEXT NOT NULL,
	`sender_name`         TEXT,
	`allowed_tools`       TEXT,
	-- queue bookkeeping
	`status`              TEXT NOT NULL,
	`attempts`            INTEGER DEFAULT 0 NOT NULL,
	`next_attempt_at`     INTEGER,
	`last_error`          TEXT,
	`claimed_by`          TEXT,
	`claimed_at`          INTEGER,
	`created_at`          INTEGER NOT NULL,
	`updated_at`          INTEGER NOT NULL
);
--> statement-breakpoint
-- Composite index over the status and next-attempt columns.
CREATE INDEX `btasks_by_status_next` ON `browser_tasks` (`status`, `next_attempt_at`);