@c4t4/heyamigo 0.9.15 → 0.9.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/boot.js +3 -0
- package/dist/db/schema.js +38 -0
- package/dist/estimates/image-gen.js +36 -0
- package/dist/estimates/index.js +12 -0
- package/dist/estimates/registry.js +113 -0
- package/dist/estimates/types.js +6 -0
- package/dist/gateway/incoming.js +26 -6
- package/dist/queue/async-tasks.js +21 -20
- package/dist/queue/browser-queue.js +141 -0
- package/dist/queue/browser-worker.js +170 -0
- package/dist/queue/inbound.js +1 -0
- package/dist/queue/orchestrator.js +5 -0
- package/migrations/0006_phase4_browser_tasks.sql +20 -0
- package/migrations/0007_estimates_kind.sql +2 -0
- package/migrations/meta/0006_snapshot.json +909 -0
- package/migrations/meta/0007_snapshot.json +924 -0
- package/migrations/meta/_journal.json +14 -0
- package/package.json +1 -1
package/dist/boot.js
CHANGED
|
@@ -8,6 +8,7 @@ import { syncIdentitiesFromAccess } from './db/identity-sync.js';
|
|
|
8
8
|
import { attachIncoming } from './gateway/incoming.js';
|
|
9
9
|
import { logger } from './logger.js';
|
|
10
10
|
import { startScheduler } from './memory/scheduler.js';
|
|
11
|
+
import { startBrowserWorkers, stopBrowserWorkers } from './queue/browser-worker.js';
|
|
11
12
|
import { startChatWorkers, stopChatWorkers } from './queue/chat-worker.js';
|
|
12
13
|
import { startMemoryWorker, stopMemoryWorker, } from './queue/memory-worker.js';
|
|
13
14
|
import { requestShutdown, startOrchestrator, stopOrchestrator, } from './queue/orchestrator.js';
|
|
@@ -31,6 +32,7 @@ export async function bootBot() {
|
|
|
31
32
|
startOrchestrator({
|
|
32
33
|
onShutdownDrained: () => {
|
|
33
34
|
stopChatWorkers();
|
|
35
|
+
stopBrowserWorkers();
|
|
34
36
|
stopSenderWorker();
|
|
35
37
|
stopMemoryWorker();
|
|
36
38
|
stopOrchestrator();
|
|
@@ -42,6 +44,7 @@ export async function bootBot() {
|
|
|
42
44
|
// No separate replay step needed.
|
|
43
45
|
startSenderWorker();
|
|
44
46
|
startMemoryWorker();
|
|
47
|
+
startBrowserWorkers();
|
|
45
48
|
startChatWorkers();
|
|
46
49
|
startScheduler();
|
|
47
50
|
await startSocket((sock) => {
|
package/dist/db/schema.js
CHANGED
|
@@ -159,6 +159,12 @@ export const inbound = sqliteTable('inbound', {
|
|
|
159
159
|
mediaBytes: integer('media_bytes'),
|
|
160
160
|
pushName: text('push_name'), // sender's display name at send time
|
|
161
161
|
triggerReason: text('trigger_reason'), // 'alias'|'mention'|'reply'|'owner'|...
|
|
162
|
+
// Job-kind tag for duration estimation (see src/estimates/). Set
|
|
163
|
+
// at ingest time when a registered estimator matches the message
|
|
164
|
+
// (e.g. 'image-gen', 'browser:ig'). Null otherwise. Queried by
|
|
165
|
+
// the estimator on subsequent invocations to compute past-sample
|
|
166
|
+
// averages.
|
|
167
|
+
kind: text('kind'),
|
|
162
168
|
// Producer-built worker payload (JSON). Chat worker deserializes
|
|
163
169
|
// at claim time to reconstruct the Job. Keeps the rebuild logic
|
|
164
170
|
// out of the worker for Phase 4; later phases may move portions
|
|
@@ -179,6 +185,8 @@ export const inbound = sqliteTable('inbound', {
|
|
|
179
185
|
byStatusNext: index('inbound_by_status_next').on(t.status, t.nextAttemptAt),
|
|
180
186
|
byAddress: index('inbound_by_address').on(t.address),
|
|
181
187
|
byPerson: index('inbound_by_person').on(t.personId, t.receivedAt),
|
|
188
|
+
// Used by the duration estimator: "last N done rows of this kind".
|
|
189
|
+
byKindDone: index('inbound_by_kind_done').on(t.kind, t.status),
|
|
182
190
|
// Sparse unique on external_msg_id: enforced only when set. Same
|
|
183
191
|
// pattern as outbound's idempotency_key.
|
|
184
192
|
uniqExtId: uniqueIndex('inbound_external_msg_id_uq')
|
|
@@ -216,3 +224,33 @@ export const memoryWrites = sqliteTable('memory_writes', {
|
|
|
216
224
|
.on(t.idempotencyKey)
|
|
217
225
|
.where(sql `${t.idempotencyKey} IS NOT NULL`),
|
|
218
226
|
}));
|
|
227
|
+
// ──────────────────────────────────────────────────────────────────
|
|
228
|
+
// Browser tasks (Phase 4, durable)
|
|
229
|
+
// ──────────────────────────────────────────────────────────────────
|
|
230
|
+
// Browser-driven background tasks ([ASYNC-BROWSER:] markers). Replaces
|
|
231
|
+
// the in-memory fastq queue with SQLite-backed durable storage: tasks
|
|
232
|
+
// survive process crashes and reclaim via TTL.
|
|
233
|
+
//
|
|
234
|
+
// Browser worker pool (config.browser.maxWorkers) drains; each task
|
|
235
|
+
// runs as a fresh agent (no persistent session — Phase 4) and opens
|
|
236
|
+
// its own tab on the shared Chrome.
|
|
237
|
+
// Durable queue table for browser-driven background tasks, one row per
// task. The queue helpers in queue/browser-queue.js write the status
// values 'pending', 'claimed', 'done' and 'dlq'.
export const browserTasks = sqliteTable('browser_tasks', {
    id: integer('id').primaryKey({ autoIncrement: true }),
    // Originating chat and requester.
    address: text('address').notNull(),
    actorPersonId: text('actor_person_id'),
    // What the task should do and the message that triggered it.
    description: text('description').notNull(),
    originatingMessage: text('originating_message').notNull(),
    senderNumber: text('sender_number').notNull(),
    senderName: text('sender_name'),
    allowedTools: text('allowed_tools'), // JSON: 'all' | string[]
    // Queue bookkeeping: lifecycle state, retry accounting, claim owner.
    status: text('status').notNull(),
    attempts: integer('attempts').notNull().default(0),
    nextAttemptAt: integer('next_attempt_at'),
    lastError: text('last_error'),
    claimedBy: text('claimed_by'),
    claimedAt: integer('claimed_at'),
    createdAt: integer('created_at').notNull(),
    updatedAt: integer('updated_at').notNull(),
}, (table) => {
    // Covers the claim query, which filters on status and next_attempt_at.
    return {
        byStatusNext: index('btasks_by_status_next').on(table.status, table.nextAttemptAt),
    };
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
// Image-generation estimator. Matches when the user message looks
|
|
2
|
+
// like a request to produce an image. Tracks duration of the chat-
|
|
3
|
+
// track turn that handles it (claimedAt → updatedAt on the inbound
|
|
4
|
+
// row).
|
|
5
|
+
import { aggregateMean, registerEstimator } from './registry.js';
|
|
6
|
+
// Conservative regex. Requires a generation verb AND an image-class
|
|
7
|
+
// noun within 80 chars. Prefers false-negative to false-positive —
|
|
8
|
+
// a single mistagged sample drags the average for everyone.
|
|
9
|
+
const IMAGE_GEN_RE = /\b(generate|create|make|draw|render|design|sketch|paint|illustrate)\b[^.?!\n]{0,80}\b(image|picture|drawing|art|artwork|photo|portrait|illustration|sketch|render|painting|wallpaper|logo|icon|graphic)\b/i;
// Estimator plugin for the 'image-gen' job kind: decides whether a
// message is an image request, turns past samples into an estimate,
// and renders that estimate as the ack text.
class ImageGenEstimator {
    // Kind tag this estimator is registered under.
    kind = 'image-gen';
    // Fallback estimate (30s) used while there are no samples yet.
    defaultMs = 30_000;
    // True when the message text matches the image-request pattern.
    matches(ctx) {
        return IMAGE_GEN_RE.test(ctx.description);
    }
    // Delegates to the shared mean aggregator, seeded with defaultMs.
    estimate(samples) {
        return aggregateMean(samples, this.defaultMs);
    }
    // Renders a range when the estimate carries one, else a single point.
    format(estimate) {
        const range = estimate.rangeMs;
        return range
            ? `generating image, anywhere from ~${secs(range.lowMs)} to ~${secs(range.highMs)}`
            : `generating image, ~${secs(estimate.pointMs)}`;
    }
}
|
|
30
|
+
// Render a millisecond duration as a short human-readable string:
// "45s", "3min" or "2h". Anything below one second is shown as "1s".
//
// ms: duration in milliseconds.
// Returns the formatted string.
function secs(ms) {
    const s = Math.max(1, Math.round(ms / 1000));
    if (s < 60) {
        return `${s}s`;
    }
    const m = Math.round(s / 60);
    if (m < 60) {
        return `${m}min`;
    }
    // Roll over to hours instead of printing "60min", "120min", ...
    // Same thresholds as humanDur() in registry.js, so both formatters
    // render a given duration identically.
    return `${Math.round(m / 60)}h`;
}
|
|
36
|
+
registerEstimator(new ImageGenEstimator());
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Estimates module entry point. Importing this side-effect-loads
|
|
2
|
+
// every built-in plugin (each plugin file calls registerEstimator()
|
|
3
|
+
// at module load). Outside callers only need:
|
|
4
|
+
//
|
|
5
|
+
// import { classify, estimate } from './estimates/index.js'
|
|
6
|
+
//
|
|
7
|
+
// Adding a new kind = drop a file alongside image-gen.ts and import
|
|
8
|
+
// it below. No other code in the codebase needs to change.
|
|
9
|
+
import './image-gen.js';
|
|
10
|
+
// future: import './browser-ig.js'
|
|
11
|
+
// future: import './voice-gen.js'
|
|
12
|
+
export { classify, estimate, formatEstimateDefault, humanDur, listEstimators, querySamplesForKind, registerEstimator, } from './registry.js';
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
// Estimator registry + the single entry points the rest of the bot
|
|
2
|
+
// uses: classify() and estimate(). Plugins self-register by importing
|
|
3
|
+
// this module and calling registerEstimator().
|
|
4
|
+
import { and, desc, eq, isNotNull } from 'drizzle-orm';
|
|
5
|
+
import { getDb } from '../db/index.js';
|
|
6
|
+
import { inbound } from '../db/schema.js';
|
|
7
|
+
// Registered estimators, kept in registration order.
const REGISTRY = [];
// Add an estimator, or swap it in place of the one already registered
// under the same kind (keeps repeated module loads from duplicating).
export function registerEstimator(e) {
    const slot = REGISTRY.findIndex((existing) => existing.kind === e.kind);
    if (slot < 0) {
        REGISTRY.push(e);
        return;
    }
    REGISTRY[slot] = e;
}
// Returns the live registry array itself, not a copy.
export function listEstimators() {
    return REGISTRY;
}
|
|
19
|
+
// Find the first estimator whose matches() returns true. First-match
|
|
20
|
+
// wins — order matters when registering. More-specific kinds should
|
|
21
|
+
// register before broad fallbacks.
|
|
22
|
+
// Returns the first registered estimator (in registration order) whose
// matches(ctx) is truthy, or null when none accepts the context.
export function classify(ctx) {
    const hit = REGISTRY.find((candidate) => candidate.matches(ctx));
    return hit ?? null;
}
|
|
29
|
+
// Pull the last N completed inbound rows for this kind. Returns
|
|
30
|
+
// newest-first; estimators that care about recency can use that
|
|
31
|
+
// order directly, the mean-based aggregator below doesn't.
|
|
32
|
+
//
|
|
33
|
+
// Limited to N=20 by default. The mean is fast and stable past 5-10
|
|
34
|
+
// samples; older data isn't helpful and risks staleness.
|
|
35
|
+
const SAMPLE_LIMIT = 20;
// Reads up to `limit` inbound rows tagged with `kind` that are 'done'
// and have a claim timestamp, highest id first, and converts each into
// a { durationMs, finishedAt } sample. Rows whose computed duration is
// not positive are dropped.
export function querySamplesForKind(kind, limit = SAMPLE_LIMIT) {
    const db = getDb();
    const finishedRowsOfKind = and(eq(inbound.kind, kind), eq(inbound.status, 'done'), isNotNull(inbound.claimedAt));
    const rows = db
        .select({ claimedAt: inbound.claimedAt, updatedAt: inbound.updatedAt })
        .from(inbound)
        .where(finishedRowsOfKind)
        .orderBy(desc(inbound.id))
        .limit(limit)
        .all();
    const samples = [];
    for (const { claimedAt, updatedAt } of rows) {
        // Defensive: the query already excludes NULL claimed_at.
        if (claimedAt === null) {
            continue;
        }
        // Scaled by 1000 to get milliseconds — presumably both columns
        // hold epoch seconds; confirm against the inbound writers.
        const durationMs = (updatedAt - claimedAt) * 1000;
        if (durationMs > 0) {
            samples.push({ durationMs, finishedAt: updatedAt });
        }
    }
    return samples;
}
|
|
56
|
+
// Public entry point. Returns { kind, result, text }, or null
|
|
57
|
+
// when no estimator matched (i.e. this isn't a job-kind we estimate).
|
|
58
|
+
// If an estimator matches, the result is ALWAYS non-null — the
|
|
59
|
+
// estimator falls back to its defaultMs when no samples exist.
|
|
60
|
+
// Classify `ctx` and, when an estimator matches, compute and render its
// estimate from that kind's stored samples.
//
// ctx: classification context handed to each estimator's matches().
// Returns { kind, result, text }, or null when no estimator matched.
export function estimate(ctx) {
    const e = classify(ctx);
    if (!e) {
        return null;
    }
    const samples = querySamplesForKind(e.kind);
    const result = e.estimate(samples);
    // Call format() as a method of the estimator. Pulling it off the
    // object first and invoking it bare would run it with `this`
    // undefined, which breaks any plugin whose format() reads its own
    // fields.
    const text = e.format == null
        ? formatEstimateDefault(result)
        : e.format(result);
    return { kind: e.kind, result, text };
}
|
|
69
|
+
// Default UX-friendly rendering. Each estimator can override.
|
|
70
|
+
// Fallback renderer for estimators that do not supply format(): shows
// the low/high range when the estimate has one, else the point value.
export function formatEstimateDefault(r) {
    const range = r.rangeMs;
    if (!range) {
        return `~${humanDur(r.pointMs)}`;
    }
    const low = humanDur(range.lowMs);
    const high = humanDur(range.highMs);
    return `anywhere from ~${low} to ~${high}`;
}
|
|
76
|
+
// Formats a millisecond duration as whole seconds ("45s"), whole
// minutes ("3min") or whole hours ("2h"), rounding at each step.
// Anything below one second is reported as "1s".
export function humanDur(ms) {
    const seconds = Math.max(1, Math.round(ms / 1000));
    const minutes = Math.round(seconds / 60);
    return seconds < 60
        ? `${seconds}s`
        : minutes < 60
            ? `${minutes}min`
            : `${Math.round(minutes / 60)}h`;
}
|
|
85
|
+
// Shared aggregator used by built-in estimators. Each estimator may
|
|
86
|
+
// implement its own estimate() but most just call this.
|
|
87
|
+
// Mean-based aggregator shared by the built-in estimators.
//
// samples:   array of { durationMs } observations.
// defaultMs: point estimate to report when there are no samples.
// Returns { pointMs, sampleSize, confidence } plus rangeMs
// ({ lowMs, highMs } = mean ∓ one standard deviation, low clamped at 0)
// when the standard deviation exceeds half of the mean.
export function aggregateMean(samples, defaultMs) {
    const count = samples.length;
    if (count === 0) {
        return { pointMs: defaultMs, sampleSize: 0, confidence: 'low' };
    }
    let total = 0;
    for (const sample of samples) {
        total += sample.durationMs;
    }
    const mean = total / count;
    if (count === 1) {
        // One observation: no spread to compute.
        return { pointMs: mean, sampleSize: 1, confidence: 'low' };
    }
    // Population variance (divides by count, not count - 1).
    let squaredDeviation = 0;
    for (const sample of samples) {
        squaredDeviation += (sample.durationMs - mean) ** 2;
    }
    const std = Math.sqrt(squaredDeviation / count);
    let confidence = 'low';
    if (count >= 10) {
        confidence = 'high';
    }
    else if (count >= 5) {
        confidence = 'medium';
    }
    const estimate = { pointMs: mean, sampleSize: count, confidence };
    // Past a 50% spread a single point hides too much, so add a range.
    if (std / mean > 0.5) {
        estimate.rangeMs = {
            lowMs: Math.max(0, mean - std),
            highMs: mean + std,
        };
    }
    return estimate;
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Job duration estimation interface. The system stays "blackbox" by
|
|
2
|
+
// design — outside callers only touch the registry's classify() /
|
|
3
|
+
// estimate() functions. Each kind plugs in via a self-contained file
|
|
4
|
+
// in src/estimates/<kind>.ts that calls registerEstimator() at module
|
|
5
|
+
// load.
|
|
6
|
+
export {};
|
package/dist/gateway/incoming.js
CHANGED
|
@@ -5,6 +5,7 @@ import { getSession } from '../ai/sessions.js';
|
|
|
5
5
|
import { formatAddress, jidToAddress } from '../db/address.js';
|
|
6
6
|
import { personIdForAddress } from '../db/identity-sync.js';
|
|
7
7
|
import { config } from '../config.js';
|
|
8
|
+
import { estimate as estimateJob } from '../estimates/index.js';
|
|
8
9
|
import { logger } from '../logger.js';
|
|
9
10
|
import { buildMemoryPreamble } from '../memory/preamble.js';
|
|
10
11
|
import { enqueueInbound } from '../queue/inbound.js';
|
|
@@ -216,12 +217,30 @@ async function processMessages(messages, sock, ownerJid, isHistorySync = false)
|
|
|
216
217
|
const actorPersonId = senderAddress
|
|
217
218
|
? personIdForAddress(senderAddress)
|
|
218
219
|
: null;
|
|
219
|
-
//
|
|
220
|
-
//
|
|
221
|
-
//
|
|
222
|
-
//
|
|
223
|
-
//
|
|
224
|
-
|
|
220
|
+
// Estimator: classify this message and, when a kind matches,
|
|
221
|
+
// (a) tag the inbound row so future estimates of the same kind
|
|
222
|
+
// get a fresh sample, and (b) send the estimate text as an
|
|
223
|
+
// immediate ack so the user sees a timeline before the agent
|
|
224
|
+
// even starts.
|
|
225
|
+
const est = estimateJob({
|
|
226
|
+
description: stored.text,
|
|
227
|
+
attachments: media ? [{ kind: media.mediaType }] : undefined,
|
|
228
|
+
senderPersonId: actorPersonId ?? undefined,
|
|
229
|
+
});
|
|
230
|
+
const jobKind = est?.kind ?? null;
|
|
231
|
+
if (est) {
|
|
232
|
+
enqueueOutbound({
|
|
233
|
+
address: chatAddress,
|
|
234
|
+
kind: 'text',
|
|
235
|
+
text: est.text,
|
|
236
|
+
idempotencyKey: `estimate-${msg.key.id}`,
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
else if (media && config.reply.ackOnMedia !== false) {
|
|
240
|
+
// Fallback media-ack when no estimator matched — keeps the
|
|
241
|
+
// pre-estimator behavior so image messages still get the
|
|
242
|
+
// "looking…" hint. A future MediaIncomingEstimator can replace
|
|
243
|
+
// this with a real average.
|
|
225
244
|
enqueueOutbound({
|
|
226
245
|
address: chatAddress,
|
|
227
246
|
kind: 'text',
|
|
@@ -238,6 +257,7 @@ async function processMessages(messages, sock, ownerJid, isHistorySync = false)
|
|
|
238
257
|
text: stored.text,
|
|
239
258
|
pushName: stored.pushName ?? null,
|
|
240
259
|
triggerReason,
|
|
260
|
+
kind: jobKind,
|
|
241
261
|
receivedAt: stored.timestamp,
|
|
242
262
|
payload: job,
|
|
243
263
|
});
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import { getProvider } from '../ai/providers.js';
|
|
2
|
+
import { formatAddress, jidToAddress } from '../db/address.js';
|
|
2
3
|
import { config } from '../config.js';
|
|
3
4
|
import fastq from 'fastq';
|
|
4
5
|
import { initiate } from '../gateway/outgoing.js';
|
|
5
6
|
import { logger } from '../logger.js';
|
|
7
|
+
import { enqueueBrowserJob } from './browser-queue.js';
|
|
6
8
|
// Concurrency: how many async workers can run simultaneously.
|
|
7
9
|
// Start conservative — each process is expensive (Playwright, multi-minute runs).
|
|
8
10
|
// Tune via config.asyncTasks.concurrency once we have real usage data.
|
|
@@ -241,36 +243,32 @@ function truncate(s, n) {
|
|
|
241
243
|
// (the chat-track agent writes self-contained task descriptions).
|
|
242
244
|
// Per-task tab isolation is enforced by the prompt instructions
|
|
243
245
|
// below.
|
|
244
|
-
// Browser
|
|
245
|
-
//
|
|
246
|
-
//
|
|
247
|
-
//
|
|
248
|
-
// memory; the win: real parallelism.
|
|
249
|
-
const BROWSER_CONCURRENCY = Math.max(1, config.browser?.maxWorkers ?? 3);
|
|
250
|
-
const browserQueue = fastq.promise(async (task) => {
|
|
251
|
-
inProgress.set(task.id, task);
|
|
252
|
-
try {
|
|
253
|
-
await runBrowserTask(task);
|
|
254
|
-
}
|
|
255
|
-
catch (err) {
|
|
256
|
-
logger.error({ err, id: task.id, jid: task.jid }, 'browser task failed unexpectedly');
|
|
257
|
-
}
|
|
258
|
-
finally {
|
|
259
|
-
inProgress.delete(task.id);
|
|
260
|
-
}
|
|
261
|
-
}, BROWSER_CONCURRENCY);
|
|
246
|
+
// Browser tasks now go into the durable browser_tasks SQLite table.
|
|
247
|
+
// The browser worker pool (src/queue/browser-worker.ts) drains it.
|
|
248
|
+
// In-flight tasks survive process crashes; the orchestrator reclaims
|
|
249
|
+
// stuck claims via the TTL on the table.
|
|
262
250
|
// Persists a browser task as a durable queue row and returns the
// AsyncTask-shaped object callers already expect. The generated
// `browser-…` id is informational only; the row id assigned by the
// database is not part of the returned task.
export function enqueueBrowserTask(input) {
    const task = {
        ...input,
        id: `browser-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
        startedAt: Math.floor(Date.now() / 1000),
    };
    const { jid, description, originatingMessage, senderNumber, allowedTools } = task;
    // The queue table is keyed by formatted address rather than raw JID.
    const address = formatAddress(jidToAddress(jid));
    enqueueBrowserJob({
        address,
        description,
        originatingMessage,
        senderNumber,
        senderName: task.senderName ?? null,
        allowedTools,
    });
    logger.info({ id: task.id, jid, description: description.slice(0, 200) }, 'browser task enqueued');
    return task;
}
|
|
276
274
|
function buildBrowserPrompt(task) {
|
|
@@ -324,7 +322,10 @@ function browserAddDirs() {
|
|
|
324
322
|
config.storage.mediaDir,
|
|
325
323
|
];
|
|
326
324
|
}
|
|
327
|
-
|
|
325
|
+
// Exported so the browser worker (src/queue/browser-worker.ts) can
|
|
326
|
+
// invoke it for each claimed row. Body unchanged from the pre-queue
|
|
327
|
+
// version — just rehomed for direct invocation by the pool.
|
|
328
|
+
export async function runBrowserTask(task) {
|
|
328
329
|
const provider = getProvider();
|
|
329
330
|
// Each task is fresh (Phase 4 browser parallelism). No persistent
|
|
330
331
|
// session — would force serialization on concurrent tasks.
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
// Browser tasks queue helpers. Producers call enqueueBrowserJob;
|
|
2
|
+
// the browser worker pool drains via claimNextBrowserTask. Same
|
|
3
|
+
// primitives as inbound/outbound — claim is atomic, retry uses
|
|
4
|
+
// per-task backoff, claimed_by safety check on completion.
|
|
5
|
+
//
|
|
6
|
+
// No per-address serialization: multiple browser tasks for the same
|
|
7
|
+
// originating chat CAN run concurrently (each opens its own tab on
|
|
8
|
+
// the shared Chrome). Reply order isn't a concern because each browser
|
|
9
|
+
// task ends with an outbound row, and the sender worker serializes
|
|
10
|
+
// per-address there.
|
|
11
|
+
import { and, asc, eq, isNull, lte, or, sql } from 'drizzle-orm';
|
|
12
|
+
import { getDb } from '../db/index.js';
|
|
13
|
+
import { browserTasks } from '../db/schema.js';
|
|
14
|
+
// Inserts a new 'pending' browser task with zero attempts and no claim,
// and returns the inserted row. allowedTools is stored as JSON text,
// or NULL when the input value is falsy.
export function enqueueBrowserJob(input) {
    const db = getDb();
    const createdAtSec = Math.floor(Date.now() / 1000);
    const serializedTools = input.allowedTools
        ? JSON.stringify(input.allowedTools)
        : null;
    const row = {
        address: input.address,
        actorPersonId: input.actorPersonId ?? null,
        description: input.description,
        originatingMessage: input.originatingMessage,
        senderNumber: input.senderNumber,
        senderName: input.senderName ?? null,
        allowedTools: serializedTools,
        status: 'pending',
        attempts: 0,
        nextAttemptAt: null,
        lastError: null,
        claimedBy: null,
        claimedAt: null,
        createdAt: createdAtSec,
        updatedAt: createdAtSec,
    };
    return db.insert(browserTasks).values(row).returning().get();
}
|
|
41
|
+
// Claims the lowest-id 'pending' task whose next_attempt_at is NULL or
// already due, stamping it with workerId and the current time. Runs as
// one transaction. Returns the claimed row, or null when nothing is
// eligible or the update matched no row.
export function claimNextBrowserTask(workerId) {
    const db = getDb();
    const nowSec = Math.floor(Date.now() / 1000);
    return db.transaction((tx) => {
        const isDue = or(isNull(browserTasks.nextAttemptAt), lte(browserTasks.nextAttemptAt, nowSec));
        const candidate = tx
            .select({ id: browserTasks.id })
            .from(browserTasks)
            .where(and(eq(browserTasks.status, 'pending'), isDue))
            .orderBy(asc(browserTasks.id))
            .limit(1)
            .get();
        if (!candidate) {
            return null;
        }
        // Re-check status in the UPDATE so only a still-pending row flips.
        const stillPending = and(eq(browserTasks.id, candidate.id), eq(browserTasks.status, 'pending'));
        const row = tx
            .update(browserTasks)
            .set({
                status: 'claimed',
                claimedBy: workerId,
                claimedAt: nowSec,
                updatedAt: nowSec,
            })
            .where(stillPending)
            .returning()
            .get();
        return row ?? null;
    });
}
|
|
68
|
+
// Marks a task 'done', but only while it is still 'claimed' by
// workerId. Returns true when a row was updated, false otherwise.
export function markBrowserTaskDone(id, workerId) {
    const db = getDb();
    const finishedAtSec = Math.floor(Date.now() / 1000);
    const ownedClaim = and(eq(browserTasks.id, id), eq(browserTasks.status, 'claimed'), eq(browserTasks.claimedBy, workerId));
    const updated = db
        .update(browserTasks)
        .set({ status: 'done', updatedAt: finishedAtSec })
        .where(ownedClaim)
        .returning({ id: browserTasks.id })
        .all();
    return updated.length > 0;
}
|
|
79
|
+
// Browser tasks are expensive (multi-minute Playwright sessions) so
|
|
80
|
+
// retries are sparse: 30s, 5min, give up (DLQ after 2 attempts past
|
|
81
|
+
// the first). Most browser failures are deterministic (login wall,
|
|
82
|
+
// bot detection) and won't benefit from rapid retries.
|
|
83
|
+
const BACKOFF_SECONDS = [30, 300];
const MAX_ATTEMPTS = BACKOFF_SECONDS.length;
// Records a failed run. While the attempt count stays within
// MAX_ATTEMPTS the task goes back to 'pending' with a backoff delay;
// beyond that it is parked as 'dlq'. Does nothing when the row is
// missing or is not currently claimed by workerId.
// Returns { retried, deadLettered }.
export function markBrowserTaskRetryOrDlq(id, workerId, errorMessage) {
    const db = getDb();
    return db.transaction((tx) => {
        const row = tx.select().from(browserTasks).where(eq(browserTasks.id, id)).get();
        const ownsClaim = Boolean(row) && row.status === 'claimed' && row.claimedBy === workerId;
        if (!ownsClaim) {
            return { retried: false, deadLettered: false };
        }
        const now = Math.floor(Date.now() / 1000);
        const nextAttempts = row.attempts + 1;
        const exhausted = nextAttempts > MAX_ATTEMPTS;
        let changes;
        if (exhausted) {
            changes = {
                status: 'dlq',
                attempts: nextAttempts,
                lastError: errorMessage,
                claimedBy: null,
                claimedAt: null,
                updatedAt: now,
            };
        }
        else {
            // Delay is indexed by the attempts already made, capped at
            // the last configured step.
            const step = Math.min(row.attempts, BACKOFF_SECONDS.length - 1);
            changes = {
                status: 'pending',
                attempts: nextAttempts,
                nextAttemptAt: now + BACKOFF_SECONDS[step],
                lastError: errorMessage,
                claimedBy: null,
                claimedAt: null,
                updatedAt: now,
            };
        }
        tx.update(browserTasks).set(changes).where(eq(browserTasks.id, id)).run();
        return { retried: !exhausted, deadLettered: exhausted };
    });
}
|
|
124
|
+
// Browser tasks take 1-15 min routinely. Generous reclaim TTL.
|
|
125
|
+
const CLAIM_TTL_SECONDS = 20 * 60;
// Releases every 'claimed' task whose claim is at least
// CLAIM_TTL_SECONDS old back to 'pending' and clears its claim fields.
// attempts is left untouched. Returns how many rows were released.
export function reclaimStuckBrowserTasks() {
    const db = getDb();
    const staleBefore = Math.floor(Date.now() / 1000) - CLAIM_TTL_SECONDS;
    const staleClaim = and(eq(browserTasks.status, 'claimed'), lte(browserTasks.claimedAt, staleBefore));
    const released = db
        .update(browserTasks)
        .set({
            status: 'pending',
            claimedBy: null,
            claimedAt: null,
            // Assigns the column to itself, so the stored value is unchanged.
            updatedAt: sql `${browserTasks.updatedAt}`,
        })
        .where(staleClaim)
        .returning({ id: browserTasks.id })
        .all();
    return released.length;
}
|