@c4t4/heyamigo 0.9.11 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.js +14 -0
- package/dist/gateway/incoming.js +14 -0
- package/dist/queue/async-tasks.js +28 -88
- package/package.json +1 -1
package/dist/config.js
CHANGED
|
@@ -44,6 +44,14 @@ const ConfigSchema = z.object({
|
|
|
44
44
|
size: z.number().int().positive().default(5),
|
|
45
45
|
})
|
|
46
46
|
.default({ size: 5 }),
|
|
47
|
+
browser: z
|
|
48
|
+
.object({
|
|
49
|
+
// How many browser tasks can run in parallel on the shared
|
|
50
|
+
// Chrome. Each worker drives its own tab. Persistent agent
|
|
51
|
+
// session was dropped in Phase 4; every task is fresh.
|
|
52
|
+
maxWorkers: z.number().int().positive().default(3),
|
|
53
|
+
})
|
|
54
|
+
.default({ maxWorkers: 3 }),
|
|
47
55
|
codex: z
|
|
48
56
|
.object({
|
|
49
57
|
// Optional model override. If unset, Codex uses its default. Passed
|
|
@@ -82,6 +90,12 @@ const ConfigSchema = z.object({
|
|
|
82
90
|
// Default 25MB matches WhatsApp's published per-message media limit
|
|
83
91
|
// for most kinds. Set to null to disable the check.
|
|
84
92
|
maxOutboundMediaBytes: z.number().int().positive().nullable().default(25 * 1024 * 1024),
|
|
93
|
+
// Send a quick acknowledgement when an incoming message has media.
|
|
94
|
+
// Bridge for the typing-indicator regression in Phase 4 — without
|
|
95
|
+
// this, users wait silently while the chat worker processes the
|
|
96
|
+
// image. Set false to disable.
|
|
97
|
+
ackOnMedia: z.boolean().default(true),
|
|
98
|
+
mediaAckText: z.string().default('looking…'),
|
|
85
99
|
}),
|
|
86
100
|
storage: z.object({
|
|
87
101
|
messagesDir: z.string(),
|
package/dist/gateway/incoming.js
CHANGED
|
@@ -8,6 +8,7 @@ import { config } from '../config.js';
|
|
|
8
8
|
import { logger } from '../logger.js';
|
|
9
9
|
import { buildMemoryPreamble } from '../memory/preamble.js';
|
|
10
10
|
import { enqueueInbound } from '../queue/inbound.js';
|
|
11
|
+
import { enqueueOutbound } from '../queue/outbound.js';
|
|
11
12
|
import { detectMediaType, downloadAndSave, getMediaSize, mediaPromptTag, } from '../store/media.js';
|
|
12
13
|
import { append } from '../store/messages.js';
|
|
13
14
|
import { getDailyTokens } from '../store/usage.js';
|
|
@@ -215,6 +216,19 @@ async function processMessages(messages, sock, ownerJid, isHistorySync = false)
|
|
|
215
216
|
const actorPersonId = senderAddress
|
|
216
217
|
? personIdForAddress(senderAddress)
|
|
217
218
|
: null;
|
|
219
|
+
// For media-bearing messages, send an immediate "looking…" ack
|
|
220
|
+
// via outbound so the user isn't left wondering whether the bot
|
|
221
|
+
// saw the image (typing indicator was dropped in Phase 4 —
|
|
222
|
+
// followup commit will reinstate via ChannelAdapter.sendTyping).
|
|
223
|
+
// The chat worker still processes the actual reply normally.
|
|
224
|
+
if (media && config.reply.ackOnMedia !== false) {
|
|
225
|
+
enqueueOutbound({
|
|
226
|
+
address: chatAddress,
|
|
227
|
+
kind: 'text',
|
|
228
|
+
text: config.reply.mediaAckText,
|
|
229
|
+
idempotencyKey: `media-ack-${msg.key.id}`,
|
|
230
|
+
});
|
|
231
|
+
}
|
|
218
232
|
enqueueInbound({
|
|
219
233
|
address: chatAddress,
|
|
220
234
|
actorAddress: senderAddress,
|
|
package/dist/queue/async-tasks.js
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
2
|
-
import { dirname, resolve } from 'path';
|
|
3
1
|
import { getProvider } from '../ai/providers.js';
|
|
4
2
|
import { config } from '../config.js';
|
|
5
3
|
import fastq from 'fastq';
|
|
@@ -236,67 +234,19 @@ function truncate(s, n) {
|
|
|
236
234
|
//
|
|
237
235
|
// - Concurrency is 1. Serialized against itself because (a) the shared
|
|
238
236
|
// Playwright MCP + Chrome is one physical resource, (b) the session below
|
|
239
|
-
// is
|
|
240
|
-
// -
|
|
241
|
-
//
|
|
242
|
-
//
|
|
243
|
-
//
|
|
244
|
-
//
|
|
245
|
-
//
|
|
246
|
-
//
|
|
247
|
-
//
|
|
248
|
-
//
|
|
249
|
-
// the
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
}
|
|
253
|
-
function legacyBrowserSessionFilePath() {
|
|
254
|
-
return resolve(process.cwd(), config.memory.dir, 'browser-session.json');
|
|
255
|
-
}
|
|
256
|
-
function loadBrowserSession(provider) {
|
|
257
|
-
const path = browserSessionFilePath(provider);
|
|
258
|
-
let source = path;
|
|
259
|
-
if (!existsSync(path)) {
|
|
260
|
-
const legacy = legacyBrowserSessionFilePath();
|
|
261
|
-
if (provider === 'claude' && existsSync(legacy)) {
|
|
262
|
-
// One-time migration: legacy file was implicitly claude.
|
|
263
|
-
source = legacy;
|
|
264
|
-
}
|
|
265
|
-
else {
|
|
266
|
-
return { sessionId: null, createdAt: 0, lastUsedAt: 0, resumeCount: 0 };
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
try {
|
|
270
|
-
const parsed = JSON.parse(readFileSync(source, 'utf-8'));
|
|
271
|
-
return {
|
|
272
|
-
sessionId: parsed.sessionId ?? null,
|
|
273
|
-
createdAt: parsed.createdAt ?? 0,
|
|
274
|
-
lastUsedAt: parsed.lastUsedAt ?? 0,
|
|
275
|
-
resumeCount: parsed.resumeCount ?? 0,
|
|
276
|
-
};
|
|
277
|
-
}
|
|
278
|
-
catch {
|
|
279
|
-
return { sessionId: null, createdAt: 0, lastUsedAt: 0, resumeCount: 0 };
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
function saveBrowserSession(provider, state) {
|
|
283
|
-
const path = browserSessionFilePath(provider);
|
|
284
|
-
mkdirSync(dirname(path), { recursive: true });
|
|
285
|
-
writeFileSync(path, JSON.stringify(state, null, 2) + '\n', 'utf-8');
|
|
286
|
-
}
|
|
287
|
-
// Reset the browser session for the active provider. Callable from outside
|
|
288
|
-
// if the session gets corrupted or we want a fresh start. Not wired into
|
|
289
|
-
// any command yet.
|
|
290
|
-
export function resetBrowserSession() {
|
|
291
|
-
const provider = getProvider().name;
|
|
292
|
-
saveBrowserSession(provider, {
|
|
293
|
-
sessionId: null,
|
|
294
|
-
createdAt: 0,
|
|
295
|
-
lastUsedAt: 0,
|
|
296
|
-
resumeCount: 0,
|
|
297
|
-
});
|
|
298
|
-
logger.info({ provider }, 'browser session reset');
|
|
299
|
-
}
|
|
237
|
+
// is one physical resource.
|
|
238
|
+
// - Persistent agent session DROPPED in Phase 4 — multiple browser
|
|
239
|
+
// tasks now run concurrently, each in its own Chrome tab, each as
|
|
240
|
+
// a fresh agent. Cross-task agent memory was rarely load-bearing
|
|
241
|
+
// (the chat-track agent writes self-contained task descriptions).
|
|
242
|
+
// Per-task tab isolation is enforced by the prompt instructions
|
|
243
|
+
// below.
|
|
244
|
+
// Browser pool: multiple agents share one Chrome (the logged-in
|
|
245
|
+
// profile), each task opens its own tab. Persistent agent session is
|
|
246
|
+
// dropped — every task is fresh, with self-contained instructions
|
|
247
|
+
// from the chat-track agent. The trade-off: no cross-task agent
|
|
248
|
+
// memory; the win: real parallelism.
|
|
249
|
+
const BROWSER_CONCURRENCY = Math.max(1, config.browser?.maxWorkers ?? 3);
|
|
300
250
|
const browserQueue = fastq.promise(async (task) => {
|
|
301
251
|
inProgress.set(task.id, task);
|
|
302
252
|
try {
|
|
@@ -308,7 +258,7 @@ const browserQueue = fastq.promise(async (task) => {
|
|
|
308
258
|
finally {
|
|
309
259
|
inProgress.delete(task.id);
|
|
310
260
|
}
|
|
311
|
-
},
|
|
261
|
+
}, BROWSER_CONCURRENCY);
|
|
312
262
|
export function enqueueBrowserTask(input) {
|
|
313
263
|
const task = {
|
|
314
264
|
...input,
|
|
@@ -323,12 +273,15 @@ export function enqueueBrowserTask(input) {
|
|
|
323
273
|
browserQueue.push(task).catch((err) => logger.error({ err, id: task.id }, 'browser queue push failed'));
|
|
324
274
|
return task;
|
|
325
275
|
}
|
|
326
|
-
function buildBrowserPrompt(task
|
|
327
|
-
// Framing tuned for the dedicated browser worker.
|
|
276
|
+
function buildBrowserPrompt(task) {
|
|
277
|
+
// Framing tuned for the dedicated browser worker. Each task is its
|
|
278
|
+
// own fresh agent run (no persistent session) — multiple browser
|
|
279
|
+
// tasks may be running in parallel on the same Chrome, each in its
|
|
280
|
+
// own tab.
|
|
328
281
|
const lines = [
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
282
|
+
`You are the BROWSER WORKER. The chat already got its ack; your output IS the follow-up chat reply the owner is waiting for. Use the shared Chrome at localhost:9222 via Playwright MCP (already authenticated with the owner's sessions — TikTok, Instagram, etc. — do NOT log out, do NOT launch a new browser).`,
|
|
283
|
+
``,
|
|
284
|
+
`TAB OWNERSHIP: Other browser workers may be running concurrently on the SAME Chrome instance, each driving its own tab. Your FIRST action is to open a new tab for this task (browser_tabs with action=new). Operate ONLY on that tab for the rest of the task. Do NOT switch to or interact with tabs you didn't open — they belong to other workers. Close your tab when you finish.`,
|
|
332
285
|
``,
|
|
333
286
|
`TASK:`,
|
|
334
287
|
task.description,
|
|
@@ -373,25 +326,24 @@ function browserAddDirs() {
|
|
|
373
326
|
}
|
|
374
327
|
async function runBrowserTask(task) {
|
|
375
328
|
const provider = getProvider();
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
329
|
+
// Each task is fresh (Phase 4 browser parallelism). No persistent
|
|
330
|
+
// session — would force serialization on concurrent tasks.
|
|
331
|
+
// Chat-track agent writes self-contained task descriptions, so the
|
|
332
|
+
// worker doesn't need cross-task agent memory.
|
|
333
|
+
const prompt = buildBrowserPrompt(task);
|
|
379
334
|
const elapsedLog = () => `${Math.round((Date.now() - task.startedAt * 1000) / 1000)}s`;
|
|
380
335
|
let reply;
|
|
381
|
-
let returnedSessionId;
|
|
382
336
|
try {
|
|
383
337
|
const result = await provider.runTask({
|
|
384
338
|
input: prompt,
|
|
385
339
|
caller: 'browser-task',
|
|
386
340
|
mode: 'auto',
|
|
387
341
|
lane: 'async',
|
|
388
|
-
includeSystemPrompt:
|
|
342
|
+
includeSystemPrompt: true,
|
|
389
343
|
addDirs: browserAddDirs(),
|
|
390
344
|
allowedTools: task.allowedTools,
|
|
391
|
-
sessionId: session.sessionId ?? undefined,
|
|
392
345
|
});
|
|
393
346
|
reply = result.reply;
|
|
394
|
-
returnedSessionId = result.sessionId;
|
|
395
347
|
}
|
|
396
348
|
catch (err) {
|
|
397
349
|
logger.error({ err, id: task.id, jid: task.jid, elapsed: elapsedLog() }, 'browser task provider call failed');
|
|
@@ -401,17 +353,6 @@ async function runBrowserTask(task) {
|
|
|
401
353
|
});
|
|
402
354
|
return;
|
|
403
355
|
}
|
|
404
|
-
// Persist the session id. On first call the provider returns the new
|
|
405
|
-
// sessionId; on resume it may return the same or a rotated one.
|
|
406
|
-
if (returnedSessionId) {
|
|
407
|
-
const now = Math.floor(Date.now() / 1000);
|
|
408
|
-
saveBrowserSession(provider.name, {
|
|
409
|
-
sessionId: returnedSessionId,
|
|
410
|
-
createdAt: session.createdAt || now,
|
|
411
|
-
lastUsedAt: now,
|
|
412
|
-
resumeCount: (session.resumeCount ?? 0) + (isResume ? 1 : 0),
|
|
413
|
-
});
|
|
414
|
-
}
|
|
415
356
|
// Route markers the same way the general async lane does.
|
|
416
357
|
const { extractFlags } = await import('../memory/digest-flag.js');
|
|
417
358
|
const { clean, digest, journals, journalCreates, sendTexts } = extractFlags(reply);
|
|
@@ -487,7 +428,6 @@ async function runBrowserTask(task) {
|
|
|
487
428
|
id: task.id,
|
|
488
429
|
jid: task.jid,
|
|
489
430
|
elapsed: elapsedLog(),
|
|
490
|
-
isResume,
|
|
491
431
|
appended: appendedCount,
|
|
492
432
|
createdJournals: journalCreates.length,
|
|
493
433
|
digestFired: !!digest,
|