@rubytech/taskmaster 1.2.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/auth-profiles/oauth.js +24 -0
- package/dist/agents/auth-profiles/profiles.js +37 -0
- package/dist/agents/auth-profiles.js +1 -1
- package/dist/agents/pi-tools.policy.js +4 -0
- package/dist/agents/taskmaster-tools.js +14 -0
- package/dist/agents/tool-policy.js +5 -2
- package/dist/agents/tools/apikeys-tool.js +16 -5
- package/dist/agents/tools/contact-create-tool.js +59 -0
- package/dist/agents/tools/contact-delete-tool.js +48 -0
- package/dist/agents/tools/contact-update-tool.js +17 -2
- package/dist/agents/tools/file-delete-tool.js +137 -0
- package/dist/agents/tools/file-list-tool.js +127 -0
- package/dist/agents/tools/message-history-tool.js +2 -3
- package/dist/auto-reply/media-note.js +11 -0
- package/dist/auto-reply/reply/commands-tts.js +7 -2
- package/dist/auto-reply/reply/get-reply.js +4 -0
- package/dist/build-info.json +3 -3
- package/dist/cli/provision-seed.js +1 -2
- package/dist/commands/doctor-config-flow.js +13 -0
- package/dist/config/agent-tools-reconcile.js +53 -0
- package/dist/config/defaults.js +10 -1
- package/dist/config/legacy.migrations.part-3.js +26 -0
- package/dist/config/zod-schema.core.js +9 -1
- package/dist/config/zod-schema.js +1 -0
- package/dist/control-ui/assets/{index-N8du4fwV.js → index-BDETQp97.js} +692 -600
- package/dist/control-ui/assets/index-BDETQp97.js.map +1 -0
- package/dist/control-ui/assets/index-CPawOl_z.css +1 -0
- package/dist/control-ui/index.html +2 -2
- package/dist/gateway/chat-sanitize.js +5 -1
- package/dist/gateway/config-reload.js +1 -0
- package/dist/gateway/media-http.js +28 -0
- package/dist/gateway/server/tls.js +2 -2
- package/dist/gateway/server-http.js +34 -4
- package/dist/gateway/server-methods/apikeys.js +56 -4
- package/dist/gateway/server-methods/chat.js +64 -25
- package/dist/gateway/server-methods/tts.js +11 -2
- package/dist/gateway/server.impl.js +38 -5
- package/dist/infra/tls/gateway.js +19 -3
- package/dist/media-understanding/apply.js +35 -0
- package/dist/media-understanding/providers/deepgram/audio.js +1 -1
- package/dist/media-understanding/providers/google/audio.js +1 -1
- package/dist/media-understanding/providers/google/video.js +1 -1
- package/dist/media-understanding/providers/index.js +2 -0
- package/dist/media-understanding/providers/openai/audio.js +1 -1
- package/dist/media-understanding/providers/sherpa-onnx/index.js +10 -0
- package/dist/media-understanding/runner.js +61 -72
- package/dist/media-understanding/sherpa-onnx-local.js +223 -0
- package/dist/memory/audit.js +9 -0
- package/dist/memory/manager.js +1 -1
- package/dist/records/records-manager.js +10 -0
- package/dist/tts/tts.js +98 -10
- package/dist/web/auto-reply/monitor/process-message.js +45 -17
- package/dist/web/inbound/monitor.js +9 -1
- package/extensions/diagnostics-otel/node_modules/.bin/acorn +0 -0
- package/extensions/googlechat/node_modules/.bin/taskmaster +2 -2
- package/extensions/googlechat/package.json +2 -2
- package/extensions/line/node_modules/.bin/taskmaster +2 -2
- package/extensions/line/package.json +1 -1
- package/extensions/matrix/node_modules/.bin/markdown-it +0 -0
- package/extensions/matrix/node_modules/.bin/taskmaster +2 -2
- package/extensions/matrix/package.json +1 -1
- package/extensions/memory-lancedb/node_modules/.bin/arrow2csv +0 -0
- package/extensions/memory-lancedb/node_modules/.bin/openai +0 -0
- package/extensions/msteams/node_modules/.bin/taskmaster +2 -2
- package/extensions/msteams/package.json +1 -1
- package/extensions/nostr/node_modules/.bin/taskmaster +2 -2
- package/extensions/nostr/node_modules/.bin/tsc +0 -0
- package/extensions/nostr/node_modules/.bin/tsserver +0 -0
- package/extensions/nostr/package.json +1 -1
- package/extensions/zalo/node_modules/.bin/taskmaster +2 -2
- package/extensions/zalo/package.json +1 -1
- package/extensions/zalouser/node_modules/.bin/taskmaster +2 -2
- package/extensions/zalouser/package.json +1 -1
- package/package.json +56 -65
- package/scripts/install.sh +0 -0
- package/scripts/postinstall.js +76 -0
- package/skills/business-assistant/references/crm.md +32 -8
- package/taskmaster-docs/USER-GUIDE.md +111 -6
- package/templates/.DS_Store +0 -0
- package/templates/beagle/agents/admin/AGENTS.md +4 -2
- package/templates/customer/.DS_Store +0 -0
- package/templates/customer/agents/.DS_Store +0 -0
- package/templates/maxy/.DS_Store +0 -0
- package/templates/maxy/.gitignore +1 -0
- package/templates/maxy/agents/.DS_Store +0 -0
- package/templates/maxy/agents/admin/.DS_Store +0 -0
- package/templates/maxy/memory/.DS_Store +0 -0
- package/templates/maxy/skills/.DS_Store +0 -0
- package/templates/taskmaster/.gitignore +1 -0
- package/templates/taskmaster/agents/admin/AGENTS.md +1 -0
- package/dist/control-ui/assets/index-DtQHRIVD.css +0 -1
- package/dist/control-ui/assets/index-N8du4fwV.js.map +0 -1
|
@@ -379,8 +379,12 @@ export const chatHandlers = {
|
|
|
379
379
|
}
|
|
380
380
|
}
|
|
381
381
|
}
|
|
382
|
-
// Save document attachments to workspace uploads dir (persistent, accessible by agent)
|
|
382
|
+
// Save document attachments to workspace uploads dir (persistent, accessible by agent).
|
|
383
|
+
// Audio files are separated so they can be routed through the media understanding
|
|
384
|
+
// pipeline (STT) instead of being treated as generic file attachments.
|
|
383
385
|
const savedDocPaths = [];
|
|
386
|
+
const savedAudioPaths = [];
|
|
387
|
+
const savedAudioTypes = [];
|
|
384
388
|
if (documentAttachments.length > 0 && uploadsDir) {
|
|
385
389
|
for (const doc of documentAttachments) {
|
|
386
390
|
if (!doc.content || typeof doc.content !== "string")
|
|
@@ -389,7 +393,14 @@ export const chatHandlers = {
|
|
|
389
393
|
const destPath = path.join(uploadsDir, safeName);
|
|
390
394
|
try {
|
|
391
395
|
fs.writeFileSync(destPath, Buffer.from(doc.content, "base64"));
|
|
392
|
-
|
|
396
|
+
const mimeBase = doc.mimeType?.split(";")[0]?.trim() ?? "";
|
|
397
|
+
if (mimeBase.startsWith("audio/")) {
|
|
398
|
+
savedAudioPaths.push(destPath);
|
|
399
|
+
savedAudioTypes.push(doc.mimeType ?? "audio/webm");
|
|
400
|
+
}
|
|
401
|
+
else {
|
|
402
|
+
savedDocPaths.push(destPath);
|
|
403
|
+
}
|
|
393
404
|
}
|
|
394
405
|
catch (err) {
|
|
395
406
|
context.logGateway.warn(`chat document save failed: ${String(err)}`);
|
|
@@ -460,18 +471,29 @@ export const chatHandlers = {
|
|
|
460
471
|
const trimmedMessage = p.message.trim();
|
|
461
472
|
const injectThinking = Boolean(p.thinking && trimmedMessage && !trimmedMessage.startsWith("/"));
|
|
462
473
|
const commandBody = injectThinking ? `/think ${p.thinking} ${p.message}` : p.message;
|
|
463
|
-
// If documents were saved, prepend file paths to message
|
|
474
|
+
// If non-audio documents were saved, prepend file paths to message.
|
|
475
|
+
// Audio files are NOT annotated here — they go through MediaPaths so the
|
|
476
|
+
// media understanding pipeline (STT) handles them, and buildInboundMediaNote
|
|
477
|
+
// generates the proper [media attached: ...] annotation.
|
|
464
478
|
const docNote = savedDocPaths.length > 0
|
|
465
479
|
? savedDocPaths.map((p) => `[file: ${p}]`).join("\n") + "\n\n"
|
|
466
480
|
: "";
|
|
467
|
-
|
|
481
|
+
// Audio-only message (voice note, no text): use placeholder so
|
|
482
|
+
// applyMediaUnderstanding knows to replace with transcript or error.
|
|
483
|
+
const hasAudioMedia = savedAudioPaths.length > 0;
|
|
484
|
+
const effectiveBody = hasAudioMedia && !trimmedMessage ? "<media:audio>" : p.message;
|
|
485
|
+
const messageWithDocs = docNote + effectiveBody;
|
|
486
|
+
const effectiveCommandBody = hasAudioMedia && !trimmedMessage ? "<media:audio>" : commandBody;
|
|
487
|
+
// Merge image and audio paths so the media understanding pipeline sees both.
|
|
488
|
+
const allMediaPaths = [...savedImagePaths, ...savedAudioPaths];
|
|
489
|
+
const allMediaTypes = [...savedImageTypes, ...savedAudioTypes];
|
|
468
490
|
const clientInfo = client?.connect?.client;
|
|
469
491
|
const ctx = {
|
|
470
492
|
Body: messageWithDocs,
|
|
471
493
|
BodyForAgent: messageWithDocs,
|
|
472
|
-
BodyForCommands: docNote +
|
|
494
|
+
BodyForCommands: docNote + effectiveCommandBody,
|
|
473
495
|
RawBody: messageWithDocs,
|
|
474
|
-
CommandBody: docNote +
|
|
496
|
+
CommandBody: docNote + effectiveCommandBody,
|
|
475
497
|
SessionKey: p.sessionKey,
|
|
476
498
|
Provider: INTERNAL_MESSAGE_CHANNEL,
|
|
477
499
|
Surface: INTERNAL_MESSAGE_CHANNEL,
|
|
@@ -485,10 +507,10 @@ export const chatHandlers = {
|
|
|
485
507
|
// Image/media paths — same pattern as WhatsApp. buildInboundMediaNote()
|
|
486
508
|
// will generate [media attached: ...] annotations that the agent runner
|
|
487
509
|
// detects and loads from disk at inference time.
|
|
488
|
-
MediaPaths:
|
|
489
|
-
MediaPath:
|
|
490
|
-
MediaTypes:
|
|
491
|
-
MediaType:
|
|
510
|
+
MediaPaths: allMediaPaths.length > 0 ? allMediaPaths : undefined,
|
|
511
|
+
MediaPath: allMediaPaths[0],
|
|
512
|
+
MediaTypes: allMediaTypes.length > 0 ? allMediaTypes : undefined,
|
|
513
|
+
MediaType: allMediaTypes[0],
|
|
492
514
|
};
|
|
493
515
|
const agentId = resolveSessionAgentId({
|
|
494
516
|
sessionKey: p.sessionKey,
|
|
@@ -496,16 +518,26 @@ export const chatHandlers = {
|
|
|
496
518
|
});
|
|
497
519
|
// Fire message:inbound hook for conversation archiving.
|
|
498
520
|
// Include image paths so the archive references the attached media.
|
|
521
|
+
// Audio archive is deferred until after media understanding resolves (see
|
|
522
|
+
// onMediaResolved below) so the transcript is available instead of the
|
|
523
|
+
// raw <media:audio> placeholder.
|
|
499
524
|
const imageNote = savedImagePaths.length > 0 ? savedImagePaths.map((ip) => `[image: ${ip}]`).join("\n") : "";
|
|
500
|
-
const
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
525
|
+
const fireArchiveHook = (resolvedBody) => {
|
|
526
|
+
const body = resolvedBody ?? p.message;
|
|
527
|
+
const archiveText = [body, imageNote].filter(Boolean).join("\n").trim();
|
|
528
|
+
void triggerInternalHook(createInternalHookEvent("message", "inbound", p.sessionKey, {
|
|
529
|
+
text: archiveText || undefined,
|
|
530
|
+
timestamp: now,
|
|
531
|
+
chatType: "direct",
|
|
532
|
+
agentId,
|
|
533
|
+
channel: "webchat",
|
|
534
|
+
cfg,
|
|
535
|
+
}));
|
|
536
|
+
};
|
|
537
|
+
if (!hasAudioMedia) {
|
|
538
|
+
// No audio — fire immediately (no STT to wait for).
|
|
539
|
+
fireArchiveHook();
|
|
540
|
+
}
|
|
509
541
|
let prefixContext = {
|
|
510
542
|
identityName: resolveIdentityName(cfg, agentId),
|
|
511
543
|
};
|
|
@@ -541,7 +573,7 @@ export const chatHandlers = {
|
|
|
541
573
|
},
|
|
542
574
|
});
|
|
543
575
|
let agentRunStarted = false;
|
|
544
|
-
context.logGateway.info(`webchat dispatch: sessionKey=${p.sessionKey} runId=${clientRunId} body=${messageWithDocs.length}ch images=${savedImagePaths.length} docs=${savedDocPaths.length}`);
|
|
576
|
+
context.logGateway.info(`webchat dispatch: sessionKey=${p.sessionKey} runId=${clientRunId} body=${messageWithDocs.length}ch images=${savedImagePaths.length} audio=${savedAudioPaths.length} docs=${savedDocPaths.length}`);
|
|
545
577
|
void dispatchInboundMessage({
|
|
546
578
|
ctx,
|
|
547
579
|
cfg,
|
|
@@ -554,11 +586,18 @@ export const chatHandlers = {
|
|
|
554
586
|
agentRunStarted = true;
|
|
555
587
|
context.logGateway.info(`webchat agent run started: sessionKey=${p.sessionKey} runId=${runId}`);
|
|
556
588
|
},
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
589
|
+
onMediaResolved: hasAudioMedia
|
|
590
|
+
? () => {
|
|
591
|
+
// STT complete — archive the resolved body (transcript) instead
|
|
592
|
+
// of the raw <media:audio> placeholder.
|
|
593
|
+
fireArchiveHook(ctx.Body);
|
|
594
|
+
}
|
|
595
|
+
: undefined,
|
|
596
|
+
onModelSelected: (modelCtx) => {
|
|
597
|
+
prefixContext.provider = modelCtx.provider;
|
|
598
|
+
prefixContext.model = extractShortModelName(modelCtx.model);
|
|
599
|
+
prefixContext.modelFull = `${modelCtx.provider}/${modelCtx.model}`;
|
|
600
|
+
prefixContext.thinkingLevel = modelCtx.thinkLevel ?? "off";
|
|
562
601
|
},
|
|
563
602
|
},
|
|
564
603
|
})
|
|
@@ -80,8 +80,11 @@ export const ttsHandlers = {
|
|
|
80
80
|
},
|
|
81
81
|
"tts.setProvider": async ({ params, respond }) => {
|
|
82
82
|
const provider = typeof params.provider === "string" ? params.provider.trim() : "";
|
|
83
|
-
if (provider !== "openai" &&
|
|
84
|
-
|
|
83
|
+
if (provider !== "openai" &&
|
|
84
|
+
provider !== "elevenlabs" &&
|
|
85
|
+
provider !== "edge" &&
|
|
86
|
+
provider !== "hume") {
|
|
87
|
+
respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "Invalid provider. Use openai, elevenlabs, hume, or edge."));
|
|
85
88
|
return;
|
|
86
89
|
}
|
|
87
90
|
try {
|
|
@@ -115,6 +118,12 @@ export const ttsHandlers = {
|
|
|
115
118
|
configured: Boolean(resolveTtsApiKey(config, "elevenlabs")),
|
|
116
119
|
models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_monolingual_v1"],
|
|
117
120
|
},
|
|
121
|
+
{
|
|
122
|
+
id: "hume",
|
|
123
|
+
name: "Hume",
|
|
124
|
+
configured: Boolean(resolveTtsApiKey(config, "hume")),
|
|
125
|
+
models: [],
|
|
126
|
+
},
|
|
118
127
|
{
|
|
119
128
|
id: "edge",
|
|
120
129
|
name: "Edge TTS",
|
|
@@ -9,6 +9,7 @@ import { CONFIG_PATH_TASKMASTER, isNixMode, loadConfig, migrateLegacyConfig, rea
|
|
|
9
9
|
import { VERSION } from "../version.js";
|
|
10
10
|
import { isDiagnosticsEnabled } from "../infra/diagnostic-events.js";
|
|
11
11
|
import { logAcceptedEnvOption } from "../infra/env.js";
|
|
12
|
+
import { reconcileAgentContactTools } from "../config/agent-tools-reconcile.js";
|
|
12
13
|
import { applyPluginAutoEnable } from "../config/plugin-auto-enable.js";
|
|
13
14
|
import { clearAgentRunContext, onAgentEvent } from "../infra/agent-events.js";
|
|
14
15
|
import { onHeartbeatEvent } from "../infra/heartbeat-events.js";
|
|
@@ -53,6 +54,7 @@ import { ensureWatchdogUnitOnStartup, scheduleWatchdogStabilityConfirmation, } f
|
|
|
53
54
|
import { startGatewayTailscaleExposure } from "./server-tailscale.js";
|
|
54
55
|
import { startWifiWatchdog } from "./server-wifi-watchdog.js";
|
|
55
56
|
import { loadGatewayTlsRuntime } from "./server/tls.js";
|
|
57
|
+
import { isLoopbackHost } from "./net.js";
|
|
56
58
|
import { createWizardSessionTracker } from "./server-wizard-sessions.js";
|
|
57
59
|
import { attachGatewayWsHandlers } from "./server-ws-runtime.js";
|
|
58
60
|
import { isLicenseValid } from "../license/validate.js";
|
|
@@ -121,6 +123,20 @@ export async function startGatewayServer(port = 18789, opts = {}) {
|
|
|
121
123
|
log.warn(`gateway: failed to persist plugin auto-enable changes: ${String(err)}`);
|
|
122
124
|
}
|
|
123
125
|
}
|
|
126
|
+
// Reconcile agent tool groups (e.g. individual contact tools → group:contacts).
|
|
127
|
+
const toolReconcile = reconcileAgentContactTools({ config: configSnapshot.config });
|
|
128
|
+
if (toolReconcile.changes.length > 0) {
|
|
129
|
+
try {
|
|
130
|
+
await writeConfigFile(toolReconcile.config);
|
|
131
|
+
configSnapshot = await readConfigFileSnapshot();
|
|
132
|
+
log.info(`gateway: reconciled agent tools:\n${toolReconcile.changes
|
|
133
|
+
.map((entry) => `- ${entry}`)
|
|
134
|
+
.join("\n")}`);
|
|
135
|
+
}
|
|
136
|
+
catch (err) {
|
|
137
|
+
log.warn(`gateway: failed to persist agent tools reconciliation: ${String(err)}`);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
124
140
|
// Stamp config with running version on startup so upgrades keep the stamp current.
|
|
125
141
|
const storedVersion = configSnapshot.config.meta?.lastTouchedVersion;
|
|
126
142
|
if (configSnapshot.exists && storedVersion !== VERSION) {
|
|
@@ -211,10 +227,30 @@ export async function startGatewayServer(port = 18789, opts = {}) {
|
|
|
211
227
|
const { wizardSessions, findRunningWizard, purgeWizardSession } = createWizardSessionTracker();
|
|
212
228
|
const deps = createDefaultDeps();
|
|
213
229
|
let canvasHostServer = null;
|
|
214
|
-
|
|
215
|
-
|
|
230
|
+
// Auto-enable TLS when binding to a non-loopback address (LAN, custom, etc.)
|
|
231
|
+
// so that browser secure-context APIs (getUserMedia, etc.) work over .local.
|
|
232
|
+
// Only auto-enable when the user hasn't explicitly configured tls.enabled.
|
|
233
|
+
const tlsExplicit = cfgAtStart.gateway?.tls?.enabled;
|
|
234
|
+
const tlsAutoEnable = tlsExplicit === undefined && !isLoopbackHost(bindHost);
|
|
235
|
+
const effectiveTlsConfig = tlsAutoEnable
|
|
236
|
+
? { ...cfgAtStart.gateway?.tls, enabled: true }
|
|
237
|
+
: cfgAtStart.gateway?.tls;
|
|
238
|
+
if (tlsAutoEnable) {
|
|
239
|
+
log.child("tls").info("gateway tls: auto-enabled for non-loopback bind");
|
|
240
|
+
}
|
|
241
|
+
const bonjourHostname = cfgAtStart.discovery?.bonjourHostname || "taskmaster";
|
|
242
|
+
const tlsHostnames = [bonjourHostname];
|
|
243
|
+
const gatewayTls = await loadGatewayTlsRuntime(effectiveTlsConfig, log.child("tls"), tlsHostnames);
|
|
244
|
+
if (tlsExplicit === true && !gatewayTls.enabled) {
|
|
245
|
+
// User explicitly enabled TLS — fail hard if it can't start.
|
|
216
246
|
throw new Error(gatewayTls.error ?? "gateway tls: failed to enable");
|
|
217
247
|
}
|
|
248
|
+
if (tlsAutoEnable && !gatewayTls.enabled) {
|
|
249
|
+
// Auto-enabled TLS failed — fall back to HTTP with a warning.
|
|
250
|
+
log
|
|
251
|
+
.child("tls")
|
|
252
|
+
.warn(`gateway tls: auto-enable failed (${gatewayTls.error ?? "unknown"}), continuing with HTTP`);
|
|
253
|
+
}
|
|
218
254
|
const { canvasHost, httpServer, httpServers, httpBindHosts, wss, clients, broadcast, agentRunSeq, dedupe, chatRunState, chatRunBuffers, chatDeltaSentAt, addChatRun, removeChatRun, chatAbortControllers, } = await createGatewayRuntimeState({
|
|
219
255
|
cfg: cfgAtStart,
|
|
220
256
|
bindHost,
|
|
@@ -268,9 +304,6 @@ export async function startGatewayServer(port = 18789, opts = {}) {
|
|
|
268
304
|
});
|
|
269
305
|
const { getRuntimeSnapshot, startChannels, startChannel, stopChannel, markChannelLoggedOut } = channelManager;
|
|
270
306
|
const machineDisplayName = await getMachineDisplayName();
|
|
271
|
-
// Default to "taskmaster" hostname for mDNS so taskmaster.local works out of the box.
|
|
272
|
-
// Users can override via discovery.bonjourHostname config if needed.
|
|
273
|
-
const bonjourHostname = cfgAtStart.discovery?.bonjourHostname || "taskmaster";
|
|
274
307
|
const discovery = await startGatewayDiscovery({
|
|
275
308
|
machineDisplayName,
|
|
276
309
|
port,
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { execFile } from "node:child_process";
|
|
2
2
|
import { X509Certificate } from "node:crypto";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
|
+
import os from "node:os";
|
|
4
5
|
import path from "node:path";
|
|
5
6
|
import { promisify } from "node:util";
|
|
6
7
|
import { CONFIG_DIR, ensureDir, resolveUserPath, shortenHomeInString } from "../../utils.js";
|
|
@@ -15,6 +16,18 @@ async function fileExists(filePath) {
|
|
|
15
16
|
return false;
|
|
16
17
|
}
|
|
17
18
|
}
|
|
19
|
+
function buildSanString(hostnames) {
|
|
20
|
+
const sans = new Set(["DNS:localhost", "IP:127.0.0.1"]);
|
|
21
|
+
const raw = hostnames?.length ? hostnames : [os.hostname()];
|
|
22
|
+
for (const h of raw) {
|
|
23
|
+
const name = h.replace(/\.local$/i, "").trim();
|
|
24
|
+
if (!name)
|
|
25
|
+
continue;
|
|
26
|
+
sans.add(`DNS:${name}`);
|
|
27
|
+
sans.add(`DNS:${name}.local`);
|
|
28
|
+
}
|
|
29
|
+
return [...sans].join(",");
|
|
30
|
+
}
|
|
18
31
|
async function generateSelfSignedCert(params) {
|
|
19
32
|
const certDir = path.dirname(params.certPath);
|
|
20
33
|
const keyDir = path.dirname(params.keyPath);
|
|
@@ -22,6 +35,7 @@ async function generateSelfSignedCert(params) {
|
|
|
22
35
|
if (keyDir !== certDir) {
|
|
23
36
|
await ensureDir(keyDir);
|
|
24
37
|
}
|
|
38
|
+
const san = buildSanString(params.hostnames);
|
|
25
39
|
await execFileAsync("openssl", [
|
|
26
40
|
"req",
|
|
27
41
|
"-x509",
|
|
@@ -37,12 +51,14 @@ async function generateSelfSignedCert(params) {
|
|
|
37
51
|
params.certPath,
|
|
38
52
|
"-subj",
|
|
39
53
|
"/CN=taskmaster-gateway",
|
|
54
|
+
"-addext",
|
|
55
|
+
`subjectAltName=${san}`,
|
|
40
56
|
]);
|
|
41
57
|
await fs.chmod(params.keyPath, 0o600).catch(() => { });
|
|
42
58
|
await fs.chmod(params.certPath, 0o600).catch(() => { });
|
|
43
|
-
params.log?.info?.(`gateway tls: generated self-signed cert at ${shortenHomeInString(params.certPath)}`);
|
|
59
|
+
params.log?.info?.(`gateway tls: generated self-signed cert at ${shortenHomeInString(params.certPath)} (SAN: ${san})`);
|
|
44
60
|
}
|
|
45
|
-
export async function loadGatewayTlsRuntime(cfg, log) {
|
|
61
|
+
export async function loadGatewayTlsRuntime(cfg, log, hostnames) {
|
|
46
62
|
if (!cfg || cfg.enabled !== true)
|
|
47
63
|
return { enabled: false, required: false };
|
|
48
64
|
const autoGenerate = cfg.autoGenerate !== false;
|
|
@@ -54,7 +70,7 @@ export async function loadGatewayTlsRuntime(cfg, log) {
|
|
|
54
70
|
const hasKey = await fileExists(keyPath);
|
|
55
71
|
if (!hasCert && !hasKey && autoGenerate) {
|
|
56
72
|
try {
|
|
57
|
-
await generateSelfSignedCert({ certPath, keyPath, log });
|
|
73
|
+
await generateSelfSignedCert({ certPath, keyPath, hostnames, log });
|
|
58
74
|
}
|
|
59
75
|
catch (err) {
|
|
60
76
|
return {
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { finalizeInboundContext } from "../auto-reply/reply/inbound-context.js";
|
|
2
|
+
import { logVerbose } from "../globals.js";
|
|
2
3
|
import { extractMediaUserText, formatAudioTranscripts, formatMediaUnderstandingBody, } from "./format.js";
|
|
3
4
|
import { runWithConcurrency } from "./concurrency.js";
|
|
4
5
|
import { resolveConcurrency } from "./resolve.js";
|
|
@@ -42,6 +43,40 @@ export async function applyMediaUnderstanding(params) {
|
|
|
42
43
|
if (decisions.length > 0) {
|
|
43
44
|
ctx.MediaUnderstandingDecisions = [...(ctx.MediaUnderstandingDecisions ?? []), ...decisions];
|
|
44
45
|
}
|
|
46
|
+
// Surface audio failures so the agent can inform the user instead of receiving
|
|
47
|
+
// a bare <media:audio> placeholder with no context about what went wrong.
|
|
48
|
+
const audioDecision = decisions.find((d) => d.capability === "audio");
|
|
49
|
+
const audioTranscribed = outputs.some((o) => o.kind === "audio.transcription");
|
|
50
|
+
const bodyHint = ctx.CommandBody ?? ctx.RawBody ?? ctx.Body ?? "";
|
|
51
|
+
const isAudioPlaceholder = /^<media:audio>/i.test(bodyHint.trim());
|
|
52
|
+
if (isAudioPlaceholder && !audioTranscribed) {
|
|
53
|
+
let reason;
|
|
54
|
+
if (ctx.MediaDownloadFailed) {
|
|
55
|
+
reason = "media download failed — the voice note could not be retrieved from WhatsApp";
|
|
56
|
+
}
|
|
57
|
+
else if (audioDecision?.outcome === "no-attachment") {
|
|
58
|
+
reason = "no audio file available for transcription";
|
|
59
|
+
}
|
|
60
|
+
else if (audioDecision?.outcome === "skipped") {
|
|
61
|
+
// Distinguish between "no providers at all" (empty attempts) and "providers tried but all failed"
|
|
62
|
+
const hasAttempts = audioDecision.attachments?.some((a) => a.attempts.length > 0);
|
|
63
|
+
reason = hasAttempts
|
|
64
|
+
? "all transcription attempts failed"
|
|
65
|
+
: "no transcription provider configured (add an OpenAI, Google, Groq, or Deepgram API key)";
|
|
66
|
+
}
|
|
67
|
+
else if (audioDecision?.outcome === "disabled") {
|
|
68
|
+
reason = "audio transcription is disabled in config";
|
|
69
|
+
}
|
|
70
|
+
else {
|
|
71
|
+
reason = `transcription ${audioDecision?.outcome ?? "unavailable"}`;
|
|
72
|
+
}
|
|
73
|
+
const note = `[Voice note received but could not be transcribed: ${reason}]`;
|
|
74
|
+
logVerbose(`applyMediaUnderstanding: ${note}`);
|
|
75
|
+
ctx.Body = note;
|
|
76
|
+
ctx.CommandBody = note;
|
|
77
|
+
ctx.RawBody = note;
|
|
78
|
+
finalizeInboundContext(ctx, { forceBodyForAgent: true, forceBodyForCommands: true });
|
|
79
|
+
}
|
|
45
80
|
if (outputs.length > 0) {
|
|
46
81
|
ctx.Body = formatMediaUnderstandingBody({ body: ctx.Body, outputs });
|
|
47
82
|
const audioOutputs = outputs.filter((output) => output.kind === "audio.transcription");
|
|
@@ -22,7 +22,7 @@ export async function transcribeDeepgramAudio(params) {
|
|
|
22
22
|
}
|
|
23
23
|
const headers = new Headers(params.headers);
|
|
24
24
|
if (!headers.has("authorization")) {
|
|
25
|
-
headers.set("authorization", `Token ${params.apiKey}`);
|
|
25
|
+
headers.set("authorization", `Token ${params.apiKey ?? ""}`);
|
|
26
26
|
}
|
|
27
27
|
if (!headers.has("content-type")) {
|
|
28
28
|
headers.set("content-type", params.mime ?? "application/octet-stream");
|
|
@@ -23,7 +23,7 @@ export async function transcribeGeminiAudio(params) {
|
|
|
23
23
|
headers.set("content-type", "application/json");
|
|
24
24
|
}
|
|
25
25
|
if (!headers.has("x-goog-api-key")) {
|
|
26
|
-
headers.set("x-goog-api-key", params.apiKey);
|
|
26
|
+
headers.set("x-goog-api-key", params.apiKey ?? "");
|
|
27
27
|
}
|
|
28
28
|
const body = {
|
|
29
29
|
contents: [
|
|
@@ -23,7 +23,7 @@ export async function describeGeminiVideo(params) {
|
|
|
23
23
|
headers.set("content-type", "application/json");
|
|
24
24
|
}
|
|
25
25
|
if (!headers.has("x-goog-api-key")) {
|
|
26
|
-
headers.set("x-goog-api-key", params.apiKey);
|
|
26
|
+
headers.set("x-goog-api-key", params.apiKey ?? "");
|
|
27
27
|
}
|
|
28
28
|
const body = {
|
|
29
29
|
contents: [
|
|
@@ -5,6 +5,7 @@ import { googleProvider } from "./google/index.js";
|
|
|
5
5
|
import { groqProvider } from "./groq/index.js";
|
|
6
6
|
import { minimaxProvider } from "./minimax/index.js";
|
|
7
7
|
import { openaiProvider } from "./openai/index.js";
|
|
8
|
+
import { sherpaOnnxProvider } from "./sherpa-onnx/index.js";
|
|
8
9
|
const PROVIDERS = [
|
|
9
10
|
groqProvider,
|
|
10
11
|
openaiProvider,
|
|
@@ -12,6 +13,7 @@ const PROVIDERS = [
|
|
|
12
13
|
anthropicProvider,
|
|
13
14
|
minimaxProvider,
|
|
14
15
|
deepgramProvider,
|
|
16
|
+
sherpaOnnxProvider,
|
|
15
17
|
];
|
|
16
18
|
export function normalizeMediaProviderId(id) {
|
|
17
19
|
const normalized = normalizeProviderId(id);
|
|
@@ -25,7 +25,7 @@ export async function transcribeOpenAiCompatibleAudio(params) {
|
|
|
25
25
|
form.append("prompt", params.prompt.trim());
|
|
26
26
|
const headers = new Headers(params.headers);
|
|
27
27
|
if (!headers.has("authorization")) {
|
|
28
|
-
headers.set("authorization", `Bearer ${params.apiKey}`);
|
|
28
|
+
headers.set("authorization", `Bearer ${params.apiKey ?? ""}`);
|
|
29
29
|
}
|
|
30
30
|
const res = await fetchWithTimeout(url, {
|
|
31
31
|
method: "POST",
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { transcribeLocal, MODEL_LABEL } from "../../sherpa-onnx-local.js";
|
|
2
|
+
export const sherpaOnnxProvider = {
|
|
3
|
+
id: "sherpa-onnx",
|
|
4
|
+
isLocal: true,
|
|
5
|
+
capabilities: ["audio"],
|
|
6
|
+
transcribeAudio: async (req) => {
|
|
7
|
+
const result = await transcribeLocal(req.buffer, req.fileName);
|
|
8
|
+
return { text: result.text, model: result.model ?? MODEL_LABEL };
|
|
9
|
+
},
|
|
10
|
+
};
|