niahere 0.2.91 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Transcribe a short audio clip via OpenAI's gpt-4o-mini-transcribe.
3
+ *
4
+ * Used by the WhatsApp channel for voice notes. We accept the raw bytes
5
+ * + MIME (Twilio's WhatsApp voice notes are typically audio/ogg with
6
+ * opus codec — the endpoint handles ogg natively).
7
+ */
8
+ import { log } from "../../utils/log";
9
+
10
/** OpenAI REST endpoint that accepts multipart audio uploads for transcription. */
const ENDPOINT = "https://api.openai.com/v1/audio/transcriptions";

/** Transcription model requested on every call. */
const MODEL = "gpt-4o-mini-transcribe";

/** The HTTP request is aborted after this many milliseconds. */
const TIMEOUT_MS = 30_000;

/**
 * Upload filename to use for a given audio MIME type. Keys are bare,
 * lower-case MIME types (no `; codecs=...` parameters).
 *
 * Besides the canonical types, the common legacy aliases are listed so
 * they map to the right extension rather than to a caller's fallback.
 */
const MIME_TO_FILENAME: Record<string, string> = {
  "audio/ogg": "audio.ogg",
  "audio/mpeg": "audio.mp3",
  "audio/mp3": "audio.mp3",
  "audio/mp4": "audio.m4a",
  "audio/x-m4a": "audio.m4a",
  "audio/wav": "audio.wav",
  "audio/x-wav": "audio.wav",
  "audio/wave": "audio.wav",
  "audio/webm": "audio.webm",
  "audio/flac": "audio.flac",
  "audio/x-flac": "audio.flac",
};
22
+
23
/** Input for {@link transcribeAudio}. */
export interface TranscribeOpts {
  /** OpenAI API key; sent as the `Authorization: Bearer` credential. */
  apiKey: string;
  /** Raw bytes of the audio clip. */
  data: Buffer;
  /** MIME type of `data`; used for the upload's content type and to pick its filename. */
  mime: string;
  /** Optional language value forwarded as the `language` form field when set. */
  language?: string;
}
29
+
30
/**
 * Upload an audio clip to the OpenAI transcription endpoint and return the
 * trimmed transcript.
 *
 * @param opts API key, raw audio bytes, MIME type and optional language.
 * @returns The transcript with surrounding whitespace removed; an empty
 *   string when the response carries no usable `text` field.
 * @throws Error when the endpoint answers with a non-2xx status (message
 *   starts with `OpenAI transcribe failed:`), when the request exceeds
 *   `TIMEOUT_MS`, or when the network call / JSON decoding fails.
 */
export async function transcribeAudio(opts: TranscribeOpts): Promise<string> {
  // Error bodies end up in an Error message that callers may show to end
  // users, so keep only a bounded prefix.
  const maxErrorDetail = 500;

  // MIME types may carry parameters ("audio/ogg; codecs=opus") or odd
  // casing; the filename table is keyed by the bare lower-case type.
  const baseMime = (opts.mime.split(";")[0] ?? "").trim().toLowerCase();
  const filename = MIME_TO_FILENAME[baseMime] ?? "audio.ogg";

  const form = new FormData();
  form.set("file", new Blob([new Uint8Array(opts.data)], { type: opts.mime }), filename);
  form.set("model", MODEL);
  if (opts.language) form.set("language", opts.language);
  form.set("response_format", "json");

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), TIMEOUT_MS);
  try {
    const resp = await fetch(ENDPOINT, {
      method: "POST",
      headers: { Authorization: `Bearer ${opts.apiKey}` },
      body: form,
      signal: controller.signal,
    });
    if (!resp.ok) {
      // Reading the body is best-effort; the status code alone is still useful.
      const detail = (await resp.text().catch(() => "")).slice(0, maxErrorDetail);
      throw new Error(`OpenAI transcribe failed: ${resp.status} ${detail}`);
    }
    const json = (await resp.json()) as { text?: unknown };
    const text = typeof json.text === "string" ? json.text.trim() : "";
    log.info({ chars: text.length, mime: opts.mime }, "twilio: voice note transcribed");
    return text;
  } catch (err) {
    // Only our own timer aborts this controller, so an aborted signal means
    // the deadline passed; report that instead of a generic abort error.
    if (controller.signal.aborted) {
      throw new Error(`OpenAI transcribe timed out after ${TIMEOUT_MS}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
@@ -0,0 +1,408 @@
1
+ /**
2
+ * WhatsApp channel via Twilio (Sandbox by default).
3
+ *
4
+ * Reuses the shared TwilioWebhookServer. Inbound webhook hits
5
+ * /twilio/whatsapp/incoming; we ack immediately (Twilio's 15s budget)
6
+ * and reply via REST under a per-sender lock.
7
+ *
8
+ * Parity targets with the Telegram channel: text + images + documents +
9
+ * voice notes (transcribed), /reset to start a new room, WhatsApp-flavored
10
+ * markdown for outbound, 4096-char chunking, [error] reporting, delivery
11
+ * status tracking. Outbound media is served from
12
+ * channels/twilio/media-cache via GET /twilio/media/<sha>.<ext>.
13
+ *
14
+ * Sandbox: in Twilio Console → Messaging → Try it out → WhatsApp, point
15
+ * the inbound webhook at `${PUBLIC_BASE_URL}/twilio/whatsapp/incoming`.
16
+ * Users opt in by sending `join <two-words>` to `+1 415 523 8886`. Opt-in
17
+ * expires after 72h of inactivity; the join code stays valid. Outbound
18
+ * is further gated by Meta's 24-hour customer-service window.
19
+ */
20
+ import { createChatEngine } from "../chat/engine";
21
+ import { getMcpServers } from "../mcp";
22
+ import { Session, Message } from "../db/models";
23
+ import { runMigrations } from "../db/migrate";
24
+ import type { Attachment, Channel, ChatState, TwilioConfig, WhatsappConfig, PhoneConfig } from "../types";
25
+ import { getConfig } from "../utils/config";
26
+ import { log } from "../utils/log";
27
+ import { classifyMime, prepareImage, validateAttachment } from "../utils/attachment";
28
+ import { sendMessage as twilioSendMessage } from "./twilio/rest";
29
+ import { getTwilioServer } from "./twilio/server";
30
+ import { downloadInboundMedia, extractMedia } from "./twilio/media";
31
+ import { transcribeAudio } from "./twilio/transcribe";
32
+
33
/** Length of the customer-service window used to gate outbound messages, in milliseconds. */
const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;

/** Address prefix Twilio uses for WhatsApp senders and recipients. */
const WA_PREFIX = "whatsapp:";

/** Empty TwiML document returned to acknowledge a webhook without replying inline. */
const EMPTY_TWIML = '<?xml version="1.0" encoding="UTF-8"?><Response></Response>';

/**
 * Maximum characters per outbound message. Twilio's Messages API caps
 * `Body` at 1,600 characters, so longer text must be split below that
 * even though other chat platforms allow more.
 */
const CHUNK_LIMIT = 1600;

/** Matches a message consisting solely of `/reset` or `/new` (case-insensitive, surrounding whitespace allowed). */
const RESET_RE = /^\s*\/(reset|new)\s*$/i;

/** Inbound media whose MIME type starts with this prefix is treated as a voice note. */
const VOICE_MIME_PREFIX = "audio/";
39
+
40
/**
 * WhatsApp channel: receives Twilio webhooks, feeds them to a per-sender
 * chat engine, and replies through Twilio's REST API.
 *
 * Work for one sender is serialized through `ChatState.lock`; the webhook
 * itself is acknowledged with empty TwiML as soon as the work is queued.
 */
class WhatsAppChannel implements Channel {
  name = "whatsapp";
  private readonly twilio: TwilioConfig;
  private readonly whatsapp: WhatsappConfig;
  private readonly phone: PhoneConfig;
  /** Chat engines keyed by sender number (without the `whatsapp:` prefix). */
  private readonly chats = new Map<string, ChatState>();
  /** Engine creations still in flight, so concurrent first messages share one engine. */
  private readonly pendingStates = new Map<string, Promise<ChatState>>();
  /** `Date.now()` of the latest inbound message per sender. In-memory only. */
  private readonly lastInboundAt = new Map<string, number>();

  constructor(twilio: TwilioConfig, whatsapp: WhatsappConfig, phone: PhoneConfig) {
    this.twilio = twilio;
    this.whatsapp = whatsapp;
    this.phone = phone;
  }

  /** Run migrations, configure the shared Twilio server, register the webhook routes and start listening. */
  async start(): Promise<void> {
    await runMigrations();

    const server = getTwilioServer();
    server.configure({
      port: this.twilio.port,
      publicBaseUrl: this.twilio.public_base_url,
      signingToken: this.twilio.auth_token || this.twilio.secret,
    });

    server.registerHttp("/twilio/whatsapp/incoming", (_req, ctx) => this.handleInbound(ctx.params), {
      dedupOn: "MessageSid",
      rateLimitOn: "From",
    });
    server.registerHttp("/twilio/whatsapp/status", (_req, ctx) => this.handleStatus(ctx.params), {
      dedupOn: "MessageSid",
    });

    if (this.twilio.owner_number) {
      server.exemptFromRateLimit(`${WA_PREFIX}${this.twilio.owner_number}`);
    }

    await server.start();

    log.info(
      {
        from: this.whatsapp.from_number,
        owner: this.twilio.owner_number,
        publicBaseUrl: this.twilio.public_base_url,
      },
      "whatsapp channel started",
    );
  }

  /** Close every chat engine and forget all per-sender state. */
  async stop(): Promise<void> {
    for (const state of this.chats.values()) state.engine.close();
    this.chats.clear();
  }

  /**
   * Outbound text to the owner — used by send_message MCP tool.
   *
   * Resolves without sending (a warning is logged) when `canSend` refuses,
   * e.g. outside the 24-hour window.
   *
   * @throws Error when no owner number is configured or Twilio rejects the send.
   */
  async sendMessage(text: string): Promise<void> {
    if (!this.twilio.owner_number) throw new Error("whatsapp: owner_number not set");
    await this.sendTextTo(this.twilio.owner_number, text);
  }

  /**
   * Outbound media to the owner — used by send_message MCP tool with attachments.
   *
   * @throws Error when no owner number is configured or Twilio rejects the send.
   */
  async sendMedia(data: Buffer, mimeType: string, filename?: string): Promise<void> {
    if (!this.twilio.owner_number) throw new Error("whatsapp: owner_number not set");
    await this.sendMediaTo(this.twilio.owner_number, data, mimeType, filename);
  }

  // --- Inbound webhook ---

  /**
   * Handle one inbound WhatsApp webhook. Non-allowlisted senders are
   * acknowledged and ignored; everything else is queued behind the
   * sender's lock and acknowledged immediately.
   */
  private async handleInbound(params: Record<string, string>): Promise<Response> {
    const rawFrom = params.From || "";
    const from = rawFrom.startsWith(WA_PREFIX) ? rawFrom.slice(WA_PREFIX.length) : rawFrom;
    const body = (params.Body || "").trim();

    if (!this.isAllowed(from)) {
      log.warn({ from }, "whatsapp: rejecting non-allowlisted sender");
      return this.ack();
    }

    this.lastInboundAt.set(from, Date.now());

    if (RESET_RE.test(body)) {
      // Serialize through the same lock so a /reset chasing an in-flight
      // engine.send() waits its turn instead of yanking the engine away.
      await this.enqueue(from, "whatsapp: /reset lock chain error", async () => {
        const newState = await this.restartChat(from);
        await this.sendTextTo(from, `New conversation started (room ${this.roomPrefix(from)}-${newState.roomIndex}).`);
      });
      return this.ack();
    }

    const descriptors = extractMedia(params);
    await this.enqueue(from, "whatsapp: lock chain error", (state) =>
      this.processInbound(from, body, descriptors, state),
    );
    return this.ack();
  }

  /** Fresh empty-TwiML 200 response (a Response body can only be consumed once). */
  private ack(): Response {
    return new Response(EMPTY_TWIML, { status: 200, headers: { "Content-Type": "text/xml" } });
  }

  /**
   * Append `task` to the sender's lock chain. Resolves once the task is
   * queued, not once it has run.
   *
   * The chain is kept permanently fulfilled: a failing task is logged with
   * `failureLog` instead of rejecting the lock, because a rejected lock
   * would make the next `.then` skip its task and drop that message.
   * The task receives the state registered for the sender when it runs,
   * so work queued behind a /reset uses the new engine, not the closed one.
   */
  private async enqueue(from: string, failureLog: string, task: (state: ChatState) => Promise<void>): Promise<void> {
    const state = await this.getState(from);
    state.lock = state.lock
      .catch(() => undefined)
      .then(() => task(this.chats.get(from) ?? state))
      .catch((err: unknown) => {
        log.error({ err, from }, failureLog);
      });
  }

  /** Turn one inbound message (text plus optional media) into an engine call and send the reply. */
  private async processInbound(
    from: string,
    body: string,
    descriptors: ReturnType<typeof extractMedia>,
    state: ChatState,
  ): Promise<void> {
    let userText = body;
    let attachments: Attachment[] | undefined;

    if (descriptors.length > 0) {
      let media: { voiceParts: string[]; attachments: Attachment[] };
      try {
        media = await this.ingestMedia(from, descriptors);
      } catch (err) {
        log.error({ err, from }, "whatsapp: inbound media processing failed");
        const reason = err instanceof Error ? err.message : String(err);
        // Best-effort notice; postMessage already logs a failed send.
        await this.sendTextTo(from, `[error] ${reason}`).catch(() => {});
        return;
      }

      if (media.voiceParts.length > 0) {
        const joined = media.voiceParts.join("\n\n");
        userText = userText ? `${userText}\n\n${joined}` : joined;
      }
      if (media.attachments.length > 0) attachments = media.attachments;
    }

    if (!userText && !attachments) {
      log.debug({ from }, "whatsapp: empty inbound (no body, no usable media)");
      return;
    }

    try {
      const { result, messageId } = await state.engine.send(userText || "(media only)", {}, attachments);
      const reply = result.trim() || "(no response)";
      try {
        // `false` means canSend refused, so nothing reached Twilio.
        const delivered = await this.sendTextTo(from, reply);
        if (messageId) {
          await Message.updateDeliveryStatus(messageId, delivered ? "sent" : "failed").catch(() => {});
        }
      } catch (sendErr) {
        if (messageId) await Message.updateDeliveryStatus(messageId, "failed").catch(() => {});
        throw sendErr;
      }
    } catch (err) {
      log.error({ err, from }, "whatsapp: engine error");
      const errText = err instanceof Error ? err.message : String(err);
      await this.sendTextTo(from, `[error] ${errText}`).catch(() => {});
    }
  }

  /**
   * Download the message's media and sort it into voice-note transcripts
   * (or bracketed placeholders when transcription is unavailable or fails)
   * and validated attachments. Rejected attachments are reported to the
   * sender and skipped.
   */
  private async ingestMedia(
    from: string,
    descriptors: ReturnType<typeof extractMedia>,
  ): Promise<{ voiceParts: string[]; attachments: Attachment[] }> {
    const downloaded = await downloadInboundMedia(descriptors, {
      accountSid: this.twilio.sid!,
      authSid: this.twilio.sid!,
      authSecret: this.twilio.secret!,
    });

    const voiceParts: string[] = [];
    const attachments: Attachment[] = [];

    for (const item of downloaded) {
      if (item.mime.startsWith(VOICE_MIME_PREFIX)) {
        voiceParts.push(await this.transcribeVoiceNote(from, item.data, item.mime));
        continue;
      }

      const error = validateAttachment(item.data, item.mime);
      if (error) {
        log.warn({ from, mime: item.mime, error }, "whatsapp: rejecting attachment");
        await this.sendTextTo(from, `[error] ${error}`).catch(() => {});
        continue;
      }

      const attType = classifyMime(item.mime) || "file";
      let data = item.data;
      let mime = item.mime;
      if (attType === "image") {
        const prepared = await prepareImage(data, mime);
        data = prepared.data;
        mime = prepared.mimeType;
      }
      attachments.push({ type: attType, data, mimeType: mime });
    }

    return { voiceParts, attachments };
  }

  /** Transcribe one voice note; never throws — failures become a bracketed placeholder string. */
  private async transcribeVoiceNote(from: string, data: Buffer, mime: string): Promise<string> {
    if (!this.phone.openai_api_key) {
      return "[voice note: transcription unavailable — channels.phone.openai_api_key not set]";
    }
    try {
      const transcript = await transcribeAudio({ apiKey: this.phone.openai_api_key, data, mime });
      return transcript || "[empty voice note]";
    } catch (err) {
      log.error({ err, from }, "whatsapp: voice transcription failed");
      return `[voice note: transcription failed — ${err instanceof Error ? err.message : String(err)}]`;
    }
  }

  /** Log a delivery-status callback and acknowledge it with an empty 204. */
  private handleStatus(params: Record<string, string>): Response {
    log.info(
      {
        messageSid: params.MessageSid,
        status: params.MessageStatus,
        errorCode: params.ErrorCode,
        to: params.To,
      },
      "whatsapp: delivery status",
    );
    // A 204 response must be built with a null body; spec-compliant
    // runtimes throw if any body (even "") is supplied.
    return new Response(null, { status: 204 });
  }

  // --- Outbound ---

  /**
   * Convert, chunk and send text to one recipient.
   *
   * @returns `true` when every chunk was handed to Twilio, `false` when
   *   `canSend` refused and nothing was sent.
   * @throws Whatever `postMessage` throws on a failed send.
   */
  private async sendTextTo(remoteE164: string, body: string): Promise<boolean> {
    if (!this.canSend(remoteE164)) return false;
    const converted = toWhatsAppMarkdown(body);
    for (const chunk of chunkText(converted, CHUNK_LIMIT)) {
      await this.postMessage(remoteE164, chunk, undefined);
    }
    return true;
  }

  /**
   * Publish `data` through the shared server's media endpoint and send it
   * as a media message. A failure to publish is logged and the message is
   * skipped without throwing.
   */
  private async sendMediaTo(remoteE164: string, data: Buffer, mimeType: string, filename?: string): Promise<void> {
    if (!this.canSend(remoteE164)) return;
    const ext = filename ? extOf(filename) : undefined;
    let mediaUrl: string;
    try {
      mediaUrl = await getTwilioServer().serveMedia(new Uint8Array(data), mimeType, ext);
    } catch (err) {
      log.error({ err }, "whatsapp: serveMedia failed");
      return;
    }
    await this.postMessage(remoteE164, "", [mediaUrl]);
  }

  /** Send a single message via Twilio REST; logs and rethrows on failure. */
  private async postMessage(remoteE164: string, body: string, mediaUrl: string[] | undefined): Promise<void> {
    try {
      const res = await twilioSendMessage({
        accountSid: this.twilio.sid!,
        authSid: this.twilio.sid!,
        authSecret: this.twilio.secret!,
        to: `${WA_PREFIX}${remoteE164}`,
        from: `${WA_PREFIX}${this.whatsapp.from_number}`,
        body,
        mediaUrl,
        statusCallbackUrl: this.twilio.public_base_url
          ? `${this.twilio.public_base_url}/twilio/whatsapp/status`
          : undefined,
      });
      log.info({ to: remoteE164, sid: res.messageSid, status: res.status, hasMedia: !!mediaUrl }, "whatsapp: sent");
    } catch (err) {
      log.error({ err, to: remoteE164 }, "whatsapp: send failed");
      throw err;
    }
  }

  /** Returns true if we have credentials AND we're inside the 24h window. */
  private canSend(remoteE164: string): boolean {
    if (!this.twilio.sid || !this.twilio.secret) {
      log.warn("whatsapp: twilio sid/secret missing, cannot send");
      return false;
    }
    if (!this.whatsapp.from_number) {
      log.warn("whatsapp: from_number not configured");
      return false;
    }
    const lastIn = this.lastInboundAt.get(remoteE164);
    const now = Date.now();
    if (!lastIn || now - lastIn > TWENTY_FOUR_HOURS_MS) {
      log.warn(
        {
          remoteE164,
          lastInboundAt: lastIn ? new Date(lastIn).toISOString() : null,
        },
        "whatsapp: outside 24h customer-service window — drop (Twilio rejects free-form; approved template needed)",
      );
      return false;
    }
    return true;
  }

  // --- Helpers ---

  /** The owner is always allowed; anyone else must be on the Twilio allowlist. */
  private isAllowed(remoteE164: string): boolean {
    if (this.twilio.owner_number && remoteE164 === this.twilio.owner_number) return true;
    return this.twilio.allowlist.includes(remoteE164);
  }

  /** Room-name prefix for a sender; the room index is appended as `-<n>`. */
  private roomPrefix(remoteE164: string): string {
    return `wa-${remoteE164}`;
  }

  /**
   * Return the sender's chat state, creating it on first use. Concurrent
   * callers for the same sender share one in-flight creation so only a
   * single engine (and a single lock) ever exists per sender.
   */
  private async getState(remoteE164: string): Promise<ChatState> {
    const existing = this.chats.get(remoteE164);
    if (existing) return existing;

    let pending = this.pendingStates.get(remoteE164);
    if (!pending) {
      pending = this.createState(remoteE164).finally(() => {
        this.pendingStates.delete(remoteE164);
      });
      this.pendingStates.set(remoteE164, pending);
    }
    return pending;
  }

  /** Create an engine resuming the sender's latest room and register it in `chats`. */
  private async createState(remoteE164: string): Promise<ChatState> {
    const prefix = this.roomPrefix(remoteE164);
    const idx = await Session.getLatestRoomIndex(prefix);
    const room = `${prefix}-${idx}`;
    log.info({ remoteE164, room }, "whatsapp: creating chat engine");
    const engine = await createChatEngine({
      room,
      channel: "whatsapp",
      resume: true,
      mcpServers: getMcpServers(),
    });
    const state: ChatState = { engine, roomIndex: idx, lock: Promise.resolve() };
    this.chats.set(remoteE164, state);
    return state;
  }

  /**
   * Close the sender's current engine and start a fresh one in the next
   * room index. The new state inherits the old lock chain so messages
   * queued during the reset still run before later ones.
   */
  private async restartChat(remoteE164: string): Promise<ChatState> {
    const old = this.chats.get(remoteE164);
    if (old) old.engine.close();

    const prefix = this.roomPrefix(remoteE164);
    const prevIdx = await Session.getLatestRoomIndex(prefix);
    const newIdx = prevIdx + 1;
    const room = `${prefix}-${newIdx}`;

    // Persist a placeholder session so the room index survives daemon
    // restarts (otherwise getState falls back to the old room).
    await Session.create(`placeholder-${room}`, room);

    const engine = await createChatEngine({
      room,
      channel: "whatsapp",
      resume: false,
      mcpServers: getMcpServers(),
    });
    // Read old.lock only now (after the awaits) so it includes everything
    // that was queued while the new engine was being created.
    const state: ChatState = { engine, roomIndex: newIdx, lock: old ? old.lock : Promise.resolve() };
    this.chats.set(remoteE164, state);
    log.info({ remoteE164, room }, "whatsapp: new conversation started");
    return state;
  }
}
364
+
365
/**
 * Translate the slice of Markdown the agent uses to WhatsApp's flavor.
 * WhatsApp's renderer accepts `*bold*`, `_italic_`, `~strike~`, and
 * triple-backtick code blocks. We only rewrite forms that would render
 * as literal punctuation otherwise (`**bold**`, `~~strike~~`); single
 * `*italic*` is left alone since detecting it without false positives
 * around bold is more trouble than it's worth.
 *
 * Fenced (triple-backtick) blocks and inline backtick spans are passed
 * through untouched, so operators such as `**` inside code are not
 * rewritten. Emphasis that wraps an inline code span is still converted.
 *
 * @param text Markdown produced by the agent.
 * @returns The same text with bold/strike markers in WhatsApp form.
 */
export function toWhatsAppMarkdown(text: string): string {
  // Swap each code span for a NUL-delimited placeholder so the emphasis
  // regexes can neither match inside it nor be broken up by it.
  const codeSpans: string[] = [];
  const masked = text.replace(/```[\s\S]*?```|`[^`\n]*`/g, (span) => {
    codeSpans.push(span);
    return `\u0000${codeSpans.length - 1}\u0000`;
  });

  const converted = masked.replace(/\*\*(.+?)\*\*/gs, "*$1*").replace(/~~(.+?)~~/gs, "~$1~");

  // Restore the code spans; anything that merely looks like a placeholder
  // but has no stored span is left exactly as it was.
  return converted.replace(/\u0000(\d+)\u0000/g, (match, index: string) => codeSpans[Number(index)] ?? match);
}
376
+
377
/**
 * Split text into chunks bounded by `limit` chars, preferring paragraph then line breaks.
 *
 * A break is looked for at or before `limit`: first a blank line, then a
 * newline, then a space; when none is usable the text is cut hard at
 * `limit` (moved back one position if that would split a surrogate pair).
 * Whitespace around each cut is trimmed, and chunks that end up empty are
 * dropped so callers never receive a blank message.
 *
 * @param text Text to split.
 * @param limit Maximum chunk length in UTF-16 code units; must be >= 1
 *   whenever `text` is longer than `limit`.
 * @returns `[text]` unchanged when it already fits; otherwise the
 *   non-empty chunks in order.
 * @throws RangeError when splitting is required but `limit` is below 1 or NaN.
 */
export function chunkText(text: string, limit: number): string[] {
  if (text.length <= limit) return [text];
  // With limit < 1 the loop below could never make progress.
  if (!(limit >= 1)) {
    throw new RangeError(`chunkText: limit must be >= 1, got ${limit}`);
  }

  const chunks: string[] = [];
  let remaining = text;
  while (remaining.length > limit) {
    let cut = remaining.lastIndexOf("\n\n", limit);
    if (cut < limit / 2) cut = remaining.lastIndexOf("\n", limit);
    if (cut < limit / 2) cut = remaining.lastIndexOf(" ", limit);
    if (cut <= 0) {
      cut = limit;
      // Don't separate a high surrogate from its low surrogate.
      const before = remaining.charCodeAt(cut - 1);
      const after = remaining.charCodeAt(cut);
      if (cut > 1 && before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff) {
        cut -= 1;
      }
    }
    const piece = remaining.slice(0, cut).trimEnd();
    // Leading whitespace before the cut would otherwise yield "".
    if (piece.length > 0) chunks.push(piece);
    remaining = remaining.slice(cut).trimStart();
  }
  if (remaining.length > 0) chunks.push(remaining);
  return chunks;
}
393
+
394
/**
 * Lower-cased extension of a filename, without the dot.
 *
 * Only the final path component is considered, and the extension must be
 * purely alphanumeric, so a dot in a directory name or stray punctuation
 * never produces an "extension" containing separators or other symbols.
 *
 * @returns The extension, or `undefined` when there is none or it is not alphanumeric.
 */
function extOf(filename: string): string | undefined {
  const lastSeparator = Math.max(filename.lastIndexOf("/"), filename.lastIndexOf("\\"));
  const base = filename.slice(lastSeparator + 1);
  const dot = base.lastIndexOf(".");
  if (dot < 0 || dot === base.length - 1) return undefined;
  const ext = base.slice(dot + 1).toLowerCase();
  return /^[a-z0-9]+$/.test(ext) ? ext : undefined;
}
399
+
400
/**
 * Build the WhatsApp channel from the loaded configuration.
 *
 * @returns A new channel, or `null` when WhatsApp is disabled or any of
 *   the Twilio SID, Twilio secret or WhatsApp from-number is missing.
 */
export function createWhatsAppChannel(): WhatsAppChannel | null {
  const { twilio, whatsapp, phone } = getConfig().channels;
  const configured = whatsapp.enabled && twilio.sid && twilio.secret && whatsapp.from_number;
  return configured ? new WhatsAppChannel(twilio, whatsapp, phone) : null;
}
407
+
408
+ export type { WhatsAppChannel };
package/src/cli/phone.ts CHANGED
@@ -45,21 +45,21 @@ async function phoneCallCommand(): Promise<void> {
45
45
  const channel = createPhoneChannel();
46
46
  if (!channel) {
47
47
  fail(
48
- "Phone channel not configured. Need TWILIO_SID, TWILIO_SECRET, PHONE_FROM_NUMBER in .env (plus OPENAI_API_KEY and PUBLIC_BASE_URL for the realtime voice loop).",
48
+ "Phone channel not configured. Set channels.phone.{twilio_sid,twilio_secret,from_number} in ~/.niahere/config.yaml (also channels.phone.{openai_api_key,public_base_url} for the realtime voice loop). Env vars TWILIO_SID / TWILIO_SECRET / PHONE_FROM_NUMBER / OPENAI_API_KEY / PUBLIC_BASE_URL override if you prefer .env.",
49
49
  );
50
50
  }
51
51
 
52
52
  await channel!.start();
53
- const cfg = getConfig().channels.phone;
54
- console.log(`${ICON_PASS} phone server up on :${cfg.port}`);
55
- if (!cfg.public_base_url) {
56
- console.log(`${ICON_WARN} PUBLIC_BASE_URL not set — Twilio cannot reach this server.`);
57
- console.log(` Start cloudflared (or your tunnel) and set PUBLIC_BASE_URL in .env first.`);
53
+ const { twilio, phone } = getConfig().channels;
54
+ console.log(`${ICON_PASS} phone server up on :${twilio.port}`);
55
+ if (!twilio.public_base_url) {
56
+ console.log(`${ICON_WARN} public_base_url not set — Twilio cannot reach this server.`);
57
+ console.log(` Start cloudflared (or your tunnel) and set channels.twilio.public_base_url in config.yaml.`);
58
58
  await channel!.stop();
59
59
  process.exit(1);
60
60
  }
61
- if (!cfg.openai_api_key) {
62
- console.log(`${ICON_WARN} OPENAI_API_KEY not set — realtime voice loop will fall back to TwiML <Say>.`);
61
+ if (!phone.openai_api_key) {
62
+ console.log(`${ICON_WARN} openai_api_key not set — realtime voice loop will fall back to TwiML <Say>.`);
63
63
  }
64
64
 
65
65
  console.log(` dialing ${number} ...`);
@@ -91,17 +91,23 @@ async function phoneCallCommand(): Promise<void> {
91
91
  }
92
92
 
93
93
  function phoneStatusCommand(): void {
94
- const cfg = getConfig().channels.phone;
94
+ const { twilio, phone, sms, whatsapp } = getConfig().channels;
95
95
  const lines = [
96
- `from: ${cfg.from_number ?? "(not set)"}`,
97
- `owner: ${cfg.owner_number ?? "(not set)"}`,
98
- `allowlist: ${cfg.allowlist.length ? cfg.allowlist.join(", ") : "(empty)"}`,
99
- `port: ${cfg.port}`,
100
- `public_base_url:${cfg.public_base_url ?? "(not set)"}`,
101
- `realtime_model: ${cfg.realtime_model}`,
102
- `voice: ${cfg.voice}`,
103
- `twilio creds: ${cfg.twilio_sid && cfg.twilio_secret ? "configured" : "MISSING"}`,
104
- `openai key: ${cfg.openai_api_key ? "configured" : "MISSING"}`,
96
+ `phone enabled: ${phone.enabled}`,
97
+ `phone from: ${phone.from_number ?? "(not set)"}`,
98
+ `sms enabled: ${sms.enabled}`,
99
+ `sms from: ${sms.from_number ?? `(defaults to phone: ${phone.from_number ?? "unset"})`}`,
100
+ `whatsapp enabled: ${whatsapp.enabled}`,
101
+ `whatsapp from: ${whatsapp.from_number ?? "(not set)"}`,
102
+ `owner: ${twilio.owner_number ?? "(not set)"}`,
103
+ `allowlist: ${twilio.allowlist.length ? twilio.allowlist.join(", ") : "(empty)"}`,
104
+ `port: ${twilio.port}`,
105
+ `public_base_url: ${twilio.public_base_url ?? "(not set)"}`,
106
+ `realtime_model: ${phone.realtime_model}`,
107
+ `voice: ${phone.voice}`,
108
+ `twilio creds: ${twilio.sid && twilio.secret ? "configured" : "MISSING"}`,
109
+ `twilio auth_token:${twilio.auth_token ? "configured" : "(falling back to secret)"}`,
110
+ `openai key: ${phone.openai_api_key ? "configured" : "MISSING"}`,
105
111
  ];
106
112
  console.log(lines.join("\n"));
107
113
  }
@@ -119,9 +125,13 @@ function helpText(): string {
119
125
  " phone server, dials, waits, prints transcript.",
120
126
  " status Show phone channel configuration.",
121
127
  "",
122
- "Required env:",
123
- " TWILIO_SID, TWILIO_SECRET, PHONE_FROM_NUMBER",
124
- " OPENAI_API_KEY (for realtime voice loop)",
125
- " PUBLIC_BASE_URL (cloudflared/ngrok tunnel pointing at PHONE_PORT)",
128
+ "Config lives in ~/.niahere/config.yaml under channels.phone:",
129
+ " twilio_sid, twilio_secret, from_number (required)",
130
+ " openai_api_key, public_base_url (required for realtime voice loop)",
131
+ " twilio_auth_token (required if twilio_sid is an API Key SID)",
132
+ " port, voice, realtime_model, allowlist (optional)",
133
+ "",
134
+ "Each field can be overridden by the matching env var (TWILIO_SID, OPENAI_API_KEY, etc.)",
135
+ "if you prefer .env. See the nia-phone skill for full deploy walkthrough.",
126
136
  ].join("\n");
127
137
  }
@@ -215,6 +215,60 @@ export async function runInit(): Promise<void> {
215
215
  }
216
216
  }
217
217
 
218
+ // Phone (Twilio Voice + OpenAI Realtime)
219
+ const exPh = (exCh.phone || {}) as Record<string, unknown>;
220
+ let phoneTwilioSid = (exPh.twilio_sid as string) || "";
221
+ let phoneTwilioSecret = (exPh.twilio_secret as string) || "";
222
+ let phoneTwilioAuthToken = (exPh.twilio_auth_token as string) || "";
223
+ let phoneFromNumber = (exPh.from_number as string) || "";
224
+ let phoneOwnerNumber = (exPh.owner_number as string) || "";
225
+ let phonePublicBaseUrl = (exPh.public_base_url as string) || "";
226
+ let phoneOpenAiKey = (exPh.openai_api_key as string) || "";
227
+ let phoneVoice = (exPh.voice as string) || "";
228
+
229
+ const existingPhoneSid = phoneTwilioSid;
230
+ if (existingPhoneSid) {
231
+ const masked = `...${existingPhoneSid.slice(-6)}`;
232
+ const reconfigure = await ask(rl, `\nPhone (Twilio + Realtime): configured (${masked}). Reconfigure? (y/n)`, "n");
233
+ if (reconfigure.toLowerCase() === "y") {
234
+ phoneTwilioSid = (await ask(rl, "Twilio SID (AC… or SK…)", phoneTwilioSid)) || phoneTwilioSid;
235
+ phoneTwilioSecret =
236
+ (await ask(rl, "Twilio Secret (Auth Token if AC, API Key Secret if SK)", phoneTwilioSecret)) ||
237
+ phoneTwilioSecret;
238
+ if (phoneTwilioSid.startsWith("SK")) {
239
+ phoneTwilioAuthToken =
240
+ (await ask(rl, "Twilio Auth Token (account-level — needed for webhook signing)", phoneTwilioAuthToken)) ||
241
+ phoneTwilioAuthToken;
242
+ }
243
+ phoneFromNumber =
244
+ (await ask(rl, "Twilio number to dial from (E.164, e.g. +13025551234)", phoneFromNumber)) || phoneFromNumber;
245
+ phoneOwnerNumber = (await ask(rl, "Your phone (E.164)", phoneOwnerNumber)) || phoneOwnerNumber;
246
+ phonePublicBaseUrl =
247
+ (await ask(rl, "Public base URL (cloudflared/ngrok https://…)", phonePublicBaseUrl)) || phonePublicBaseUrl;
248
+ phoneOpenAiKey = (await ask(rl, "OpenAI API key (for Realtime voice loop)", phoneOpenAiKey)) || phoneOpenAiKey;
249
+ phoneVoice =
250
+ (await ask(rl, "Realtime voice (marin, cedar, shimmer, coral, alloy…)", phoneVoice || "marin")) || phoneVoice;
251
+ }
252
+ } else {
253
+ const setupPhone = await ask(rl, "\nSet up phone (Twilio + OpenAI Realtime voice calls)? (y/n)", "n");
254
+ if (setupPhone.toLowerCase() === "y") {
255
+ console.log(" You'll need: a Twilio voice number, your phone number, an OpenAI API key, and a public tunnel.");
256
+ console.log(" See /nia-phone skill for the full deploy walkthrough.\n");
257
+ phoneTwilioSid = await ask(rl, "Twilio SID (AC… or SK…)", "");
258
+ if (phoneTwilioSid) {
259
+ phoneTwilioSecret = await ask(rl, "Twilio Secret (Auth Token if AC, API Key Secret if SK)", "");
260
+ if (phoneTwilioSid.startsWith("SK")) {
261
+ phoneTwilioAuthToken = await ask(rl, "Twilio Auth Token (account-level — for webhook signing)", "");
262
+ }
263
+ phoneFromNumber = await ask(rl, "Twilio number to dial from (E.164, e.g. +13025551234)", "");
264
+ phoneOwnerNumber = await ask(rl, "Your phone (E.164)", "");
265
+ phonePublicBaseUrl = await ask(rl, "Public base URL (cloudflared/ngrok https://…)", "");
266
+ phoneOpenAiKey = await ask(rl, "OpenAI API key", "");
267
+ phoneVoice = await ask(rl, "Realtime voice", "marin");
268
+ }
269
+ }
270
+ }
271
+
218
272
  // Gemini API key (for image generation)
219
273
  let geminiApiKey = "";
220
274
  const existingGemini = (existing.gemini_api_key as string) || "";
@@ -430,6 +484,19 @@ export async function runInit(): Promise<void> {
430
484
  if (slackBotToken && !telegramToken) {
431
485
  channels.default = "slack";
432
486
  }
487
+ if (phoneTwilioSid && phoneTwilioSecret && phoneFromNumber) {
488
+ const ph: Record<string, unknown> = {
489
+ twilio_sid: phoneTwilioSid,
490
+ twilio_secret: phoneTwilioSecret,
491
+ from_number: phoneFromNumber,
492
+ };
493
+ if (phoneTwilioAuthToken) ph.twilio_auth_token = phoneTwilioAuthToken;
494
+ if (phoneOwnerNumber) ph.owner_number = phoneOwnerNumber;
495
+ if (phonePublicBaseUrl) ph.public_base_url = phonePublicBaseUrl.replace(/\/$/, "");
496
+ if (phoneOpenAiKey) ph.openai_api_key = phoneOpenAiKey;
497
+ if (phoneVoice && phoneVoice !== "marin") ph.voice = phoneVoice;
498
+ channels.phone = ph;
499
+ }
433
500
  if (Object.keys(channels).length > 0) {
434
501
  config.channels = channels;
435
502
  }
package/src/mcp/tools.ts CHANGED
@@ -480,7 +480,7 @@ export async function placeCall(args: {
480
480
  const { getPhoneChannel } = await import("../channels/phone");
481
481
  const phone = getPhoneChannel();
482
482
  if (!phone) {
483
- return "Phone channel is not configured. Set TWILIO_SID, TWILIO_SECRET, PHONE_FROM_NUMBER, PUBLIC_BASE_URL, OPENAI_API_KEY in .env and restart the daemon.";
483
+ return "Phone channel is not configured. Add a channels.phone block to ~/.niahere/config.yaml with twilio_sid, twilio_secret, from_number, public_base_url, openai_api_key (or set the matching env vars in .env), then restart the daemon.";
484
484
  }
485
485
  try {
486
486
  const result = await phone.placeCall({