niahere 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,6 +37,8 @@ nia start # starts daemon + registers OS service
37
37
  - **Telegram** — message your agent from your phone, typing indicator while processing
38
38
  - **Slack** — Socket Mode bot with thread awareness, thinking emoji, watch channels for proactive monitoring
39
39
  - **Phone (voice)** — Twilio + OpenAI Realtime. Inbound calls from allowlisted contacts and outbound calls via `place_call` MCP tool. Scheduled jobs can dial you (morning standup, evening retro, escalation). See `/nia-phone` skill.
40
+ - **SMS** — Twilio Messaging on the same number. Inbound webhook → chat engine → REST reply. Reachability fallback when data is unavailable but cellular works (treks, basements, patchy zones).
41
+ - **WhatsApp** — Twilio Sandbox by default (production WABA when policy permits). Rich messaging with `whatsapp:` prefix. Enforces Meta's 24-hour customer-service window.
40
42
  - **Terminal chat** — REPL with session resume support
41
43
  - **Scheduled jobs** — recurring jobs and crons that run Claude and can message you back. Stateful by default (working memory), per-job model routing for cost savings
42
44
  - **Persona system** — customizable identity, soul, owner profile, rules, and memory (preloaded every session)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "niahere",
3
- "version": "0.3.0",
3
+ "version": "0.3.2",
4
4
  "description": "A personal AI assistant daemon — chat, scheduled jobs, persona system, extensible via skills.",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -1,68 +1,94 @@
1
1
  ---
2
2
  name: nia-phone
3
3
  description: >
4
- Use when setting up, deploying, or debugging Nia's phone channel
5
- (Twilio + OpenAI Realtime voice calls). Covers env vars, cloudflared
6
- named-tunnel setup, Twilio number webhook configuration, the
7
- `nia phone` CLI subcommands, and the `place_call` MCP tool. Trigger
8
- on mentions of "phone", "call", "voice", "twilio", "realtime", "ngrok",
9
- "cloudflared", "media stream", or when the user is deploying Nia to a
10
- new machine and needs the phone surface to come up.
4
+ Use when setting up, deploying, or debugging Nia's Twilio-based channels:
5
+ voice (phone), SMS, and WhatsApp. All three share one Twilio number,
6
+ one webhook server, and one set of credentials under channels.twilio.
7
+ Covers config schema, cloudflared named-tunnel setup, Twilio Console
8
+ webhook wiring, `nia phone` CLI, `place_call` / `send_message` MCP
9
+ tools, the WhatsApp Sandbox 24h customer-service window, and the
10
+ shared TwilioWebhookServer's dedup + rate-limit middleware. Trigger
11
+ on mentions of "phone", "call", "voice", "sms", "whatsapp", "twilio",
12
+ "realtime", "media stream", "cloudflared", or when deploying Nia to
13
+ a new machine and a Twilio surface needs to come up.
11
14
  ---
12
15
 
13
16
  ## Overview
14
17
 
15
- The phone channel (`src/channels/phone/`) bridges Twilio Programmable
16
- Voice to the OpenAI Realtime API. It exposes:
18
+ Three Twilio-based channels share one phone number, one webhook server,
19
+ and one set of credentials:
17
20
 
18
- - **Inbound calls** — owner or allowlisted contacts dial the Twilio
19
- number; the call is bridged to the realtime model with full persona
20
- context. Unknown callers are politely declined.
21
- - **Outbound calls** `place_call` MCP tool (or `nia phone call` CLI)
22
- dials a number, seeds a per-call goal into the realtime session, and
23
- Nia speaks first. Used by scheduled jobs (morning standup, evening
24
- retro, escalation pings) and by chat ("call the dentist for me").
21
+ - **Phone** (`src/channels/phone/`) — voice via Twilio Programmable
22
+ Voice + OpenAI Realtime. Inbound (caller dials) and outbound
23
+ (`place_call` MCP tool / `nia phone call` CLI).
24
+ - **SMS** (`src/channels/sms.ts`) — Twilio Messaging on the same number.
25
+ Inbound webhook → chat engine → REST reply. The reachability
26
+ fallback when data is unavailable but cellular works.
27
+ - **WhatsApp** (`src/channels/whatsapp.ts`) — Twilio WhatsApp Sandbox
28
+ by default; production WABA when policy permits. Enforces Meta's
29
+ 24-hour customer-service window — outside it, free-form replies are
30
+ dropped (Twilio rejects without an approved template).
25
31
 
26
- Transcripts persist to the `messages` table with `channel = 'phone'` and
27
- `room = phone-<callSid>`.
32
+ All three register routes on the shared `TwilioWebhookServer`
33
+ (`src/channels/twilio/server.ts`), which centralizes:
34
+
35
+ - `X-Twilio-Signature` HMAC-SHA1 validation
36
+ - `MessageSid` / `CallSid` deduplication (Twilio retries on 5xx/timeouts)
37
+ - Per-remote-number rate limiting (30/min default; owner exempt)
38
+ - `/healthz` and `/twilio/health` endpoints
39
+
40
+ Transcripts persist to the `messages` table with `channel='phone'`,
41
+ `'sms'`, or `'whatsapp'` and `room=<channel>-<callSid|E164>`.
28
42
 
29
43
  ## Configuration
30
44
 
31
- Phone config lives in `~/.niahere/config.yaml` under `channels.phone`
32
- same place as `channels.telegram` and `channels.slack`. Every field is
33
- overridable by the matching env var if you prefer `.env` for secrets.
45
+ Twilio creds + identity are shared across all three channels under
46
+ `channels.twilio`. Each channel has its own enable flag and channel-specific
47
+ config under `channels.{phone,sms,whatsapp}`.
34
48
 
35
49
  ```yaml
36
50
  # ~/.niahere/config.yaml
37
51
  channels:
38
- phone:
39
- twilio_sid: AC... # Account SID or an API Key SID (SK)
40
- twilio_secret: ... # Auth Token if SID is AC, API Key Secret if SID is SK
41
- twilio_auth_token:
42
- ... # Required when twilio_sid is an API Key (SK…); signs webhooks.
43
- # Omit if twilio_secret is already the Auth Token.
44
- from_number: "+1..." # Your Twilio number (E.164)
45
- owner_number: "+91..." # Owner's phone (E.164) — highest-trust caller
52
+ twilio:
53
+ sid: AC... # Account SID (or API Key SID SK…; Twilio resolves both)
54
+ secret: ... # Auth Token if sid is AC…, API Key Secret if SK
55
+ auth_token: ... # Required when sid is SK… (signs webhooks). Omit if secret is the Auth Token.
56
+ owner_number: "+91..." # Highest-trust caller/messenger
57
+ allowlist: ["+12025550100"] # Extra allowed senders/callers (E.164)
46
58
  public_base_url: https://nia.example.com # No trailing slash
47
- openai_api_key: sk-proj-... # For the Realtime voice loop
59
+ port: 7079 # Local port the shared webhook server binds to
48
60
 
49
- # Optional
50
- port: 7079 # Local port the webhook server binds to
51
- allowlist: ["+12025550100"] # Extra allowed inbound callers (E.164)
52
- voice: marin # Realtime voice (marin | cedar | shimmer | coral | alloy | ash | …)
61
+ phone:
62
+ enabled: true
63
+ from_number: "+13025480697" # Twilio number for voice
64
+ openai_api_key: sk-proj-... # For the Realtime voice loop
53
65
  realtime_model: gpt-realtime
66
+ voice: marin # marin | cedar | shimmer | coral | alloy | ash | …
67
+
68
+ sms:
69
+ enabled: true
70
+ from_number: "+13025480697" # Defaults to phone.from_number if omitted
71
+
72
+ whatsapp:
73
+ enabled: true
74
+ from_number: "+14155238886" # Twilio Sandbox shared number; replace once WABA is approved
54
75
  ```
55
76
 
56
77
  Env overrides (use these if you'd rather keep secrets in `.env`):
57
78
 
58
79
  ```
59
80
  TWILIO_SID, TWILIO_SECRET, TWILIO_AUTH_TOKEN
60
- PHONE_FROM_NUMBER, PRIMARY_PHONE_USER
61
- PUBLIC_BASE_URL, OPENAI_API_KEY
62
- PHONE_PORT, PHONE_ALLOWLIST (comma-separated), PHONE_VOICE, PHONE_REALTIME_MODEL
81
+ PRIMARY_PHONE_USER (owner), PHONE_ALLOWLIST (comma-separated)
82
+ PUBLIC_BASE_URL, PHONE_PORT
83
+ PHONE_FROM_NUMBER, OPENAI_API_KEY, PHONE_VOICE, PHONE_REALTIME_MODEL
84
+ SMS_FROM_NUMBER, WHATSAPP_FROM_NUMBER
63
85
  ```
64
86
 
65
- `nia phone status` prints which fields are set / missing.
87
+ **Backward compat:** the previous `channels.phone.{twilio_sid, twilio_secret,
88
+ twilio_auth_token, owner_number, allowlist, public_base_url, port}` shape
89
+ is still read as a fallback for one release cycle. Migrate when convenient.
90
+
91
+ `nia phone status` prints which fields are set across all three channels.
66
92
 
67
93
  ## Cloudflared named tunnel (production)
68
94
 
@@ -112,23 +138,47 @@ Then set in `.env`: `PUBLIC_BASE_URL=https://nia.example.com`.
112
138
  Verify with `curl https://nia.example.com/healthz` — should return `ok`
113
139
  once the daemon is running.
114
140
 
115
- ## Twilio number webhook (inbound only)
141
+ ## Twilio Console webhook wiring
116
142
 
117
- Outbound calls do NOT need any Twilio Console config — `placeCall`
118
- controls the TwiML URL itself.
143
+ Outbound paths (voice via `placeCall`, SMS/WhatsApp via Messages REST)
144
+ control their own URLs and don't need Console config. Inbound paths do.
119
145
 
120
- Inbound calls need the Twilio number's Voice webhook pointed at your
121
- public URL:
146
+ **Voice** (per phone number):
122
147
 
123
148
  1. Twilio Console → Phone Numbers → Active Numbers → click your number.
124
- 2. Voice Configuration → "A call comes in" → Webhook.
125
- 3. URL: `https://<PUBLIC_BASE_URL>/twilio/voice/incoming` — Method: POST.
126
- 4. Status callback URL: `https://<PUBLIC_BASE_URL>/twilio/voice/status`
127
- — Method: POST.
128
- 5. Save.
149
+ 2. Voice Configuration → "A call comes in" → Webhook (POST).
150
+ 3. URL: `https://<PUBLIC_BASE_URL>/twilio/voice/incoming`
151
+ 4. Status callback: `https://<PUBLIC_BASE_URL>/twilio/voice/status` (POST)
152
+
153
+ **SMS** (per phone number):
129
154
 
130
- If the Twilio account is on trial, every destination number for
131
- outbound calls must be in the Verified Caller IDs list.
155
+ 1. Same number → Messaging Configuration → "A message comes in" → Webhook (POST).
156
+ 2. URL: `https://<PUBLIC_BASE_URL>/twilio/sms/incoming`
157
+ 3. Status callback: `https://<PUBLIC_BASE_URL>/twilio/sms/status` (POST)
158
+
159
+ You can also set both via REST in one shot:
160
+
161
+ ```bash
162
+ curl -X POST "https://api.twilio.com/2010-04-01/Accounts/<AC...>/IncomingPhoneNumbers/<PN...>.json" \
163
+ -u "<SID>:<SECRET>" \
164
+ --data-urlencode "VoiceUrl=https://nia.example.com/twilio/voice/incoming" \
165
+ --data-urlencode "SmsUrl=https://nia.example.com/twilio/sms/incoming" \
166
+ --data-urlencode "StatusCallback=https://nia.example.com/twilio/voice/status"
167
+ ```
168
+
169
+ **WhatsApp Sandbox**:
170
+
171
+ 1. Console → Messaging → Try it out → Send a WhatsApp message.
172
+ 2. Note the printed `join <two-words>` token.
173
+ 3. From your phone's WhatsApp, send `join <two-words>` to `+1 415 523 8886`. You're opted in.
174
+ 4. Sandbox settings → "When a message comes in" → Webhook (POST).
175
+ 5. URL: `https://<PUBLIC_BASE_URL>/twilio/whatsapp/incoming`
176
+ 6. Status callback: `https://<PUBLIC_BASE_URL>/twilio/whatsapp/status`
177
+
178
+ Sandbox opt-in expires after 72h of inactivity — rejoin with the same code.
179
+
180
+ If the Twilio account is on trial, every destination number (SMS,
181
+ WhatsApp, outbound voice) must be in the Verified Caller IDs list.
132
182
 
133
183
  ## CLI
134
184
 
@@ -169,17 +219,31 @@ nia job add morning-standup "0 8 * * *" \
169
219
  Daily standup at 8 AM owner-local time. Same pattern for evening retro,
170
220
  weekly review, urgent escalation, etc.
171
221
 
172
- ## Architecture (one-paragraph)
173
-
174
- `src/channels/phone/index.ts` boots a Bun HTTP+WS server on `PHONE_PORT`.
175
- Twilio reaches it via cloudflared. `twiml.ts` builds the `<Connect><Stream>`
176
- TwiML. `twilio.ts` calls Twilio's REST API and validates webhook
177
- signatures (HMAC-SHA1 with the account Auth Token). `relay.ts` bridges
178
- Twilio's Media Streams (mulaw, JSON-enveloped) to OpenAI Realtime
179
- (same g711_ulaw format — no resampling). `tools.ts` exposes
180
- `consult_claude`, `send_telegram`, `save_memory`, and `end_call` to the
181
- voice agent. `consult.ts` is the Claude escape hatch for reasoning-heavy
182
- turns. `instructions.ts` builds the system prompts.
222
+ ## Architecture
223
+
224
+ `src/channels/twilio/server.ts` owns the Bun HTTP+WS server on
225
+ `channels.twilio.port` (default 7079). All three Twilio channels register
226
+ their routes on it during `start()`; the server handles signature
227
+ validation, dedup, and rate-limit middleware before dispatching to the
228
+ channel's handler.
229
+
230
+ - `src/channels/twilio/signature.ts` — HMAC-SHA1 X-Twilio-Signature check.
231
+ - `src/channels/twilio/dedup.ts` — TTL set for `MessageSid`/`CallSid`.
232
+ - `src/channels/twilio/rate-limit.ts` — sliding-window per-key limiter.
233
+ - `src/channels/twilio/rest.ts` — `placeCall`, `sendMessage`,
234
+ `updateIncomingPhoneNumber`, etc. (Twilio REST helpers, no SDK).
235
+ - `src/channels/phone/` — voice channel: `twiml.ts` builds the
236
+ `<Connect><Stream>` TwiML, `relay.ts` bridges Twilio Media Streams
237
+ (mulaw 8 kHz) to OpenAI Realtime (same `g711_ulaw` format — no
238
+ resampling), `tools.ts` exposes `consult_claude` / `send_telegram` /
239
+ `save_memory` / `end_call` to the voice agent, `consult.ts` is the
240
+ Claude escape hatch, `instructions.ts` builds the system prompts.
241
+ - `src/channels/sms.ts` — SMS channel. Inbound webhook → chat engine →
242
+ REST reply. One engine per remote E.164 (`sms-<E164>` room).
243
+ - `src/channels/whatsapp.ts` — WhatsApp channel. Same shape, plus
244
+ `whatsapp:` prefix on Twilio addresses and `lastInboundAt` tracking
245
+ for the 24h customer-service window (outside it, replies are dropped
246
+ with a log entry — Twilio would reject them anyway).
183
247
 
184
248
  ## Cost model
185
249
 
@@ -221,3 +285,17 @@ prefer Telegram voice notes over live calls for long-form things.
221
285
  first, which defeats "Nia calling you".
222
286
  6. **Mac Mini sleep kills the daemon.** `sudo pmset -a sleep 0` keeps
223
287
  the host awake (display can still sleep).
288
+ 7. **WhatsApp 24-hour customer-service window is enforced.** Outside it,
289
+ Twilio rejects free-form replies (template-only). The whatsapp channel
290
+ tracks `lastInboundAt` per remote and fails closed with a log line
291
+ instead of sending what Twilio will reject.
292
+ 8. **WhatsApp Sandbox opt-in expires after 72h of inactivity.** You get
293
+ silently disconnected mid-trip; rejoin by texting the same
294
+ `join <two-words>` code to +1 415 523 8886.
295
+ 9. **US-Twilio-long-code → India SMS outbound deliverability is unreliable.**
296
+ India's TRAI scrubbing rules + US-side A2P 10DLC throttling drop a
297
+ chunk of outbound. Inbound (India → US Twilio) is more reliable.
298
+ Treat outbound as best-effort; smoke-test empirically.
299
+ 10. **Shared WhatsApp Sandbox number means strangers can opt in.** The
300
+ allowlist + per-remote rate limit are the only defenses; both are
301
+ enforced by the channel/server layers — don't remove them.
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Shared chat-engine lifecycle helpers used by the message-driven
3
+ * channels (telegram, slack, sms, whatsapp). Each channel keeps its
4
+ * own `Map<senderKey, ChatState>`; these helpers cover the bits that
5
+ * were copy-pasted between them:
6
+ *
7
+ * - resolve the latest room index for a prefix and open a fresh engine
8
+ * - rotate to a new room (for `/reset` / `/new` / `/restart`), persisting
9
+ * a placeholder session so the new index survives daemon restarts
10
+ * - chain work onto a per-sender lock so messages from the same sender
11
+ * don't race
12
+ *
13
+ * The caller supplies a builder lambda for the EngineOptions so channels
14
+ * that need room-aware fields (e.g. Slack's per-room `mcpServers`) can
15
+ * compute them with the resolved room name. Channels with static options
16
+ * just ignore the `room` argument in their builder.
17
+ */
18
+ import { createChatEngine } from "../../chat/engine";
19
+ import { Session } from "../../db/models";
20
+ import { log } from "../../utils/log";
21
+ import type { ChatState } from "../../types";
22
+ import type { EngineOptions } from "../../types/engine";
23
+
24
+ type EngineFactory = (room: string) => Omit<EngineOptions, "room" | "resume">;
25
+
26
/**
 * Opens a chat engine for `prefix` on the room index reported by
 * `Session.getLatestRoomIndex(prefix)`, with `resume: true`.
 *
 * @param prefix - Room-name prefix; the room name is `${prefix}-${index}`.
 * @param buildOpts - Builds the engine options from the resolved room name.
 * @returns A new ChatState (engine, room index, already-resolved lock).
 *   Caching it is the caller's responsibility.
 */
export async function openChatEngine(prefix: string, buildOpts: EngineFactory): Promise<ChatState> {
  const latestIndex = await Session.getLatestRoomIndex(prefix);
  const roomName = `${prefix}-${latestIndex}`;
  const engineOpts = buildOpts(roomName);
  log.info({ channel: engineOpts.channel, room: roomName }, "chat-session: opening engine");
  return {
    engine: await createChatEngine({ ...engineOpts, room: roomName, resume: true }),
    roomIndex: latestIndex,
    lock: Promise.resolve(),
  };
}
35
+
36
/**
 * Moves `prefix` onto a new room whose index is one past the value reported
 * by `Session.getLatestRoomIndex(prefix)`, and opens a non-resuming engine on it.
 *
 * A placeholder Session row (`placeholder-<room>`) is created before the
 * engine is built so the new index survives daemon restarts.
 *
 * @param prefix - Room-name prefix; the new room is `${prefix}-${index}`.
 * @param prev - Previous state, if any; its engine is closed first.
 * @param buildOpts - Builds the engine options from the new room name.
 * @returns A new ChatState (engine, room index, already-resolved lock).
 */
export async function rotateRoom(
  prefix: string,
  prev: ChatState | undefined,
  buildOpts: EngineFactory,
): Promise<ChatState> {
  if (prev) {
    prev.engine.close();
  }
  const nextIndex = (await Session.getLatestRoomIndex(prefix)) + 1;
  const roomName = `${prefix}-${nextIndex}`;
  await Session.create(`placeholder-${roomName}`, roomName);
  const engineOpts = buildOpts(roomName);
  log.info({ channel: engineOpts.channel, room: roomName }, "chat-session: rotated room");
  const freshEngine = await createChatEngine({ ...engineOpts, room: roomName, resume: false });
  return { engine: freshEngine, roomIndex: nextIndex, lock: Promise.resolve() };
}
52
+
53
/**
 * Queues `fn` behind whatever is already pending on `state.lock`, so tasks
 * chained onto the same state run one after another.
 *
 * `fn` runs whether the previous task fulfilled or rejected. A rejection (or
 * synchronous throw) from `fn` itself is reported and absorbed here, so
 * `state.lock` always settles fulfilled. Otherwise the tail of the chain
 * would stay a rejected promise with no handler until another task is
 * queued, which the runtime reports as an unhandled rejection.
 *
 * @param state - Chat state whose `lock` promise is replaced with the new tail.
 * @param fn - Task to run once the current `lock` settles.
 */
export function chainLock(state: ChatState, fn: () => Promise<void>): void {
  // Wrap `fn` so a previous rejection reason is never passed to it as an argument.
  const run = (): Promise<void> => fn();
  state.lock = state.lock.then(run, run).catch((err: unknown) => {
    console.error("chat-session: chained task failed", err);
  });
}
@@ -4,6 +4,9 @@ import { log } from "../utils/log";
4
4
  import { createTelegramChannel } from "./telegram";
5
5
  import { createSlackChannel } from "./slack";
6
6
  import { createPhoneChannel } from "./phone";
7
+ import { createSmsChannel } from "./sms";
8
+ import { createWhatsAppChannel } from "./whatsapp";
9
+ import { getTwilioServer } from "./twilio/server";
7
10
 
8
11
  export { getChannel, getStarted } from "./registry";
9
12
 
@@ -12,6 +15,8 @@ export function registerAllChannels(): void {
12
15
  registerChannel(() => createTelegramChannel());
13
16
  registerChannel(() => createSlackChannel());
14
17
  registerChannel(() => createPhoneChannel());
18
+ registerChannel(() => createSmsChannel());
19
+ registerChannel(() => createWhatsAppChannel());
15
20
  }
16
21
 
17
22
  export interface StartResult {
@@ -70,5 +75,12 @@ export async function stopChannels(channels: Channel[]): Promise<void> {
70
75
  log.error({ err: result.reason, channel: channels[i].name }, "channel failed to stop");
71
76
  }
72
77
  }
78
+ // Shared Twilio webhook server outlives any single channel; stop it once
79
+ // all channels are torn down.
80
+ try {
81
+ getTwilioServer().stop();
82
+ } catch (err) {
83
+ log.warn({ err }, "twilio-server stop failed");
84
+ }
73
85
  clearStarted();
74
86
  }