niahere 0.2.90 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,13 +36,14 @@ nia start # starts daemon + registers OS service
36
36
 
37
37
  - **Telegram** — message your agent from your phone, typing indicator while processing
38
38
  - **Slack** — Socket Mode bot with thread awareness, thinking emoji, watch channels for proactive monitoring
39
+ - **Phone (voice)** — Twilio + OpenAI Realtime. Inbound calls from allowlisted contacts and outbound calls via `place_call` MCP tool. Scheduled jobs can dial you (morning standup, evening retro, escalation). See `/nia-phone` skill.
39
40
  - **Terminal chat** — REPL with session resume support
40
41
  - **Scheduled jobs** — recurring jobs and crons that run Claude and can message you back. Stateful by default (working memory), per-job model routing for cost savings
41
42
  - **Persona system** — customizable identity, soul, owner profile, rules, and memory (preloaded every session)
42
43
  - **Agents** — domain specialists (marketer, senior-dev) via Claude Agent SDK subagents
43
44
  - **Skills** — loads skills from multiple directories, invokable as slash commands
44
45
  - **Cross-platform service** — launchd (macOS), systemd (Linux), service-aware restart
45
- - **MCP tools** — 20 tools for job management, messaging, memory, rules, and channel control
46
+ - **MCP tools** — 21 tools for job management, messaging, memory, rules, channel control, and outbound phone calls
46
47
  - **Background memory consolidation** — stages memory candidates from conversations automatically
47
48
  - **Session summaries** — optional handoff notes between sessions for continuity
48
49
  - **Backups** — `nia backup` with auto-backup before updates
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "niahere",
3
- "version": "0.2.90",
3
+ "version": "0.3.0",
4
4
  "description": "A personal AI assistant daemon — chat, scheduled jobs, persona system, extensible via skills.",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -0,0 +1,223 @@
1
+ ---
2
+ name: nia-phone
3
+ description: >
4
+ Use when setting up, deploying, or debugging Nia's phone channel
5
+ (Twilio + OpenAI Realtime voice calls). Covers env vars, cloudflared
6
+ named-tunnel setup, Twilio number webhook configuration, the
7
+ `nia phone` CLI subcommands, and the `place_call` MCP tool. Trigger
8
+ on mentions of "phone", "call", "voice", "twilio", "realtime", "ngrok",
9
+ "cloudflared", "media stream", or when the user is deploying Nia to a
10
+ new machine and needs the phone surface to come up.
11
+ ---
12
+
13
+ ## Overview
14
+
15
+ The phone channel (`src/channels/phone/`) bridges Twilio Programmable
16
+ Voice to the OpenAI Realtime API. It exposes:
17
+
18
+ - **Inbound calls** — owner or allowlisted contacts dial the Twilio
19
+ number; the call is bridged to the realtime model with full persona
20
+ context. Unknown callers are politely declined.
21
+ - **Outbound calls** — `place_call` MCP tool (or `nia phone call` CLI)
22
+ dials a number, seeds a per-call goal into the realtime session, and
23
+ Nia speaks first. Used by scheduled jobs (morning standup, evening
24
+ retro, escalation pings) and by chat ("call the dentist for me").
25
+
26
+ Transcripts persist to the `messages` table with `channel = 'phone'` and
27
+ `room = phone-<callSid>`.
28
+
29
+ ## Configuration
30
+
31
+ Phone config lives in `~/.niahere/config.yaml` under `channels.phone` —
32
+ same place as `channels.telegram` and `channels.slack`. Every field is
33
+ overridable by the matching env var if you prefer `.env` for secrets.
34
+
35
+ ```yaml
36
+ # ~/.niahere/config.yaml
37
+ channels:
38
+ phone:
39
+ twilio_sid: AC... # Account SID — or an API Key SID (SK…)
40
+ twilio_secret: ... # Auth Token if SID is AC, API Key Secret if SID is SK
41
+ twilio_auth_token:
42
+ ... # Required when twilio_sid is an API Key (SK…); signs webhooks.
43
+ # Omit if twilio_secret is already the Auth Token.
44
+ from_number: "+1..." # Your Twilio number (E.164)
45
+ owner_number: "+91..." # Owner's phone (E.164) — highest-trust caller
46
+ public_base_url: https://nia.example.com # No trailing slash
47
+ openai_api_key: sk-proj-... # For the Realtime voice loop
48
+
49
+ # Optional
50
+ port: 7079 # Local port the webhook server binds to
51
+ allowlist: ["+12025550100"] # Extra allowed inbound callers (E.164)
52
+ voice: marin # Realtime voice (marin | cedar | shimmer | coral | alloy | ash | …)
53
+ realtime_model: gpt-realtime
54
+ ```
55
+
56
+ Env overrides (use these if you'd rather keep secrets in `.env`):
57
+
58
+ ```
59
+ TWILIO_SID, TWILIO_SECRET, TWILIO_AUTH_TOKEN
60
+ PHONE_FROM_NUMBER, PRIMARY_PHONE_USER
61
+ PUBLIC_BASE_URL, OPENAI_API_KEY
62
+ PHONE_PORT, PHONE_ALLOWLIST (comma-separated), PHONE_VOICE, PHONE_REALTIME_MODEL
63
+ ```
64
+
65
+ `nia phone status` prints which fields are set / missing.
66
+
67
+ ## Cloudflared named tunnel (production)
68
+
69
+ The ephemeral `trycloudflare.com` URL is fine for testing but dies on
70
+ restart. For a persistent deploy:
71
+
72
+ ```bash
73
+ brew install cloudflared
74
+ cloudflared tunnel login # opens browser, writes cert.pem
75
+ cloudflared tunnel create nia-mac # creates the tunnel
76
+ cloudflared tunnel route dns nia-mac nia.example.com # CNAME on a Cloudflare-managed domain
77
+ ```
78
+
79
+ Keep nia's tunnel config in nia's home as a single flat file:
80
+ `~/.niahere/cloudflared-config.yaml`. The cloudflared-internal artifacts
81
+ (`cert.pem`, the per-tunnel credentials JSON written by `tunnel create`)
82
+ stay where cloudflared put them — those are cloudflared's territory, not
83
+ nia's.
84
+
85
+ ```yaml
86
+ # ~/.niahere/cloudflared-config.yaml
87
+ tunnel: nia-mac
88
+ credentials-file: /Users/<you>/.cloudflared/<tunnel-id>.json
89
+
90
+ ingress:
91
+ - hostname: nia.example.com
92
+ service: http://localhost:7079
93
+ - service: http_status:404
94
+ ```
95
+
96
+ Install as a launchd service, pointing it at our config:
97
+
98
+ ```bash
99
+ sudo cloudflared --config ~/.niahere/cloudflared-config.yaml service install
100
+ ```
101
+
102
+ If the generated plist at `/Library/LaunchDaemons/com.cloudflare.cloudflared.plist`
103
+ doesn't include the `--config` arg in `ProgramArguments`, edit it in, then:
104
+
105
+ ```bash
106
+ sudo launchctl bootout system/com.cloudflare.cloudflared
107
+ sudo launchctl bootstrap system /Library/LaunchDaemons/com.cloudflare.cloudflared.plist
108
+ ```
109
+
110
+ Then set `public_base_url: https://nia.example.com` in `~/.niahere/config.yaml` (or `PUBLIC_BASE_URL=https://nia.example.com` in `.env`).
111
+
112
+ Verify with `curl https://nia.example.com/healthz` — should return `ok`
113
+ once the daemon is running.
114
+
115
+ ## Twilio number webhook (inbound only)
116
+
117
+ Outbound calls do NOT need any Twilio Console config — `placeCall`
118
+ controls the TwiML URL itself.
119
+
120
+ Inbound calls need the Twilio number's Voice webhook pointed at your
121
+ public URL:
122
+
123
+ 1. Twilio Console → Phone Numbers → Active Numbers → click your number.
124
+ 2. Voice Configuration → "A call comes in" → Webhook.
125
+ 3. URL: `<PUBLIC_BASE_URL>/twilio/voice/incoming` (the base URL already includes `https://`) — Method: POST.
126
+ 4. Status callback URL: `<PUBLIC_BASE_URL>/twilio/voice/status`
127
+ — Method: POST.
128
+ 5. Save.
129
+
130
+ If the Twilio account is on trial, every destination number for
131
+ outbound calls must be in the Verified Caller IDs list.
132
+
133
+ ## CLI
134
+
135
+ ```bash
136
+ nia phone status # show config + missing fields
137
+ nia phone call <+E164> "<goal sentence>" # one-shot outbound smoke test
138
+ ```
139
+
140
+ The `call` subcommand boots a standalone phone server, places the call,
141
+ waits for it to complete, then exits — does NOT need the daemon running.
142
+ Useful for smoke-testing without spinning up the full daemon.
143
+
144
+ ## MCP tool — `place_call`
145
+
146
+ ```ts
147
+ place_call({
148
+ number: string, // E.164, e.g. "+13025551234"
149
+ goal: string, // What Nia should accomplish — seeded into session instructions
150
+ context?: string, // Extra background (calendar dump, prior notes…)
151
+ max_minutes?: number, // Hard cap (default 10, max 30)
152
+ voice?: string, // Override default voice per call
153
+ })
154
+ ```
155
+
156
+ Returns immediately with `{ callSid, status }`. The call completes
157
+ asynchronously; transcript lands in the `messages` table.
158
+
159
+ ## Scheduled-job pattern (morning standup)
160
+
161
+ ```bash
162
+ nia job add morning-standup "0 8 * * *" \
163
+ "Call me at +917667078414 and run my morning standup. \
164
+ Ask what I want to ship today, what's blocking me, \
165
+ and what to dig into while I sleep. Listen more than \
166
+ you talk. End cleanly when we're wrapped."
167
+ ```
168
+
169
+ Daily standup at 8 AM owner-local time. Same pattern for evening retro,
170
+ weekly review, urgent escalation, etc.
171
+
172
+ ## Architecture (one-paragraph)
173
+
174
+ `src/channels/phone/index.ts` boots a Bun HTTP+WS server on `PHONE_PORT`.
175
+ Twilio reaches it via cloudflared. `twiml.ts` builds the `<Connect><Stream>`
176
+ TwiML. `twilio.ts` calls Twilio's REST API and validates webhook
177
+ signatures (HMAC-SHA1 with the account Auth Token). `relay.ts` bridges
178
+ Twilio's Media Streams (mulaw, JSON-enveloped) to OpenAI Realtime
179
+ (same g711_ulaw format — no resampling). `tools.ts` exposes
180
+ `consult_claude`, `send_telegram`, `save_memory`, and `end_call` to the
181
+ voice agent. `consult.ts` is the Claude escape hatch for reasoning-heavy
182
+ turns. `instructions.ts` builds the system prompts.
183
+
184
+ ## Cost model
185
+
186
+ | Component | US → India | US → US |
187
+ | -------------------------------------------- | ---------- | ------- |
188
+ | Twilio voice (per min) | $0.10–0.15 | $0.014 |
189
+ | OpenAI Realtime (per min, mixed in+out) | $0.20–0.30 | same |
190
+ | `consult_claude` (per invocation, when used) | $0.01–0.05 | same |
191
+
192
+ A 5-minute call to India runs ~$1.50–2.25. Daily morning standup is
193
+ ~$45–70/month. Levers to cut cost: buy an Indian Twilio number for
194
+ domestic rates, tighten `max_minutes` (most standups finish in 2–3 min),
195
+ prefer Telegram voice notes over live calls for long-form things.
196
+
197
+ ## Debugging
198
+
199
+ - `nia phone status` — verify config (lists which fields are set or missing).
200
+ - `curl <PUBLIC_BASE_URL>/healthz` — confirms the tunnel reaches
201
+ the daemon.
202
+ - `LOG_LEVEL=debug bun run src/cli/index.ts phone call …` — dumps every
203
+ OpenAI Realtime event type (incl. ones we don't handle) so you can
204
+ see what the GA API is actually sending.
205
+ - `bun test tests/phone.test.ts` — unit tests for TwiML + signature.
206
+
207
+ ## Common pitfalls (learned the hard way)
208
+
209
+ 1. **Don't try to update a queued call's TwiML URL.** Twilio rejects
210
+ redirect until `in-progress`. Bake the routing into the URL Twilio
211
+ first fetches, or read `CallSid` from the webhook body.
212
+ 2. **GA Realtime needs `output_modalities: ["audio"]` in `session.update`.**
213
+ Without it, the model silently drops audio synthesis and only emits
214
+ transcripts — you'll get "done" events with no "delta" events.
215
+ 3. **Drop the `OpenAI-Beta: realtime=v1` header.** GA endpoint rejects it.
216
+ 4. **API Key SIDs (SK…) work for REST auth but NOT for webhook signature
217
+ validation.** Set `TWILIO_AUTH_TOKEN` separately to the account-level
218
+ Auth Token when using an API Key.
219
+ 5. **Send the opener immediately on session open, don't wait for first
220
+ media frame.** Otherwise the user picks up to silence and speaks
221
+ first, which defeats "Nia calling you".
222
+ 6. **Mac Mini sleep kills the daemon.** `sudo pmset -a sleep 0` keeps
223
+ the host awake (display can still sleep).
@@ -3,6 +3,7 @@ import { registerChannel, getFactories, trackStarted, clearStarted } from "./reg
3
3
  import { log } from "../utils/log";
4
4
  import { createTelegramChannel } from "./telegram";
5
5
  import { createSlackChannel } from "./slack";
6
+ import { createPhoneChannel } from "./phone";
6
7
 
7
8
  export { getChannel, getStarted } from "./registry";
8
9
 
@@ -10,6 +11,7 @@ export { getChannel, getStarted } from "./registry";
10
11
/**
 * Registers a factory for each built-in channel — Telegram, Slack, then
 * phone — with the channel registry.
 *
 * Every factory is wrapped in a thunk, so no channel is constructed here;
 * construction happens only when the registered thunk is later called.
 */
export function registerAllChannels(): void {
  // Order matters only insofar as it mirrors the registration order callers see.
  const channelFactories = [createTelegramChannel, createSlackChannel, createPhoneChannel];
  for (const createChannel of channelFactories) {
    registerChannel(() => createChannel());
  }
}
14
16
 
15
17
  export interface StartResult {
@@ -0,0 +1,43 @@
1
+ /**
2
+ * `consult_claude` — single-shot escape hatch from the realtime voice model
3
+ * into Claude for memory-aware or reasoning-heavy questions. Voice agents
4
+ * use this when a turn exceeds the seeded context.
5
+ *
6
+ * Heavyweight (multi-second latency) by design — keep usage selective.
7
+ */
8
+ import Anthropic from "@anthropic-ai/sdk";
9
+ import { loadIdentity } from "../../chat/identity";
10
+ import { log } from "../../utils/log";
11
+
12
/** Module-wide Anthropic SDK client; stays `null` until `client()` is first called. */
let cachedClient: Anthropic | null = null;

/**
 * Returns the shared Anthropic client, constructing it with the SDK's default
 * options on first use and reusing that instance afterwards.
 */
function client(): Anthropic {
  return (cachedClient ??= new Anthropic());
}
17
+
18
/**
 * Sends a single question to Claude on behalf of the realtime voice loop and
 * returns a short, speakable answer.
 *
 * The system prompt is the loaded identity (omitted when empty) followed by
 * fixed instructions that name the caller and ask for a conversational,
 * markdown-free reply of under 60 words.
 *
 * Failures of the Claude request are caught, logged, and reported in the
 * returned string rather than thrown. `loadIdentity()` runs outside that
 * guard, so an error from it still propagates to the caller.
 *
 * @param question - Sent verbatim as the only user message.
 * @param callerLabel - Interpolated into the system prompt as the caller description.
 * @returns The trimmed text of the reply; `"(no answer)"` when the reply has
 *   no non-empty text; or `"error consulting Claude: <message>"` when the
 *   request fails.
 */
export async function consultClaude(question: string, callerLabel: string): Promise<string> {
  const system = [
    loadIdentity(),
    "You are answering a one-shot question from Nia's voice loop during an active phone call.",
    `Caller: ${callerLabel}.`,
    "Answer in under 60 words, conversational, no markdown. The voice model will speak your answer verbatim.",
  ]
    .filter(Boolean) // drop an empty identity so the prompt has no leading blank section
    .join("\n\n");

  try {
    const resp = await client().messages.create({
      model: "claude-sonnet-4-6",
      max_tokens: 400,
      system,
      messages: [{ role: "user", content: question }],
    });

    // A reply is a list of content blocks. Collect every text block instead of
    // trusting that the first block is the text one.
    const textParts: string[] = [];
    for (const block of resp.content) {
      if (block.type === "text") textParts.push(block.text);
    }

    const answer = textParts.join("").trim();
    // Never hand the voice model an empty string to speak.
    return answer.length > 0 ? answer : "(no answer)";
  } catch (err) {
    log.error({ err }, "phone: consult_claude failed");
    return `error consulting Claude: ${err instanceof Error ? err.message : String(err)}`;
  }
}