niahere 0.2.89 → 0.2.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/image-generation/SKILL.md +53 -24
- package/skills/image-generation/scripts/generate_image.py +116 -61
- package/skills/nia-phone/SKILL.md +206 -0
- package/src/channels/index.ts +2 -0
- package/src/channels/phone/consult.ts +43 -0
- package/src/channels/phone/index.ts +464 -0
- package/src/channels/phone/instructions.ts +42 -0
- package/src/channels/phone/relay.ts +334 -0
- package/src/channels/phone/tools.ts +83 -0
- package/src/channels/phone/twilio.ts +125 -0
- package/src/channels/phone/twiml.ts +60 -0
- package/src/cli/index.ts +6 -0
- package/src/cli/phone.ts +127 -0
- package/src/mcp/server.ts +24 -1
- package/src/mcp/tools.ts +37 -38
- package/src/types/config.ts +26 -0
- package/src/types/index.ts +1 -1
- package/src/utils/config.ts +71 -4
- package/src/utils/memory.ts +49 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: nia-phone
|
|
3
|
+
description: >
|
|
4
|
+
Use when setting up, deploying, or debugging Nia's phone channel
|
|
5
|
+
(Twilio + OpenAI Realtime voice calls). Covers env vars, cloudflared
|
|
6
|
+
named-tunnel setup, Twilio number webhook configuration, the
|
|
7
|
+
`nia phone` CLI subcommands, and the `place_call` MCP tool. Trigger
|
|
8
|
+
on mentions of "phone", "call", "voice", "twilio", "realtime", "ngrok",
|
|
9
|
+
"cloudflared", "media stream", or when the user is deploying Nia to a
|
|
10
|
+
new machine and needs the phone surface to come up.
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Overview
|
|
14
|
+
|
|
15
|
+
The phone channel (`src/channels/phone/`) bridges Twilio Programmable
|
|
16
|
+
Voice to the OpenAI Realtime API. It exposes:
|
|
17
|
+
|
|
18
|
+
- **Inbound calls** — owner or allowlisted contacts dial the Twilio
|
|
19
|
+
number; the call is bridged to the realtime model with full persona
|
|
20
|
+
context. Unknown callers are politely declined.
|
|
21
|
+
- **Outbound calls** — `place_call` MCP tool (or `nia phone call` CLI)
|
|
22
|
+
dials a number, seeds a per-call goal into the realtime session, and
|
|
23
|
+
Nia speaks first. Used by scheduled jobs (morning standup, evening
|
|
24
|
+
retro, escalation pings) and by chat ("call the dentist for me").
|
|
25
|
+
|
|
26
|
+
Transcripts persist to the `messages` table with `channel = 'phone'` and
|
|
27
|
+
`room = phone-<callSid>`.
|
|
28
|
+
|
|
29
|
+
## Required env vars
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
TWILIO_SID # Account SID (AC…) — or an API Key SID (SK…)
|
|
33
|
+
TWILIO_SECRET # Auth Token (if SID is AC) — or API Key Secret (if SID is SK)
|
|
34
|
+
TWILIO_AUTH_TOKEN # Required when SID is an API Key — signs webhooks.
|
|
35
|
+
# Omit if TWILIO_SECRET is already the Auth Token.
|
|
36
|
+
PRIMARY_PHONE_USER # Owner's number in E.164 (e.g. +917667078414).
|
|
37
|
+
PHONE_FROM_NUMBER # Your Twilio number in E.164 (e.g. +13025480697).
|
|
38
|
+
PUBLIC_BASE_URL # https://<your-tunnel-hostname> — NO trailing slash.
|
|
39
|
+
OPENAI_API_KEY # For the Realtime voice loop.
|
|
40
|
+
|
|
41
|
+
# Optional
|
|
42
|
+
PHONE_PORT=7079 # Local port the webhook server binds to.
|
|
43
|
+
PHONE_ALLOWLIST=+12025550100,+14155551234 # Extra allowed inbound callers.
|
|
44
|
+
PHONE_VOICE=marin # Realtime voice (marin | cedar | shimmer | coral | alloy | ash | …).
|
|
45
|
+
PHONE_REALTIME_MODEL=gpt-realtime # Override if you want a specific model.
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
`nia phone status` prints which vars are set / missing.
|
|
49
|
+
|
|
50
|
+
## Cloudflared named tunnel (production)
|
|
51
|
+
|
|
52
|
+
The ephemeral `trycloudflare.com` URL is fine for testing but dies on
|
|
53
|
+
restart. For a persistent deploy:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
brew install cloudflared
|
|
57
|
+
cloudflared tunnel login # opens browser, writes cert.pem
|
|
58
|
+
cloudflared tunnel create nia-mac # creates the tunnel
|
|
59
|
+
cloudflared tunnel route dns nia-mac nia.example.com # CNAME on a Cloudflare-managed domain
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Keep nia's tunnel config in nia's home as a single flat file:
|
|
63
|
+
`~/.niahere/cloudflared-config.yaml`. The cloudflared-internal artifacts
|
|
64
|
+
(`cert.pem`, the per-tunnel credentials JSON written by `tunnel create`)
|
|
65
|
+
stay where cloudflared put them — those are cloudflared's territory, not
|
|
66
|
+
nia's.
|
|
67
|
+
|
|
68
|
+
```yaml
|
|
69
|
+
# ~/.niahere/cloudflared-config.yaml
|
|
70
|
+
tunnel: nia-mac
|
|
71
|
+
credentials-file: /Users/<you>/.cloudflared/<tunnel-id>.json
|
|
72
|
+
|
|
73
|
+
ingress:
|
|
74
|
+
- hostname: nia.example.com
|
|
75
|
+
service: http://localhost:7079
|
|
76
|
+
- service: http_status:404
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Install as a launchd service, pointing it at our config:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
sudo cloudflared --config ~/.niahere/cloudflared-config.yaml service install
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
If the generated plist at `/Library/LaunchDaemons/com.cloudflare.cloudflared.plist`
|
|
86
|
+
doesn't include the `--config` arg in `ProgramArguments`, edit it in, then:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
sudo launchctl bootout system/com.cloudflare.cloudflared
|
|
90
|
+
sudo launchctl bootstrap system /Library/LaunchDaemons/com.cloudflare.cloudflared.plist
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Then set in `.env`: `PUBLIC_BASE_URL=https://nia.example.com`.
|
|
94
|
+
|
|
95
|
+
Verify with `curl https://nia.example.com/healthz` — should return `ok`
|
|
96
|
+
once the daemon is running.
|
|
97
|
+
|
|
98
|
+
## Twilio number webhook (inbound only)
|
|
99
|
+
|
|
100
|
+
Outbound calls do NOT need any Twilio Console config — `placeCall`
|
|
101
|
+
controls the TwiML URL itself.
|
|
102
|
+
|
|
103
|
+
Inbound calls need the Twilio number's Voice webhook pointed at your
|
|
104
|
+
public URL:
|
|
105
|
+
|
|
106
|
+
1. Twilio Console → Phone Numbers → Active Numbers → click your number.
|
|
107
|
+
2. Voice Configuration → "A call comes in" → Webhook.
|
|
108
|
+
3. URL: `<PUBLIC_BASE_URL>/twilio/voice/incoming` — Method: POST.
|
|
109
|
+
4. Status callback URL: `<PUBLIC_BASE_URL>/twilio/voice/status`
|
|
110
|
+
— Method: POST.
|
|
111
|
+
5. Save.
|
|
112
|
+
|
|
113
|
+
If the Twilio account is on trial, every destination number for
|
|
114
|
+
outbound calls must be in the Verified Caller IDs list.
|
|
115
|
+
|
|
116
|
+
## CLI
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
nia phone status # show config + missing fields
|
|
120
|
+
nia phone call <+E164> "<goal sentence>" # one-shot outbound smoke test
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
The `call` subcommand boots a standalone phone server, places the call,
|
|
124
|
+
waits for it to complete, then exits — does NOT need the daemon running.
|
|
125
|
+
Useful for smoke-testing without spinning up the full daemon.
|
|
126
|
+
|
|
127
|
+
## MCP tool — `place_call`
|
|
128
|
+
|
|
129
|
+
```ts
|
|
130
|
+
place_call({
|
|
131
|
+
number: string, // E.164, e.g. "+13025551234"
|
|
132
|
+
goal: string, // What Nia should accomplish — seeded into session instructions
|
|
133
|
+
context?: string, // Extra background (calendar dump, prior notes…)
|
|
134
|
+
max_minutes?: number, // Hard cap (default 10, max 30)
|
|
135
|
+
voice?: string, // Override default voice per call
|
|
136
|
+
})
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Returns immediately with `{ callSid, status }`. The call completes
|
|
140
|
+
asynchronously; transcript lands in the `messages` table.
|
|
141
|
+
|
|
142
|
+
## Scheduled-job pattern (morning standup)
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
nia job add morning-standup "0 8 * * *" \
|
|
146
|
+
"Call me at +917667078414 and run my morning standup. \
|
|
147
|
+
Ask what I want to ship today, what's blocking me, \
|
|
148
|
+
and what to dig into while I sleep. Listen more than \
|
|
149
|
+
you talk. End cleanly when we're wrapped."
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Daily standup at 8 AM owner-local time. Same pattern for evening retro,
|
|
153
|
+
weekly review, urgent escalation, etc.
|
|
154
|
+
|
|
155
|
+
## Architecture (one-paragraph)
|
|
156
|
+
|
|
157
|
+
`src/channels/phone/index.ts` boots a Bun HTTP+WS server on `PHONE_PORT`.
|
|
158
|
+
Twilio reaches it via cloudflared. `twiml.ts` builds the `<Connect><Stream>`
|
|
159
|
+
TwiML. `twilio.ts` calls Twilio's REST API and validates webhook
|
|
160
|
+
signatures (HMAC-SHA1 with the account Auth Token). `relay.ts` bridges
|
|
161
|
+
Twilio's Media Streams (mulaw, JSON-enveloped) to OpenAI Realtime
|
|
162
|
+
(same g711_ulaw format — no resampling). `tools.ts` exposes
|
|
163
|
+
`consult_claude`, `send_telegram`, `save_memory`, and `end_call` to the
|
|
164
|
+
voice agent. `consult.ts` is the Claude escape hatch for reasoning-heavy
|
|
165
|
+
turns. `instructions.ts` builds the system prompts.
|
|
166
|
+
|
|
167
|
+
## Cost model
|
|
168
|
+
|
|
169
|
+
| Component | US → India | US → US |
|
|
170
|
+
| -------------------------------------------- | ---------- | ------- |
|
|
171
|
+
| Twilio voice (per min) | $0.10–0.15 | $0.014 |
|
|
172
|
+
| OpenAI Realtime (per min, mixed in+out) | $0.20–0.30 | same |
|
|
173
|
+
| `consult_claude` (per invocation, when used) | $0.01–0.05 | same |
|
|
174
|
+
|
|
175
|
+
A 5-minute call to India runs ~$1.50–2.25. Daily morning standup is
|
|
176
|
+
~$45–70/month. Levers to cut cost: buy an Indian Twilio number for
|
|
177
|
+
domestic rates, tighten `max_minutes` (most standups finish in 2–3 min),
|
|
178
|
+
prefer Telegram voice notes over live calls for long-form things.
|
|
179
|
+
|
|
180
|
+
## Debugging
|
|
181
|
+
|
|
182
|
+
- `nia phone status` — verify env.
|
|
183
|
+
- `curl <PUBLIC_BASE_URL>/healthz` — confirms the tunnel reaches
|
|
184
|
+
the daemon.
|
|
185
|
+
- `LOG_LEVEL=debug bun run src/cli/index.ts phone call …` — dumps every
|
|
186
|
+
OpenAI Realtime event type (incl. ones we don't handle) so you can
|
|
187
|
+
see what the GA API is actually sending.
|
|
188
|
+
- `bun test tests/phone.test.ts` — unit tests for TwiML + signature.
|
|
189
|
+
|
|
190
|
+
## Common pitfalls (learned the hard way)
|
|
191
|
+
|
|
192
|
+
1. **Don't try to update a queued call's TwiML URL.** Twilio rejects
|
|
193
|
+
redirect until `in-progress`. Bake the routing into the URL Twilio
|
|
194
|
+
first fetches, or read `CallSid` from the webhook body.
|
|
195
|
+
2. **GA Realtime needs `output_modalities: ["audio"]` in `session.update`.**
|
|
196
|
+
Without it, the model silently drops audio synthesis and only emits
|
|
197
|
+
transcripts — you'll get "done" events with no "delta" events.
|
|
198
|
+
3. **Drop the `OpenAI-Beta: realtime=v1` header.** GA endpoint rejects it.
|
|
199
|
+
4. **API Key SIDs (SK…) work for REST auth but NOT for webhook signature
|
|
200
|
+
validation.** Set `TWILIO_AUTH_TOKEN` separately to the account-level
|
|
201
|
+
Auth Token when using an API Key.
|
|
202
|
+
5. **Send the opener immediately on session open, don't wait for first
|
|
203
|
+
media frame.** Otherwise the user picks up to silence and speaks
|
|
204
|
+
first, which defeats "Nia calling you".
|
|
205
|
+
6. **Mac Mini sleep kills the daemon.** `sudo pmset -a sleep 0` keeps
|
|
206
|
+
the host awake (display can still sleep).
|
package/src/channels/index.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { registerChannel, getFactories, trackStarted, clearStarted } from "./reg
|
|
|
3
3
|
import { log } from "../utils/log";
|
|
4
4
|
import { createTelegramChannel } from "./telegram";
|
|
5
5
|
import { createSlackChannel } from "./slack";
|
|
6
|
+
import { createPhoneChannel } from "./phone";
|
|
6
7
|
|
|
7
8
|
export { getChannel, getStarted } from "./registry";
|
|
8
9
|
|
|
@@ -10,6 +11,7 @@ export { getChannel, getStarted } from "./registry";
|
|
|
10
11
|
/**
 * Registers one factory per built-in channel with the channel registry, in
 * this order: Telegram, Slack, phone. Each factory is a zero-argument closure,
 * so no channel is constructed by this function itself; construction happens
 * whenever the registry invokes the factory.
 */
export function registerAllChannels(): void {
|
|
11
12
|
registerChannel(() => createTelegramChannel());
|
|
12
13
|
registerChannel(() => createSlackChannel());
|
|
14
|
+
registerChannel(() => createPhoneChannel());
|
|
13
15
|
}
|
|
14
16
|
|
|
15
17
|
export interface StartResult {
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `consult_claude` — single-shot escape hatch from the realtime voice model
|
|
3
|
+
* into Claude for memory-aware or reasoning-heavy questions. Voice agents
|
|
4
|
+
* use this when a turn exceeds the seeded context.
|
|
5
|
+
*
|
|
6
|
+
* Heavyweight (multi-second latency) by design — keep usage selective.
|
|
7
|
+
*/
|
|
8
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
9
|
+
import { loadIdentity } from "../../chat/identity";
|
|
10
|
+
import { log } from "../../utils/log";
|
|
11
|
+
|
|
12
|
+
/** Module-level cache for the Anthropic SDK client; stays `null` until first use. */
let _anthropic: Anthropic | null = null;

/**
 * Returns the cached Anthropic client, constructing it (with no arguments)
 * on the first call and handing back that same instance afterwards.
 */
function client(): Anthropic {
  if (_anthropic === null) {
    _anthropic = new Anthropic();
  }
  return _anthropic;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Asks Claude a single question on behalf of the realtime voice loop and
 * returns a short, speakable answer.
 *
 * The system prompt is assembled from the loaded identity (dropped when it is
 * falsy), a note that this is a one-shot question during an active call, the
 * caller label, and the length/format constraints.
 *
 * @param question - Sent verbatim as the only user message.
 * @param callerLabel - Interpolated into the system prompt as `Caller: …`.
 * @returns The trimmed text of the reply; `"(no answer)"` when the reply
 *   contains no text; or an `"error consulting Claude: …"` string when the
 *   API call fails. Failures of the API call are logged and returned as a
 *   string rather than thrown.
 */
export async function consultClaude(question: string, callerLabel: string): Promise<string> {
  const identity = loadIdentity();
  const system = [
    identity,
    "You are answering a one-shot question from Nia's voice loop during an active phone call.",
    `Caller: ${callerLabel}.`,
    "Answer in under 60 words, conversational, no markdown. The voice model will speak your answer verbatim.",
  ]
    .filter(Boolean)
    .join("\n\n");

  try {
    const resp = await client().messages.create({
      model: "claude-sonnet-4-6",
      max_tokens: 400,
      system,
      messages: [{ role: "user", content: question }],
    });

    // The response is an array of typed content blocks. Looking only at the
    // first one loses the answer whenever a non-text block comes first or the
    // text is split across several blocks, so gather every text block.
    const textParts: string[] = [];
    for (const block of resp.content) {
      if (block.type === "text") textParts.push(block.text);
    }
    const answer = textParts.join("").trim();

    // An empty reply gets the same placeholder as a reply with no text block.
    return answer.length > 0 ? answer : "(no answer)";
  } catch (err) {
    log.error({ err }, "phone: consult_claude failed");
    return `error consulting Claude: ${err instanceof Error ? err.message : String(err)}`;
  }
}
|