niahere 0.2.90 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +1 -1
- package/skills/nia-phone/SKILL.md +223 -0
- package/src/channels/index.ts +2 -0
- package/src/channels/phone/consult.ts +43 -0
- package/src/channels/phone/index.ts +467 -0
- package/src/channels/phone/instructions.ts +42 -0
- package/src/channels/phone/relay.ts +334 -0
- package/src/channels/phone/tools.ts +83 -0
- package/src/channels/phone/twilio.ts +125 -0
- package/src/channels/phone/twiml.ts +60 -0
- package/src/cli/index.ts +6 -0
- package/src/cli/phone.ts +131 -0
- package/src/commands/init.ts +67 -0
- package/src/mcp/server.ts +24 -1
- package/src/mcp/tools.ts +37 -38
- package/src/types/config.ts +26 -0
- package/src/types/index.ts +1 -1
- package/src/utils/config.ts +71 -4
- package/src/utils/memory.ts +49 -0
package/README.md
CHANGED
|
@@ -36,13 +36,14 @@ nia start # starts daemon + registers OS service
|
|
|
36
36
|
|
|
37
37
|
- **Telegram** — message your agent from your phone, typing indicator while processing
|
|
38
38
|
- **Slack** — Socket Mode bot with thread awareness, thinking emoji, watch channels for proactive monitoring
|
|
39
|
+
- **Phone (voice)** — Twilio + OpenAI Realtime. Inbound calls from allowlisted contacts and outbound calls via `place_call` MCP tool. Scheduled jobs can dial you (morning standup, evening retro, escalation). See `/nia-phone` skill.
|
|
39
40
|
- **Terminal chat** — REPL with session resume support
|
|
40
41
|
- **Scheduled jobs** — recurring jobs and crons that run Claude and can message you back. Stateful by default (working memory), per-job model routing for cost savings
|
|
41
42
|
- **Persona system** — customizable identity, soul, owner profile, rules, and memory (preloaded every session)
|
|
42
43
|
- **Agents** — domain specialists (marketer, senior-dev) via Claude Agent SDK subagents
|
|
43
44
|
- **Skills** — loads skills from multiple directories, invokable as slash commands
|
|
44
45
|
- **Cross-platform service** — launchd (macOS), systemd (Linux), service-aware restart
|
|
45
|
-
- **MCP tools** —
|
|
46
|
+
- **MCP tools** — 21 tools for job management, messaging, memory, rules, channel control, and outbound phone calls
|
|
46
47
|
- **Background memory consolidation** — stages memory candidates from conversations automatically
|
|
47
48
|
- **Session summaries** — optional handoff notes between sessions for continuity
|
|
48
49
|
- **Backups** — `nia backup` with auto-backup before updates
|
package/package.json
CHANGED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: nia-phone
|
|
3
|
+
description: >
|
|
4
|
+
Use when setting up, deploying, or debugging Nia's phone channel
|
|
5
|
+
(Twilio + OpenAI Realtime voice calls). Covers env vars, cloudflared
|
|
6
|
+
named-tunnel setup, Twilio number webhook configuration, the
|
|
7
|
+
`nia phone` CLI subcommands, and the `place_call` MCP tool. Trigger
|
|
8
|
+
on mentions of "phone", "call", "voice", "twilio", "realtime", "ngrok",
|
|
9
|
+
"cloudflared", "media stream", or when the user is deploying Nia to a
|
|
10
|
+
new machine and needs the phone surface to come up.
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Overview
|
|
14
|
+
|
|
15
|
+
The phone channel (`src/channels/phone/`) bridges Twilio Programmable
|
|
16
|
+
Voice to the OpenAI Realtime API. It exposes:
|
|
17
|
+
|
|
18
|
+
- **Inbound calls** — owner or allowlisted contacts dial the Twilio
|
|
19
|
+
number; the call is bridged to the realtime model with full persona
|
|
20
|
+
context. Unknown callers are politely declined.
|
|
21
|
+
- **Outbound calls** — `place_call` MCP tool (or `nia phone call` CLI)
|
|
22
|
+
dials a number, seeds a per-call goal into the realtime session, and
|
|
23
|
+
Nia speaks first. Used by scheduled jobs (morning standup, evening
|
|
24
|
+
retro, escalation pings) and by chat ("call the dentist for me").
|
|
25
|
+
|
|
26
|
+
Transcripts persist to the `messages` table with `channel = 'phone'` and
|
|
27
|
+
`room = phone-<callSid>`.
|
|
28
|
+
|
|
29
|
+
## Configuration
|
|
30
|
+
|
|
31
|
+
Phone config lives in `~/.niahere/config.yaml` under `channels.phone` —
|
|
32
|
+
same place as `channels.telegram` and `channels.slack`. Every field is
|
|
33
|
+
overridable by the matching env var if you prefer `.env` for secrets.
|
|
34
|
+
|
|
35
|
+
```yaml
|
|
36
|
+
# ~/.niahere/config.yaml
|
|
37
|
+
channels:
|
|
38
|
+
phone:
|
|
39
|
+
twilio_sid: AC... # Account SID — or an API Key SID (SK…)
|
|
40
|
+
twilio_secret: ... # Auth Token if SID is AC, API Key Secret if SID is SK
|
|
41
|
+
twilio_auth_token:
|
|
42
|
+
... # Required when twilio_sid is an API Key (SK…); signs webhooks.
|
|
43
|
+
# Omit if twilio_secret is already the Auth Token.
|
|
44
|
+
from_number: "+1..." # Your Twilio number (E.164)
|
|
45
|
+
owner_number: "+91..." # Owner's phone (E.164) — highest-trust caller
|
|
46
|
+
public_base_url: https://nia.example.com # No trailing slash
|
|
47
|
+
openai_api_key: sk-proj-... # For the Realtime voice loop
|
|
48
|
+
|
|
49
|
+
# Optional
|
|
50
|
+
port: 7079 # Local port the webhook server binds to
|
|
51
|
+
allowlist: ["+12025550100"] # Extra allowed inbound callers (E.164)
|
|
52
|
+
voice: marin # Realtime voice (marin | cedar | shimmer | coral | alloy | ash | …)
|
|
53
|
+
realtime_model: gpt-realtime
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Env overrides (use these if you'd rather keep secrets in `.env`):
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
TWILIO_SID, TWILIO_SECRET, TWILIO_AUTH_TOKEN
|
|
60
|
+
PHONE_FROM_NUMBER, PRIMARY_PHONE_USER
|
|
61
|
+
PUBLIC_BASE_URL, OPENAI_API_KEY
|
|
62
|
+
PHONE_PORT, PHONE_ALLOWLIST (comma-separated), PHONE_VOICE, PHONE_REALTIME_MODEL
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
`nia phone status` prints which fields are set / missing.
|
|
66
|
+
|
|
67
|
+
## Cloudflared named tunnel (production)
|
|
68
|
+
|
|
69
|
+
The ephemeral `trycloudflare.com` URL is fine for testing but dies on
|
|
70
|
+
restart. For a persistent deploy:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
brew install cloudflared
|
|
74
|
+
cloudflared tunnel login # opens browser, writes cert.pem
|
|
75
|
+
cloudflared tunnel create nia-mac # creates the tunnel
|
|
76
|
+
cloudflared tunnel route dns nia-mac nia.example.com # CNAME on a Cloudflare-managed domain
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Keep nia's tunnel config in nia's home as a single flat file:
|
|
80
|
+
`~/.niahere/cloudflared-config.yaml`. The cloudflared-internal artifacts
|
|
81
|
+
(`cert.pem`, the per-tunnel credentials JSON written by `tunnel create`)
|
|
82
|
+
stay where cloudflared put them — those are cloudflared's territory, not
|
|
83
|
+
nia's.
|
|
84
|
+
|
|
85
|
+
```yaml
|
|
86
|
+
# ~/.niahere/cloudflared-config.yaml
|
|
87
|
+
tunnel: nia-mac
|
|
88
|
+
credentials-file: /Users/<you>/.cloudflared/<tunnel-id>.json
|
|
89
|
+
|
|
90
|
+
ingress:
|
|
91
|
+
- hostname: nia.example.com
|
|
92
|
+
service: http://localhost:7079
|
|
93
|
+
- service: http_status:404
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Install as a launchd service, pointing it at our config:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
sudo cloudflared --config ~/.niahere/cloudflared-config.yaml service install
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
If the generated plist at `/Library/LaunchDaemons/com.cloudflare.cloudflared.plist`
|
|
103
|
+
doesn't include the `--config` arg in `ProgramArguments`, edit it in, then:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
sudo launchctl bootout system/com.cloudflare.cloudflared
|
|
107
|
+
sudo launchctl bootstrap system /Library/LaunchDaemons/com.cloudflare.cloudflared.plist
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Then set `PUBLIC_BASE_URL=https://nia.example.com` in `.env` (or `public_base_url` under `channels.phone` in `~/.niahere/config.yaml`).
|
|
111
|
+
|
|
112
|
+
Verify with `curl https://nia.example.com/healthz` — should return `ok`
|
|
113
|
+
once the daemon is running.
|
|
114
|
+
|
|
115
|
+
## Twilio number webhook (inbound only)
|
|
116
|
+
|
|
117
|
+
Outbound calls do NOT need any Twilio Console config — `placeCall`
|
|
118
|
+
controls the TwiML URL itself.
|
|
119
|
+
|
|
120
|
+
Inbound calls need the Twilio number's Voice webhook pointed at your
|
|
121
|
+
public URL:
|
|
122
|
+
|
|
123
|
+
1. Twilio Console → Phone Numbers → Active Numbers → click your number.
|
|
124
|
+
2. Voice Configuration → "A call comes in" → Webhook.
|
|
125
|
+
3. URL: `<PUBLIC_BASE_URL>/twilio/voice/incoming` (the base URL already includes `https://`) — Method: POST.
|
|
126
|
+
4. Status callback URL: `<PUBLIC_BASE_URL>/twilio/voice/status`
|
|
127
|
+
— Method: POST.
|
|
128
|
+
5. Save.
|
|
129
|
+
|
|
130
|
+
If the Twilio account is on trial, every destination number for
|
|
131
|
+
outbound calls must be in the Verified Caller IDs list.
|
|
132
|
+
|
|
133
|
+
## CLI
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
nia phone status # show config + missing fields
|
|
137
|
+
nia phone call <+E164> "<goal sentence>" # one-shot outbound smoke test
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
The `call` subcommand boots a standalone phone server, places the call,
|
|
141
|
+
waits for it to complete, then exits — does NOT need the daemon running.
|
|
142
|
+
Useful for smoke-testing without spinning up the full daemon.
|
|
143
|
+
|
|
144
|
+
## MCP tool — `place_call`
|
|
145
|
+
|
|
146
|
+
```ts
|
|
147
|
+
place_call({
|
|
148
|
+
number: string, // E.164, e.g. "+13025551234"
|
|
149
|
+
goal: string, // What Nia should accomplish — seeded into session instructions
|
|
150
|
+
context?: string, // Extra background (calendar dump, prior notes…)
|
|
151
|
+
max_minutes?: number, // Hard cap (default 10, max 30)
|
|
152
|
+
voice?: string, // Override default voice per call
|
|
153
|
+
})
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Returns immediately with `{ callSid, status }`. The call completes
|
|
157
|
+
asynchronously; transcript lands in the `messages` table.
|
|
158
|
+
|
|
159
|
+
## Scheduled-job pattern (morning standup)
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
nia job add morning-standup "0 8 * * *" \
|
|
163
|
+
"Call me at <your E.164 number> and run my morning standup. \
|
|
164
|
+
Ask what I want to ship today, what's blocking me, \
|
|
165
|
+
and what to dig into while I sleep. Listen more than \
|
|
166
|
+
you talk. End cleanly when we're wrapped."
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Daily standup at 8 AM owner-local time. Same pattern for evening retro,
|
|
170
|
+
weekly review, urgent escalation, etc.
|
|
171
|
+
|
|
172
|
+
## Architecture (one-paragraph)
|
|
173
|
+
|
|
174
|
+
`src/channels/phone/index.ts` boots a Bun HTTP+WS server on `PHONE_PORT`.
|
|
175
|
+
Twilio reaches it via cloudflared. `twiml.ts` builds the `<Connect><Stream>`
|
|
176
|
+
TwiML. `twilio.ts` calls Twilio's REST API and validates webhook
|
|
177
|
+
signatures (HMAC-SHA1 with the account Auth Token). `relay.ts` bridges
|
|
178
|
+
Twilio's Media Streams (mulaw, JSON-enveloped) to OpenAI Realtime
|
|
179
|
+
(same g711_ulaw format — no resampling). `tools.ts` exposes
|
|
180
|
+
`consult_claude`, `send_telegram`, `save_memory`, and `end_call` to the
|
|
181
|
+
voice agent. `consult.ts` is the Claude escape hatch for reasoning-heavy
|
|
182
|
+
turns. `instructions.ts` builds the system prompts.
|
|
183
|
+
|
|
184
|
+
## Cost model
|
|
185
|
+
|
|
186
|
+
| Component | US → India | US → US |
|
|
187
|
+
| -------------------------------------------- | ---------- | ------- |
|
|
188
|
+
| Twilio voice (per min) | $0.10–0.15 | $0.014 |
|
|
189
|
+
| OpenAI Realtime (per min, mixed in+out) | $0.20–0.30 | same |
|
|
190
|
+
| `consult_claude` (per invocation, when used) | $0.01–0.05 | same |
|
|
191
|
+
|
|
192
|
+
A 5-minute call to India runs ~$1.50–2.25. Daily morning standup is
|
|
193
|
+
~$45–70/month. Levers to cut cost: buy an Indian Twilio number for
|
|
194
|
+
domestic rates, tighten `max_minutes` (most standups finish in 2–3 min),
|
|
195
|
+
prefer Telegram voice notes over live calls for long-form things.
|
|
196
|
+
|
|
197
|
+
## Debugging
|
|
198
|
+
|
|
199
|
+
- `nia phone status` — verify env.
|
|
200
|
+
- `curl <PUBLIC_BASE_URL>/healthz` — confirms the tunnel reaches
|
|
201
|
+
the daemon.
|
|
202
|
+
- `LOG_LEVEL=debug bun run src/cli/index.ts phone call …` — dumps every
|
|
203
|
+
OpenAI Realtime event type (incl. ones we don't handle) so you can
|
|
204
|
+
see what the GA API is actually sending.
|
|
205
|
+
- `bun test tests/phone.test.ts` — unit tests for TwiML + signature.
|
|
206
|
+
|
|
207
|
+
## Common pitfalls (learned the hard way)
|
|
208
|
+
|
|
209
|
+
1. **Don't try to update a queued call's TwiML URL.** Twilio rejects
|
|
210
|
+
redirect until `in-progress`. Bake the routing into the URL Twilio
|
|
211
|
+
first fetches, or read `CallSid` from the webhook body.
|
|
212
|
+
2. **GA Realtime needs `output_modalities: ["audio"]` in `session.update`.**
|
|
213
|
+
Without it, the model silently drops audio synthesis and only emits
|
|
214
|
+
transcripts — you'll get "done" events with no "delta" events.
|
|
215
|
+
3. **Drop the `OpenAI-Beta: realtime=v1` header.** GA endpoint rejects it.
|
|
216
|
+
4. **API Key SIDs (SK…) work for REST auth but NOT for webhook signature
|
|
217
|
+
validation.** Set `TWILIO_AUTH_TOKEN` separately to the account-level
|
|
218
|
+
Auth Token when using an API Key.
|
|
219
|
+
5. **Send the opener immediately on session open, don't wait for first
|
|
220
|
+
media frame.** Otherwise the user picks up to silence and speaks
|
|
221
|
+
first, which defeats "Nia calling you".
|
|
222
|
+
6. **Mac Mini sleep kills the daemon.** `sudo pmset -a sleep 0` keeps
|
|
223
|
+
the host awake (display can still sleep).
|
package/src/channels/index.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { registerChannel, getFactories, trackStarted, clearStarted } from "./reg
|
|
|
3
3
|
import { log } from "../utils/log";
|
|
4
4
|
import { createTelegramChannel } from "./telegram";
|
|
5
5
|
import { createSlackChannel } from "./slack";
|
|
6
|
+
import { createPhoneChannel } from "./phone";
|
|
6
7
|
|
|
7
8
|
export { getChannel, getStarted } from "./registry";
|
|
8
9
|
|
|
@@ -10,6 +11,7 @@ export { getChannel, getStarted } from "./registry";
|
|
|
10
11
|
/**
 * Registers a factory for every built-in channel.
 *
 * Each factory is wrapped in a zero-argument thunk so the channel is only
 * constructed when the registry invokes it. Registration order is
 * Telegram, Slack, then Phone.
 */
export function registerAllChannels(): void {
  const builtInFactories = [createTelegramChannel, createSlackChannel, createPhoneChannel];
  for (const makeChannel of builtInFactories) {
    registerChannel(() => makeChannel());
  }
}
|
|
14
16
|
|
|
15
17
|
export interface StartResult {
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `consult_claude` — single-shot escape hatch from the realtime voice model
|
|
3
|
+
* into Claude for memory-aware or reasoning-heavy questions. Voice agents
|
|
4
|
+
* use this when a turn exceeds the seeded context.
|
|
5
|
+
*
|
|
6
|
+
* Heavyweight (multi-second latency) by design — keep usage selective.
|
|
7
|
+
*/
|
|
8
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
9
|
+
import { loadIdentity } from "../../chat/identity";
|
|
10
|
+
import { log } from "../../utils/log";
|
|
11
|
+
|
|
12
|
+
/** Module-level cache for the Anthropic client; stays `null` until first use. */
let _anthropic: Anthropic | null = null;

/**
 * Returns the shared Anthropic client, constructing it (with no explicit
 * options) on the first call and reusing that instance afterwards.
 */
function client(): Anthropic {
  if (_anthropic !== null) {
    return _anthropic;
  }
  const created = new Anthropic();
  _anthropic = created;
  return created;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Asks Claude a single question on behalf of the realtime voice loop.
 *
 * The system prompt is assembled from the persona identity (`loadIdentity()`),
 * a fixed framing sentence, the caller label, and a brevity instruction; empty
 * parts are dropped and the rest joined with blank lines. `question` is sent as
 * the only user message.
 *
 * @param question - The question forwarded from the voice model.
 * @param callerLabel - Label for the person on the call; interpolated into the
 *   system prompt as `Caller: <label>.`.
 * @returns The trimmed text of Claude's reply; `"(no answer)"` when the reply
 *   contains no non-empty text; or an `error consulting Claude: …` string when
 *   the API call fails (the failure is logged, not thrown).
 */
export async function consultClaude(question: string, callerLabel: string): Promise<string> {
  const system = [
    loadIdentity(),
    "You are answering a one-shot question from Nia's voice loop during an active phone call.",
    `Caller: ${callerLabel}.`,
    "Answer in under 60 words, conversational, no markdown. The voice model will speak your answer verbatim.",
  ]
    .filter(Boolean)
    .join("\n\n");

  try {
    const resp = await client().messages.create({
      model: "claude-sonnet-4-6",
      max_tokens: 400,
      system,
      messages: [{ role: "user", content: question }],
    });

    // The reply is a list of content blocks and the first one is not
    // guaranteed to be text, so collect every text block instead of only
    // inspecting index 0.
    const answer = resp.content
      .flatMap((block) => (block.type === "text" ? [block.text] : []))
      .join("")
      .trim();

    // An empty string would leave the voice model with nothing to say; give it
    // the explicit sentinel instead.
    return answer || "(no answer)";
  } catch (err) {
    log.error({ err }, "phone: consult_claude failed");
    return `error consulting Claude: ${err instanceof Error ? err.message : String(err)}`;
  }
}
|