@vellumai/assistant 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/package.json +1 -1
  2. package/src/__tests__/send-endpoint-busy.test.ts +284 -0
  3. package/src/__tests__/subagent-manager-notify.test.ts +3 -3
  4. package/src/config/bundled-skills/media-processing/SKILL.md +81 -14
  5. package/src/config/bundled-skills/media-processing/TOOLS.json +3 -3
  6. package/src/config/bundled-skills/media-processing/services/preprocess.ts +3 -3
  7. package/src/config/defaults.ts +1 -1
  8. package/src/config/env-registry.ts +7 -0
  9. package/src/config/memory-schema.ts +3 -3
  10. package/src/config/schema.ts +1 -1
  11. package/src/daemon/daemon-control.ts +44 -6
  12. package/src/daemon/handlers/sessions.ts +20 -0
  13. package/src/daemon/handlers/subagents.ts +10 -3
  14. package/src/daemon/ipc-contract/sessions.ts +6 -0
  15. package/src/daemon/ipc-contract-inventory.json +2 -0
  16. package/src/daemon/ipc-contract.ts +2 -1
  17. package/src/daemon/lifecycle.ts +16 -0
  18. package/src/daemon/server.ts +8 -0
  19. package/src/daemon/session-queue-manager.ts +13 -11
  20. package/src/daemon/session-surfaces.ts +8 -1
  21. package/src/memory/migrations/016-memory-segments-indexes.ts +5 -4
  22. package/src/memory/migrations/017-memory-items-indexes.ts +5 -3
  23. package/src/memory/retriever.ts +4 -1
  24. package/src/memory/schema.ts +0 -1
  25. package/src/permissions/checker.ts +14 -7
  26. package/src/runtime/assistant-event-hub.ts +3 -1
  27. package/src/runtime/http-server.ts +22 -5
  28. package/src/runtime/http-types.ts +22 -0
  29. package/src/runtime/routes/conversation-routes.ts +77 -1
  30. package/src/runtime/routes/pairing-routes.ts +2 -1
  31. package/src/subagent/manager.ts +6 -6
  32. package/src/tools/browser/browser-execution.ts +4 -1
  33. package/src/tools/executor.ts +12 -9
  34. package/src/tools/subagent/message.ts +9 -2
  35. package/src/util/logger.ts +7 -2
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vellumai/assistant",
3
- "version": "0.3.6",
3
+ "version": "0.3.8",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "vellum": "./src/index.ts"
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Tests for POST /v1/messages queue-if-busy behavior and hub publishing.
3
+ *
4
+ * Validates that:
5
+ * - Messages are accepted (202) when the session is idle, with hub events published.
6
+ * - Messages are queued (202, queued: true) when the session is busy, not 409.
7
+ * - SSE subscribers receive events from messages sent via this endpoint.
8
+ */
9
+ import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test';
10
+ import { mkdtempSync, rmSync, realpathSync } from 'node:fs';
11
+ import { tmpdir } from 'node:os';
12
+ import { join } from 'node:path';
13
+ import type { ServerMessage } from '../daemon/ipc-protocol.js';
14
+ import type { Session } from '../daemon/session.js';
15
+
16
+ const testDir = realpathSync(mkdtempSync(join(tmpdir(), 'send-endpoint-busy-test-')));
17
+
18
+ mock.module('../util/platform.js', () => ({
19
+ getRootDir: () => testDir,
20
+ getDataDir: () => testDir,
21
+ isMacOS: () => process.platform === 'darwin',
22
+ isLinux: () => process.platform === 'linux',
23
+ isWindows: () => process.platform === 'win32',
24
+ getSocketPath: () => join(testDir, 'test.sock'),
25
+ getPidPath: () => join(testDir, 'test.pid'),
26
+ getDbPath: () => join(testDir, 'test.db'),
27
+ getLogPath: () => join(testDir, 'test.log'),
28
+ ensureDataDir: () => {},
29
+ }));
30
+
31
+ mock.module('../util/logger.js', () => ({
32
+ getLogger: () => new Proxy({} as Record<string, unknown>, {
33
+ get: () => () => {},
34
+ }),
35
+ }));
36
+
37
+ mock.module('../config/loader.js', () => ({
38
+ getConfig: () => ({
39
+ model: 'test',
40
+ provider: 'test',
41
+ apiKeys: {},
42
+ memory: { enabled: false },
43
+ rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
44
+ secretDetection: { enabled: false },
45
+ }),
46
+ }));
47
+
48
+ import { initializeDb, getDb, resetDb } from '../memory/db.js';
49
+ import { RuntimeHttpServer } from '../runtime/http-server.js';
50
+ import { AssistantEventHub } from '../runtime/assistant-event-hub.js';
51
+ import type { AssistantEvent } from '../runtime/assistant-event.js';
52
+
53
+ initializeDb();
54
+
55
+ // ---------------------------------------------------------------------------
56
+ // Session helpers
57
+ // ---------------------------------------------------------------------------
58
+
59
+ /** Session that completes its agent loop quickly and emits a text delta + message_complete. */
60
+ function makeCompletingSession(): Session {
61
+ let processing = false;
62
+ return {
63
+ isProcessing: () => processing,
64
+ persistUserMessage: (_content: string, _attachments: unknown[], requestId?: string) => {
65
+ processing = true;
66
+ return requestId ?? 'msg-1';
67
+ },
68
+ memoryPolicy: { scopeId: 'default', includeDefaultFallback: false, strictSideEffects: false },
69
+ setChannelCapabilities: () => {},
70
+ setAssistantId: () => {},
71
+ setGuardianContext: () => {},
72
+ setCommandIntent: () => {},
73
+ updateClient: () => {},
74
+ enqueueMessage: () => ({ queued: false, requestId: 'noop' }),
75
+ runAgentLoop: async (_content: string, _messageId: string, onEvent: (msg: ServerMessage) => void) => {
76
+ onEvent({ type: 'assistant_text_delta', text: 'Hello!' });
77
+ onEvent({ type: 'message_complete', sessionId: 'test-session' });
78
+ processing = false;
79
+ },
80
+ handleConfirmationResponse: () => {},
81
+ handleSecretResponse: () => {},
82
+ } as unknown as Session;
83
+ }
84
+
85
+ /** Session that hangs forever in the agent loop (simulates a busy session). */
86
+ function makeHangingSession(): Session {
87
+ let processing = false;
88
+ const enqueuedMessages: Array<{ content: string; onEvent: (msg: ServerMessage) => void; requestId: string }> = [];
89
+ return {
90
+ isProcessing: () => processing,
91
+ persistUserMessage: (_content: string, _attachments: unknown[], requestId?: string) => {
92
+ processing = true;
93
+ return requestId ?? 'msg-1';
94
+ },
95
+ memoryPolicy: { scopeId: 'default', includeDefaultFallback: false, strictSideEffects: false },
96
+ setChannelCapabilities: () => {},
97
+ setAssistantId: () => {},
98
+ setGuardianContext: () => {},
99
+ setCommandIntent: () => {},
100
+ updateClient: () => {},
101
+ enqueueMessage: (content: string, _attachments: unknown[], onEvent: (msg: ServerMessage) => void, requestId: string) => {
102
+ enqueuedMessages.push({ content, onEvent, requestId });
103
+ return { queued: true, requestId };
104
+ },
105
+ runAgentLoop: async () => {
106
+ // Hang forever
107
+ await new Promise<void>(() => {});
108
+ },
109
+ handleConfirmationResponse: () => {},
110
+ handleSecretResponse: () => {},
111
+ _enqueuedMessages: enqueuedMessages,
112
+ } as unknown as Session;
113
+ }
114
+
115
+ // ---------------------------------------------------------------------------
116
+ // Tests
117
+ // ---------------------------------------------------------------------------
118
+
119
+ const TEST_TOKEN = 'test-bearer-token-send';
120
+ const AUTH_HEADERS = { Authorization: `Bearer ${TEST_TOKEN}` };
121
+
122
+ describe('POST /v1/messages — queue-if-busy and hub publishing', () => {
123
+ let server: RuntimeHttpServer;
124
+ let port: number;
125
+ let eventHub: AssistantEventHub;
126
+
127
+ beforeEach(() => {
128
+ const db = getDb();
129
+ db.run('DELETE FROM messages');
130
+ db.run('DELETE FROM conversations');
131
+ db.run('DELETE FROM conversation_keys');
132
+ eventHub = new AssistantEventHub();
133
+ });
134
+
135
+ afterAll(() => {
136
+ resetDb();
137
+ try { rmSync(testDir, { recursive: true, force: true }); } catch { /* best effort */ }
138
+ });
139
+
140
+ async function startServer(sessionFactory: () => Session): Promise<void> {
141
+ port = 19000 + Math.floor(Math.random() * 1000);
142
+ server = new RuntimeHttpServer({
143
+ port,
144
+ bearerToken: TEST_TOKEN,
145
+ sendMessageDeps: {
146
+ getOrCreateSession: async () => sessionFactory(),
147
+ assistantEventHub: eventHub,
148
+ resolveAttachments: () => [],
149
+ },
150
+ });
151
+ await server.start();
152
+ }
153
+
154
+ async function stopServer(): Promise<void> {
155
+ await server?.stop();
156
+ }
157
+
158
+ function messagesUrl(): string {
159
+ return `http://127.0.0.1:${port}/v1/messages`;
160
+ }
161
+
162
+ // ── Idle session: immediate processing ──────────────────────────────
163
+
164
+ test('returns 202 with accepted: true and messageId when session is idle', async () => {
165
+ await startServer(() => makeCompletingSession());
166
+
167
+ const res = await fetch(messagesUrl(), {
168
+ method: 'POST',
169
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
170
+ body: JSON.stringify({ conversationKey: 'conv-idle', content: 'Hello', sourceChannel: 'macos' }),
171
+ });
172
+ const body = await res.json() as { accepted: boolean; messageId: string };
173
+
174
+ expect(res.status).toBe(202);
175
+ expect(body.accepted).toBe(true);
176
+ expect(body.messageId).toBeDefined();
177
+
178
+ await stopServer();
179
+ });
180
+
181
+ test('publishes events to assistantEventHub when session is idle', async () => {
182
+ const publishedEvents: AssistantEvent[] = [];
183
+
184
+ await startServer(() => makeCompletingSession());
185
+
186
+ eventHub.subscribe(
187
+ { assistantId: 'self' },
188
+ (event) => { publishedEvents.push(event); },
189
+ );
190
+
191
+ const res = await fetch(messagesUrl(), {
192
+ method: 'POST',
193
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
194
+ body: JSON.stringify({ conversationKey: 'conv-hub', content: 'Hello hub', sourceChannel: 'macos' }),
195
+ });
196
+ expect(res.status).toBe(202);
197
+
198
+ // Wait for the async agent loop to complete and events to be published
199
+ await new Promise((r) => setTimeout(r, 100));
200
+
201
+ // Should have received assistant_text_delta and message_complete
202
+ const types = publishedEvents.map((e) => e.message.type);
203
+ expect(types).toContain('assistant_text_delta');
204
+ expect(types).toContain('message_complete');
205
+
206
+ await stopServer();
207
+ });
208
+
209
+ // ── Busy session: queue-if-busy ─────────────────────────────────────
210
+
211
+ test('returns 202 with queued: true when session is busy (not 409)', async () => {
212
+ const session = makeHangingSession();
213
+ await startServer(() => session);
214
+
215
+ // First message starts the agent loop and makes the session busy
216
+ const res1 = await fetch(messagesUrl(), {
217
+ method: 'POST',
218
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
219
+ body: JSON.stringify({ conversationKey: 'conv-busy', content: 'First', sourceChannel: 'macos' }),
220
+ });
221
+ expect(res1.status).toBe(202);
222
+ const body1 = await res1.json() as { accepted: boolean; messageId: string };
223
+ expect(body1.accepted).toBe(true);
224
+ expect(body1.messageId).toBeDefined();
225
+
226
+ // Wait for the agent loop to start
227
+ await new Promise((r) => setTimeout(r, 30));
228
+
229
+ // Second message should be queued, not rejected
230
+ const res2 = await fetch(messagesUrl(), {
231
+ method: 'POST',
232
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
233
+ body: JSON.stringify({ conversationKey: 'conv-busy', content: 'Second', sourceChannel: 'macos' }),
234
+ });
235
+ const body2 = await res2.json() as { accepted: boolean; queued: boolean };
236
+
237
+ expect(res2.status).toBe(202);
238
+ expect(body2.accepted).toBe(true);
239
+ expect(body2.queued).toBe(true);
240
+
241
+ await stopServer();
242
+ });
243
+
244
+ // ── Validation ──────────────────────────────────────────────────────
245
+
246
+ test('returns 400 when sourceChannel is missing', async () => {
247
+ await startServer(() => makeCompletingSession());
248
+
249
+ const res = await fetch(messagesUrl(), {
250
+ method: 'POST',
251
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
252
+ body: JSON.stringify({ conversationKey: 'conv-val', content: 'Hello' }),
253
+ });
254
+ expect(res.status).toBe(400);
255
+
256
+ await stopServer();
257
+ });
258
+
259
+ test('returns 400 when content is empty', async () => {
260
+ await startServer(() => makeCompletingSession());
261
+
262
+ const res = await fetch(messagesUrl(), {
263
+ method: 'POST',
264
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
265
+ body: JSON.stringify({ conversationKey: 'conv-empty', content: '', sourceChannel: 'macos' }),
266
+ });
267
+ expect(res.status).toBe(400);
268
+
269
+ await stopServer();
270
+ });
271
+
272
+ test('returns 400 when conversationKey is missing', async () => {
273
+ await startServer(() => makeCompletingSession());
274
+
275
+ const res = await fetch(messagesUrl(), {
276
+ method: 'POST',
277
+ headers: { 'Content-Type': 'application/json', ...AUTH_HEADERS },
278
+ body: JSON.stringify({ content: 'Hello', sourceChannel: 'macos' }),
279
+ });
280
+ expect(res.status).toBe(400);
281
+
282
+ await stopServer();
283
+ });
284
+ });
@@ -397,8 +397,8 @@ describe('SubagentManager sendMessage validation', () => {
397
397
  const subagentId = 'sub-1';
398
398
  injectFakeSubagent(manager, subagentId, makeState(subagentId));
399
399
 
400
- expect(manager.sendMessage(subagentId, '')).toBe(false);
401
- expect(manager.sendMessage(subagentId, ' ')).toBe(false);
402
- expect(manager.sendMessage(subagentId, '\n\t')).toBe(false);
400
+ expect(manager.sendMessage(subagentId, '')).toBe('empty');
401
+ expect(manager.sendMessage(subagentId, ' ')).toBe('empty');
402
+ expect(manager.sendMessage(subagentId, '\n\t')).toBe('empty');
403
403
  });
404
404
  });
@@ -34,11 +34,11 @@ Preprocess a video asset: detect dead time via mpdecimate, segment the video int
34
34
 
35
35
  Parameters:
36
36
  - `asset_id` (required) — ID of the media asset.
37
- - `interval_seconds` — Interval between keyframes (default: 3s).
38
- - `segment_duration` — Duration of each segment window (default: 20s).
37
+ - `interval_seconds` — Interval between keyframes (default: 1s). Use 0.5s for sports/action content where frame density matters.
38
+ - `segment_duration` — Duration of each segment window (default: 15s).
39
39
  - `dead_time_threshold` — Sensitivity for dead-time detection (default: 0.02).
40
40
  - `section_config` — Path to a JSON file with manual section boundaries.
41
- - `skip_dead_time` — Whether to detect and skip dead time (default: true).
41
+ - `skip_dead_time` — Whether to detect and skip dead time (default: false). Dead-time detection can be too aggressive for continuous action video like sports — it may incorrectly skip live play. Enable only for content with clear idle periods (e.g., lectures, surveillance footage).
42
42
  - `short_edge` — Short edge resolution for downscaled frames in pixels (default: 480).
43
43
 
44
44
  ### analyze_keyframes
@@ -74,7 +74,7 @@ Get a diagnostic report for a media asset. Returns:
74
74
  - **Processing stats**: total keyframes extracted.
75
75
  - **Per-stage status and timing**: which stages (preprocess, map, reduce) have run, how long each took, current progress.
76
76
  - **Failure reasons**: last error from any failed stage.
77
- - **Cost estimation**: based on segment count and Gemini 2.5 Flash pricing, plus a note about Claude reduce costs.
77
+ - **Cost estimation**: based on segment count and current Gemini pricing.
78
78
 
79
79
  ## Services
80
80
 
@@ -110,6 +110,82 @@ Limits concurrent API calls during the Map phase to avoid rate limiting.
110
110
 
111
111
  Tracks estimated API costs during pipeline execution.
112
112
 
113
+ ## Best Practices
114
+
115
+ ### Map Prompt Strategy: Go Broad, Not Targeted
116
+
117
+ The single most important insight: **always use a broad, descriptive map prompt** instead of a targeted one.
118
+
119
+ A targeted prompt like "find turnovers" locks you into one topic. If the user later wants to ask about defense, formations, or specific players, you'd need to reprocess the entire video. Instead, run a general-purpose descriptive prompt that captures everything visible, creating a rich, reusable dataset. Then all follow-up questions can be handled via `query_media` with no reprocessing.
120
+
121
+ **One map run, many queries.**
122
+
123
+ The map output will be larger (more tokens per segment), but Gemini Flash is cheap enough that this is a good tradeoff. Only use a targeted prompt if the user explicitly asks for something narrow.
124
+
125
+ #### Sample General-Purpose Map Prompt
126
+
127
+ Use this as a starting point for the `system_prompt` parameter in `analyze_keyframes`:
128
+
129
+ ```
130
+ You are analyzing keyframes from a video. For each segment, describe everything you can observe:
131
+
132
+ - People visible: count, positions, identifying features (jersey numbers, clothing, names if visible)
133
+ - Actions and movements: what people are doing, direction of movement, interactions
134
+ - Objects of interest: ball location, equipment, vehicles, on-screen graphics
135
+ - Environment: setting, lighting, weather if outdoors
136
+ - Text on screen: scores, captions, titles, signs, timestamps
137
+ - Scene composition: camera angle, zoom level, any transitions between shots
138
+ - Any stoppages, pauses, or changes in activity
139
+
140
+ Be specific and factual. Describe what you see, not what you infer happened between frames.
141
+ ```
142
+
143
+ #### Sample Output Schema
144
+
145
+ ```json
146
+ {
147
+ "type": "object",
148
+ "properties": {
149
+ "scene_description": { "type": "string" },
150
+ "people": {
151
+ "type": "array",
152
+ "items": {
153
+ "type": "object",
154
+ "properties": {
155
+ "description": { "type": "string" },
156
+ "position": { "type": "string" },
157
+ "action": { "type": "string" }
158
+ }
159
+ }
160
+ },
161
+ "objects_of_interest": { "type": "array", "items": { "type": "string" } },
162
+ "on_screen_text": { "type": "array", "items": { "type": "string" } },
163
+ "camera": { "type": "string" },
164
+ "notable_events": { "type": "array", "items": { "type": "string" } }
165
+ }
166
+ }
167
+ ```
168
+
169
+ ### Clip Delivery
170
+
171
+ The `generate_clip` tool outputs clips as temporary files. These may not deliver reliably via sandbox attachments. For reliable delivery, use `host_bash` + ffmpeg to save clips to a user-specified location as a fallback.
172
+
173
+ ## Known Limitations — Vision Analysis
174
+
175
+ Gemini performs well at **spatial/descriptive analysis** from static keyframes:
176
+ - Player positions, formations, and spacing
177
+ - Jersey numbers and identifying features
178
+ - Ball location and which team has possession
179
+ - Score and on-screen text
180
+ - Camera angles and scene composition
181
+
182
+ Gemini **hallucinates when asked to detect fast temporal events** from static frames, regardless of frame density:
183
+ - Turnovers, steals, fouls, and specific plays
184
+ - Fast transitions and split-second actions
185
+ - Causality between frames (what "happened" vs. what's visible)
186
+
187
+ The model is good at describing **what is there** but bad at detecting **what happened**. Structure your map prompts and queries accordingly — ask the model to describe scenes, then use `query_media` (Claude) to reason about patterns and events across the descriptive data.
188
+
113
189
  ## Operator Runbook
114
190
 
115
191
  ### Monitoring Progress
@@ -137,16 +213,7 @@ After fixing the root cause, re-run the failed stage. The pipeline is resumable
137
213
 
138
214
  ### Cost Expectations
139
215
 
140
- The Map phase (Gemini 2.5 Flash) is the primary cost driver. Cost scales with video duration, keyframe interval, and segment size:
141
-
142
- | Video Duration | Interval | Keyframes | Segments (~10 frames each) | Estimated Map Cost |
143
- |----------------|----------|-----------|----------------------------|--------------------|
144
- | 30 min | 3s | ~600 | ~60 | ~$0.06 |
145
- | 60 min | 3s | ~1,200 | ~120 | ~$0.12 |
146
- | 90 min | 3s | ~1,800 | ~180 | ~$0.18 |
147
- | 90 min | 5s | ~1,080 | ~108 | ~$0.11 |
148
-
149
- The Reduce phase (Claude) adds a small additional cost per query. The `media_diagnostics` tool provides per-asset cost estimates.
216
+ Use `media_diagnostics` to get per-asset cost estimates. The Map phase (Gemini) is the primary cost driver; it scales with video duration and keyframe interval. The Q&A phase (Claude) is negligible per query.
150
217
 
151
218
  ### Known Limitations
152
219
 
@@ -67,11 +67,11 @@
67
67
  },
68
68
  "interval_seconds": {
69
69
  "type": "number",
70
- "description": "Interval between keyframes in seconds. Default: 3"
70
+ "description": "Interval between keyframes in seconds. Default: 1. Use 0.5 for sports/action content."
71
71
  },
72
72
  "segment_duration": {
73
73
  "type": "number",
74
- "description": "Duration of each segment window in seconds. Default: 20"
74
+ "description": "Duration of each segment window in seconds. Default: 15"
75
75
  },
76
76
  "dead_time_threshold": {
77
77
  "type": "number",
@@ -83,7 +83,7 @@
83
83
  },
84
84
  "skip_dead_time": {
85
85
  "type": "boolean",
86
- "description": "Whether to detect and skip dead time. Default: true"
86
+ "description": "Whether to detect and skip dead time. Default: false. Can be too aggressive for continuous action video like sports."
87
87
  },
88
88
  "short_edge": {
89
89
  "type": "number",
@@ -355,13 +355,13 @@ export async function preprocessForAsset(
355
355
  onProgress?: (msg: string) => void,
356
356
  ): Promise<PreprocessManifest> {
357
357
  const config: PreprocessConfig = {
358
- intervalSeconds: options.intervalSeconds ?? 3,
359
- segmentDuration: options.segmentDuration ?? 20,
358
+ intervalSeconds: options.intervalSeconds ?? 1,
359
+ segmentDuration: options.segmentDuration ?? 15,
360
360
  deadTimeThreshold: options.deadTimeThreshold ?? 0.02,
361
361
  shortEdge: options.shortEdge ?? 480,
362
362
  };
363
363
 
364
- const skipDeadTime = options.skipDeadTime ?? true;
364
+ const skipDeadTime = options.skipDeadTime ?? false;
365
365
 
366
366
  const asset = getMediaAssetById(assetId);
367
367
  if (!asset) {
@@ -47,7 +47,7 @@ export const DEFAULT_CONFIG: AssistantConfig = {
47
47
  injectionFormat: 'markdown' as const,
48
48
  injectionStrategy: 'prepend_user_block' as const,
49
49
  reranking: {
50
- enabled: true,
50
+ enabled: false,
51
51
  model: 'claude-haiku-4-5-20251001',
52
52
  topK: 20,
53
53
  },
@@ -136,11 +136,18 @@ const KNOWN_VELLUM_VARS = new Set([
136
136
  'VELLUM_DAEMON_TCP_ENABLED',
137
137
  'VELLUM_DAEMON_TCP_HOST',
138
138
  'VELLUM_DAEMON_IOS_PAIRING',
139
+ 'VELLUM_DAEMON_NOAUTH',
140
+ 'VELLUM_DAEMON_AUTOSTART',
139
141
  'VELLUM_DEBUG',
140
142
  'VELLUM_LOG_STDERR',
141
143
  'VELLUM_ENABLE_MONITORING',
142
144
  'VELLUM_HOOK_EVENT',
143
145
  'VELLUM_HOOK_NAME',
146
+ 'VELLUM_HOOK_SETTINGS',
147
+ 'VELLUM_ROOT_DIR',
148
+ 'VELLUM_WORKSPACE_DIR',
149
+ 'VELLUM_CLAUDE_CODE_DEPTH',
150
+ 'VELLUM_ASSISTANT_PLATFORM_URL',
144
151
  ]);
145
152
 
146
153
  /**
@@ -60,7 +60,7 @@ export const QdrantConfigSchema = z.object({
60
60
  export const MemoryRerankingConfigSchema = z.object({
61
61
  enabled: z
62
62
  .boolean({ error: 'memory.retrieval.reranking.enabled must be a boolean' })
63
- .default(true),
63
+ .default(false),
64
64
  model: z
65
65
  .string({ error: 'memory.retrieval.reranking.model must be a string' })
66
66
  .default('claude-haiku-4-5-20251001'),
@@ -186,7 +186,7 @@ export const MemoryRetrievalConfigSchema = z.object({
186
186
  })
187
187
  .default('prepend_user_block'),
188
188
  reranking: MemoryRerankingConfigSchema.default({
189
- enabled: true,
189
+ enabled: false,
190
190
  model: 'claude-haiku-4-5-20251001',
191
191
  topK: 20,
192
192
  }),
@@ -430,7 +430,7 @@ export const MemoryConfigSchema = z.object({
430
430
  injectionFormat: 'markdown',
431
431
  injectionStrategy: 'prepend_user_block',
432
432
  reranking: {
433
- enabled: true,
433
+ enabled: false,
434
434
  model: 'claude-haiku-4-5-20251001',
435
435
  topK: 20,
436
436
  },
@@ -215,7 +215,7 @@ export const AssistantConfigSchema = z.object({
215
215
  injectionFormat: 'markdown',
216
216
  injectionStrategy: 'prepend_user_block',
217
217
  reranking: {
218
- enabled: true,
218
+ enabled: false,
219
219
  model: 'claude-haiku-4-5-20251001',
220
220
  topK: 20,
221
221
  },
@@ -5,14 +5,52 @@ import {
5
5
  getSocketPath,
6
6
  getPidPath,
7
7
  getRootDir,
8
+ getWorkspaceConfigPath,
8
9
  removeSocketFile,
9
10
  } from '../util/platform.js';
10
11
  import { getLogger } from '../util/logger.js';
11
12
  import { DaemonError } from '../util/errors.js';
12
- import { getConfig } from '../config/loader.js';
13
13
 
14
14
  const log = getLogger('lifecycle');
15
15
 
16
+ const DAEMON_TIMEOUT_DEFAULTS = {
17
+ startupSocketWaitMs: 5000,
18
+ stopTimeoutMs: 5000,
19
+ sigkillGracePeriodMs: 2000,
20
+ };
21
+
22
+ function isPositiveInteger(v: unknown): v is number {
23
+ return typeof v === 'number' && Number.isInteger(v) && v > 0;
24
+ }
25
+
26
+ /**
27
+ * Read daemon timeout values directly from the config JSON file, bypassing
28
+ * loadConfig() and its ensureMigratedDataDir()/ensureDataDir() side effects.
29
+ * Falls back to hardcoded defaults on any error (missing file, malformed JSON,
30
+ * unexpected shape) so daemon stop/start never fails due to config issues.
31
+ */
32
+ function readDaemonTimeouts(): typeof DAEMON_TIMEOUT_DEFAULTS {
33
+ try {
34
+ const raw = JSON.parse(readFileSync(getWorkspaceConfigPath(), 'utf-8'));
35
+ if (raw.daemon && typeof raw.daemon === 'object') {
36
+ return {
37
+ startupSocketWaitMs: isPositiveInteger(raw.daemon.startupSocketWaitMs)
38
+ ? raw.daemon.startupSocketWaitMs
39
+ : DAEMON_TIMEOUT_DEFAULTS.startupSocketWaitMs,
40
+ stopTimeoutMs: isPositiveInteger(raw.daemon.stopTimeoutMs)
41
+ ? raw.daemon.stopTimeoutMs
42
+ : DAEMON_TIMEOUT_DEFAULTS.stopTimeoutMs,
43
+ sigkillGracePeriodMs: isPositiveInteger(raw.daemon.sigkillGracePeriodMs)
44
+ ? raw.daemon.sigkillGracePeriodMs
45
+ : DAEMON_TIMEOUT_DEFAULTS.sigkillGracePeriodMs,
46
+ };
47
+ }
48
+ } catch {
49
+ // Missing file, malformed JSON, etc. — use defaults.
50
+ }
51
+ return { ...DAEMON_TIMEOUT_DEFAULTS };
52
+ }
53
+
16
54
  function isProcessRunning(pid: number): boolean {
17
55
  try {
18
56
  process.kill(pid, 0);
@@ -125,8 +163,8 @@ export async function startDaemon(): Promise<{
125
163
  writePid(pid);
126
164
 
127
165
  // Wait for socket to appear
128
- const config = getConfig();
129
- const maxWait = config.daemon.startupSocketWaitMs;
166
+ const timeouts = readDaemonTimeouts();
167
+ const maxWait = timeouts.startupSocketWaitMs;
130
168
  const interval = 100;
131
169
  let waited = 0;
132
170
  while (waited < maxWait) {
@@ -165,10 +203,10 @@ export async function stopDaemon(): Promise<StopResult> {
165
203
 
166
204
  process.kill(pid, 'SIGTERM');
167
205
 
168
- const config = getConfig();
206
+ const timeouts = readDaemonTimeouts();
169
207
 
170
208
  // Wait for process to exit
171
- const maxWait = config.daemon.stopTimeoutMs;
209
+ const maxWait = timeouts.stopTimeoutMs;
172
210
  const interval = 100;
173
211
  let waited = 0;
174
212
  while (waited < maxWait) {
@@ -190,7 +228,7 @@ export async function stopDaemon(): Promise<StopResult> {
190
228
  // Wait for the process to actually die after SIGKILL. Without this,
191
229
  // startDaemon() can race with the dying process's shutdown handler,
192
230
  // which removes the socket file and bricks the new daemon.
193
- const killMaxWait = config.daemon.sigkillGracePeriodMs;
231
+ const killMaxWait = timeouts.sigkillGracePeriodMs;
194
232
  let killWaited = 0;
195
233
  while (killWaited < killMaxWait && isProcessRunning(pid)) {
196
234
  await new Promise((r) => setTimeout(r, 100));
@@ -16,6 +16,7 @@ import type {
16
16
  SecretResponse,
17
17
  SessionCreateRequest,
18
18
  SessionSwitchRequest,
19
+ SessionRenameRequest,
19
20
  CancelRequest,
20
21
  DeleteQueuedMessage,
21
22
  HistoryRequest,
@@ -352,6 +353,24 @@ export async function handleSessionSwitch(
352
353
  });
353
354
  }
354
355
 
356
+ export function handleSessionRename(
357
+ msg: SessionRenameRequest,
358
+ socket: net.Socket,
359
+ ctx: HandlerContext,
360
+ ): void {
361
+ const conversation = conversationStore.getConversation(msg.sessionId);
362
+ if (!conversation) {
363
+ ctx.send(socket, { type: 'error', message: `Session ${msg.sessionId} not found` });
364
+ return;
365
+ }
366
+ conversationStore.updateConversationTitle(msg.sessionId, msg.title);
367
+ ctx.send(socket, {
368
+ type: 'session_title_updated',
369
+ sessionId: msg.sessionId,
370
+ title: msg.title,
371
+ });
372
+ }
373
+
355
374
  export function handleCancel(msg: CancelRequest, socket: net.Socket, ctx: HandlerContext): void {
356
375
  const sessionId = msg.sessionId || ctx.socketToSession.get(socket);
357
376
  if (sessionId) {
@@ -597,6 +616,7 @@ export const sessionHandlers = defineHandlers({
597
616
  session_create: handleSessionCreate,
598
617
  sessions_clear: (_msg, socket, ctx) => handleSessionsClear(socket, ctx),
599
618
  session_switch: handleSessionSwitch,
619
+ session_rename: handleSessionRename,
600
620
  cancel: handleCancel,
601
621
  delete_queued_message: handleDeleteQueuedMessage,
602
622
  history_request: handleHistoryRequest,