@vellumai/assistant 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/ARCHITECTURE.md +11 -12
  2. package/docker-entrypoint.sh +13 -1
  3. package/docker-init-apt-root.sh +79 -6
  4. package/openapi.yaml +336 -21
  5. package/package.json +1 -1
  6. package/src/__tests__/agent-loop-exit-reason.test.ts +272 -0
  7. package/src/__tests__/agent-loop-provider-error-recording.test.ts +195 -0
  8. package/src/__tests__/compactor-tail-resolution.test.ts +107 -1
  9. package/src/__tests__/config-get-vision-flag.test.ts +136 -0
  10. package/src/__tests__/config-loader-backfill.test.ts +115 -18
  11. package/src/__tests__/context-token-estimator.test.ts +30 -65
  12. package/src/__tests__/conversation-agent-loop.test.ts +57 -1
  13. package/src/__tests__/conversation-media-retry.test.ts +19 -8
  14. package/src/__tests__/conversation-runtime-assembly.test.ts +26 -4
  15. package/src/__tests__/date-context.test.ts +45 -0
  16. package/src/__tests__/external-plugin-loader.test.ts +91 -19
  17. package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +0 -1
  18. package/src/__tests__/guardian-dispatch.test.ts +1 -0
  19. package/src/__tests__/heartbeat-service.test.ts +24 -164
  20. package/src/__tests__/helpers/channel-test-adapter.ts +0 -2
  21. package/src/__tests__/host-app-control-proxy.test.ts +241 -0
  22. package/src/__tests__/host-proxy-preactivation.test.ts +200 -13
  23. package/src/__tests__/injector-background-turn.test.ts +153 -0
  24. package/src/__tests__/injector-chain.test.ts +5 -0
  25. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +9 -2
  26. package/src/__tests__/llm-callsite-catalog.test.ts +25 -0
  27. package/src/__tests__/llm-catalog-parity.test.ts +3 -0
  28. package/src/__tests__/llm-request-log-agent-loop-exit-reason.test.ts +116 -0
  29. package/src/__tests__/llm-request-log-error-payload.test.ts +138 -0
  30. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +2 -0
  31. package/src/__tests__/llm-resolver.test.ts +255 -2
  32. package/src/__tests__/managed-profile-guard.test.ts +10 -0
  33. package/src/__tests__/notification-decision-fallback.test.ts +0 -91
  34. package/src/__tests__/notification-decision-strategy.test.ts +14 -31
  35. package/src/__tests__/notification-deep-link.test.ts +15 -0
  36. package/src/__tests__/notification-guardian-path.test.ts +1 -2
  37. package/src/__tests__/notification-platform-adapter.test.ts +5 -4
  38. package/src/__tests__/notification-telegram-adapter.test.ts +1 -0
  39. package/src/__tests__/notification-vellum-adapter.test.ts +113 -0
  40. package/src/__tests__/openai-provider.test.ts +218 -3
  41. package/src/__tests__/openai-responses-cutover-guard.test.ts +3 -3
  42. package/src/__tests__/openrouter-provider-only.test.ts +51 -3
  43. package/src/__tests__/openrouter-token-estimation.test.ts +34 -25
  44. package/src/__tests__/platform-proxy-context.test.ts +6 -1
  45. package/src/__tests__/plugin-tool-contribution.test.ts +3 -3
  46. package/src/__tests__/plugin-types.test.ts +2 -2
  47. package/src/__tests__/provider-catalog-visibility.test.ts +16 -0
  48. package/src/__tests__/provider-platform-proxy-integration.test.ts +27 -25
  49. package/src/__tests__/secret-routes-platform-proxy.test.ts +1 -1
  50. package/src/__tests__/system-prompt.test.ts +6 -73
  51. package/src/__tests__/workspace-migration-087-memory-router-balanced-profile.test.ts +228 -0
  52. package/src/a2a/__tests__/agent-card.test.ts +98 -0
  53. package/src/a2a/__tests__/e2e-a2a-channel.test.ts +597 -0
  54. package/src/a2a/__tests__/protocol-helpers.test.ts +113 -0
  55. package/src/a2a/__tests__/task-store.test.ts +246 -0
  56. package/src/a2a/agent-card.ts +58 -0
  57. package/src/a2a/feature-gate.ts +8 -0
  58. package/src/a2a/protocol-constants.ts +21 -0
  59. package/src/a2a/protocol-errors.ts +50 -0
  60. package/src/a2a/protocol-types.ts +162 -0
  61. package/src/a2a/task-store.ts +168 -0
  62. package/src/agent/loop.ts +167 -18
  63. package/src/channels/config.ts +9 -0
  64. package/src/channels/types.ts +14 -0
  65. package/src/cli/{__tests__ → commands/__tests__}/notifications.test.ts +201 -28
  66. package/src/cli/commands/__tests__/schedules.test.ts +469 -0
  67. package/src/cli/commands/notifications.ts +65 -35
  68. package/src/cli/commands/plugins.ts +67 -0
  69. package/src/cli/commands/schedules.ts +297 -5
  70. package/src/cli/lib/__tests__/search-plugins.test.ts +261 -0
  71. package/src/cli/lib/install-from-github.ts +8 -9
  72. package/src/cli/lib/search-plugins.ts +163 -0
  73. package/src/cli/program.ts +14 -0
  74. package/src/config/assistant-feature-flags.ts +24 -54
  75. package/src/config/bundled-skills/app-builder/SKILL.md +117 -1
  76. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -1
  77. package/src/config/call-site-defaults.ts +105 -0
  78. package/src/config/feature-flag-registry.json +21 -29
  79. package/src/config/llm-resolver.ts +52 -1
  80. package/src/config/schema.ts +2 -0
  81. package/src/config/schemas/__tests__/memory-v2.test.ts +3 -3
  82. package/src/config/schemas/channels.ts +9 -0
  83. package/src/config/schemas/conversations.ts +10 -0
  84. package/src/config/schemas/heartbeat.ts +14 -0
  85. package/src/config/schemas/llm.ts +1 -3
  86. package/src/config/schemas/memory-retrospective.ts +1 -1
  87. package/src/config/schemas/memory-v2.ts +4 -4
  88. package/src/config/schemas/memory.ts +3 -1
  89. package/src/config/seed-inference-profiles.ts +99 -29
  90. package/src/context/compactor.ts +72 -12
  91. package/src/context/token-estimator.ts +32 -34
  92. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +3 -22
  93. package/src/daemon/conversation-agent-loop-handlers.ts +78 -0
  94. package/src/daemon/conversation-agent-loop.ts +29 -2
  95. package/src/daemon/conversation-runtime-assembly.ts +9 -0
  96. package/src/daemon/conversation.ts +0 -7
  97. package/src/daemon/date-context.ts +40 -0
  98. package/src/daemon/guardian-action-generators.ts +1 -125
  99. package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +248 -0
  100. package/src/daemon/handlers/__tests__/config-a2a-invite.test.ts +154 -0
  101. package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +133 -0
  102. package/src/daemon/handlers/__tests__/config-a2a.test.ts +95 -0
  103. package/src/daemon/handlers/config-a2a.ts +289 -0
  104. package/src/daemon/handlers/conversations.ts +1 -0
  105. package/src/daemon/host-app-control-proxy.ts +69 -18
  106. package/src/daemon/host-proxy-preactivation.ts +85 -18
  107. package/src/daemon/lifecycle.ts +49 -61
  108. package/src/daemon/memory-v2-startup.ts +49 -13
  109. package/src/daemon/message-types/notifications.ts +21 -0
  110. package/src/daemon/pkb-reminder-builder.test.ts +10 -53
  111. package/src/daemon/pkb-reminder-builder.ts +4 -19
  112. package/src/daemon/process-message.ts +3 -0
  113. package/src/daemon/skill-memory-refresh.ts +5 -1
  114. package/src/daemon/wake-target-adapter.ts +2 -0
  115. package/src/export/__tests__/transcript-formatter.test.ts +121 -0
  116. package/src/export/transcript-formatter.ts +54 -20
  117. package/src/heartbeat/__tests__/heartbeat-service.test.ts +44 -0
  118. package/src/heartbeat/heartbeat-service.ts +34 -191
  119. package/src/home/__tests__/feed-types.test.ts +40 -0
  120. package/src/home/feed-types.ts +14 -2
  121. package/src/ipc/cli-client.ts +147 -45
  122. package/src/memory/__tests__/conversation-queries.test.ts +220 -0
  123. package/src/memory/__tests__/memory-retrospective-enqueue.test.ts +2 -50
  124. package/src/memory/__tests__/memory-retrospective-job.test.ts +87 -4
  125. package/src/memory/conversation-queries.ts +87 -1
  126. package/src/memory/conversation-title-service.ts +26 -4
  127. package/src/memory/db-init.ts +6 -0
  128. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +84 -3
  129. package/src/memory/graph/conversation-graph-memory.ts +18 -6
  130. package/src/memory/graph/tools.ts +6 -37
  131. package/src/memory/invite-store.ts +53 -0
  132. package/src/memory/llm-request-log-source-clickhouse.ts +7 -2
  133. package/src/memory/llm-request-log-store.ts +92 -1
  134. package/src/memory/memory-retrospective-enqueue.ts +1 -20
  135. package/src/memory/memory-retrospective-job.ts +33 -6
  136. package/src/memory/migrations/250-provider-connection-base-url-and-models.ts +28 -0
  137. package/src/memory/migrations/251-a2a-tasks.ts +49 -0
  138. package/src/memory/migrations/252-llm-request-log-agent-loop-exit-reason.ts +32 -0
  139. package/src/memory/migrations/index.ts +3 -0
  140. package/src/memory/migrations/registry.ts +8 -0
  141. package/src/memory/schema/a2a.ts +15 -0
  142. package/src/memory/schema/index.ts +1 -0
  143. package/src/memory/schema/inference.ts +2 -0
  144. package/src/memory/schema/infrastructure.ts +1 -0
  145. package/src/memory/v2/__tests__/activation-store.test.ts +25 -23
  146. package/src/memory/v2/__tests__/cli-command-store.test.ts +404 -0
  147. package/src/memory/v2/__tests__/frontmatter-sweep.test.ts +25 -4
  148. package/src/memory/v2/__tests__/injection.test.ts +190 -3
  149. package/src/memory/v2/__tests__/static-context.test.ts +12 -1
  150. package/src/memory/v2/activation-store.ts +14 -16
  151. package/src/memory/v2/cli-command-content.ts +19 -0
  152. package/src/memory/v2/cli-command-store.ts +304 -0
  153. package/src/memory/v2/frontmatter-sweep.ts +7 -1
  154. package/src/memory/v2/injection.ts +49 -20
  155. package/src/memory/v2/page-index.ts +38 -13
  156. package/src/memory/v2/static-context.ts +4 -4
  157. package/src/memory/v2/types.ts +23 -0
  158. package/src/messaging/providers/a2a/__tests__/deliver.test.ts +274 -0
  159. package/src/messaging/providers/a2a/deliver.ts +156 -0
  160. package/src/messaging/providers/gmail/client.ts +9 -2
  161. package/src/messaging/providers/index.ts +11 -2
  162. package/src/notifications/__tests__/broadcaster.test.ts +203 -0
  163. package/src/notifications/__tests__/decision-engine.test.ts +283 -0
  164. package/src/notifications/__tests__/deterministic-checks.test.ts +286 -0
  165. package/src/notifications/__tests__/emit-signal-home-feed.test.ts +1 -0
  166. package/src/notifications/__tests__/home-feed-side-effect.test.ts +430 -7
  167. package/src/notifications/adapters/macos.ts +12 -2
  168. package/src/notifications/broadcaster.ts +29 -4
  169. package/src/notifications/copy-composer.ts +17 -64
  170. package/src/notifications/decision-engine.ts +111 -44
  171. package/src/notifications/deterministic-checks.ts +96 -0
  172. package/src/notifications/emit-signal.ts +1 -0
  173. package/src/notifications/home-feed-side-effect.ts +85 -6
  174. package/src/notifications/signal.ts +0 -4
  175. package/src/notifications/types.ts +8 -0
  176. package/src/oauth/platform-connection.test.ts +43 -3
  177. package/src/oauth/platform-connection.ts +13 -4
  178. package/src/plugins/defaults/injectors.ts +38 -19
  179. package/src/plugins/external-plugin-loader.ts +82 -10
  180. package/src/plugins/types.ts +16 -7
  181. package/src/prompts/__tests__/system-prompt.test.ts +6 -51
  182. package/src/prompts/__tests__/task-progress-hint-section.test.ts +4 -8
  183. package/src/prompts/system-prompt.ts +0 -8
  184. package/src/prompts/templates/BOOTSTRAP.md +5 -5
  185. package/src/prompts/templates/system-sections.ts +0 -9
  186. package/src/providers/__tests__/inference.test.ts +2 -0
  187. package/src/providers/call-site-routing.ts +24 -6
  188. package/src/providers/connection-resolution.ts +63 -13
  189. package/src/providers/inference/__tests__/adapter-factory-openai-compatible.test.ts +74 -0
  190. package/src/providers/inference/__tests__/connections-openai-compatible.test.ts +175 -0
  191. package/src/providers/inference/__tests__/connections-status-label.test.ts +15 -0
  192. package/src/providers/inference/adapter-factory.ts +9 -20
  193. package/src/providers/inference/auth.ts +12 -0
  194. package/src/providers/inference/backfill.ts +14 -1
  195. package/src/providers/inference/connections.ts +85 -5
  196. package/src/providers/inference/resolve-auth.ts +2 -0
  197. package/src/providers/model-catalog.ts +199 -244
  198. package/src/providers/model-intents.ts +3 -3
  199. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +235 -0
  200. package/src/providers/openai/chat-completions-provider.ts +159 -6
  201. package/src/providers/openrouter/client.ts +42 -4
  202. package/src/providers/platform-proxy/constants.ts +3 -4
  203. package/src/providers/provider-catalog-visibility.ts +3 -1
  204. package/src/providers/provider-send-message.ts +27 -12
  205. package/src/providers/registry.ts +30 -1
  206. package/src/runtime/agent-wake.ts +61 -1
  207. package/src/runtime/auth/route-policy.ts +13 -0
  208. package/src/runtime/http-server.ts +7 -16
  209. package/src/runtime/http-types.ts +0 -47
  210. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +258 -0
  211. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +66 -4
  212. package/src/runtime/routes/__tests__/inference-provider-connection-routes.test.ts +275 -44
  213. package/src/runtime/routes/__tests__/llm-call-sites-routes.test.ts +12 -0
  214. package/src/runtime/routes/channel-availability-routes.ts +5 -0
  215. package/src/runtime/routes/consolidation-routes.ts +100 -0
  216. package/src/runtime/routes/conversation-query-routes.ts +70 -11
  217. package/src/runtime/routes/conversation-routes.ts +7 -0
  218. package/src/runtime/routes/index.ts +2 -0
  219. package/src/runtime/routes/inference-provider-connection-routes.ts +134 -1
  220. package/src/runtime/routes/integrations/a2a.ts +235 -0
  221. package/src/runtime/routes/llm-call-sites-routes.ts +11 -1
  222. package/src/runtime/routes/subagents-routes.ts +41 -0
  223. package/src/subagent/manager.ts +2 -0
  224. package/src/tools/memory/register.ts +1 -9
  225. package/src/tools/registry.ts +2 -2
  226. package/src/tools/types.ts +37 -2
  227. package/src/workspace/migrations/087-memory-router-balanced-profile.ts +91 -0
  228. package/src/workspace/migrations/registry.ts +2 -0
  229. package/src/__tests__/guardian-action-conversation-turn.test.ts +0 -441
  230. package/src/memory/graph/__tests__/remember-description.test.ts +0 -55
  231. package/src/runtime/guardian-action-conversation-turn.ts +0 -99
@@ -286,19 +286,18 @@ async function copyFile(
286
286
 
287
287
  /**
288
288
  * Wraps `fetchFn` with the headers we want to send to GitHub for every
289
- * request. Honors `GITHUB_TOKEN` when present so users who hit the
290
- * unauthenticated rate limit can opt into a higher cap.
289
+ * request. Unauthenticated — the canonical source is a public repo, so
290
+ * there is nothing for an `Authorization` header to do.
291
291
  */
292
292
  async function githubFetch(
293
293
  url: string,
294
294
  accept: string,
295
295
  fetchFn: FetchLike,
296
296
  ): Promise<Response> {
297
- const headers: Record<string, string> = {
298
- Accept: accept,
299
- "User-Agent": "vellum-assistant-cli",
300
- };
301
- const token = process.env.GITHUB_TOKEN?.trim();
302
- if (token) headers.Authorization = `Bearer ${token}`;
303
- return fetchFn(url, { headers });
297
+ return fetchFn(url, {
298
+ headers: {
299
+ Accept: accept,
300
+ "User-Agent": "vellum-assistant-cli",
301
+ },
302
+ });
304
303
  }
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Search for plugin directories in the canonical GitHub source.
3
+ *
4
+ * Lists `vellum-ai/vellum-assistant/experimental/plugins/` at the configured
5
+ * git ref and filters the directory entries by case-insensitive ECMAScript
6
+ * regex. A plain query like `"memory"` matches anywhere in the name; anchors
7
+ * like `"^simple"` work without escaping.
8
+ *
9
+ * Designed for direct programmatic use. The CLI command
10
+ * `assistant plugins search <query>` is a thin wrapper that supplies
11
+ * production deps (`globalThis.fetch`) and formats the result for the
12
+ * terminal; downstream callers may supply their own `fetch` (e.g. a
13
+ * retry-decorated client, or a test fixture).
14
+ */
15
+
16
+ import type { FetchLike } from "./install-from-github.js";
17
+ import { DEFAULT_PLUGIN_REF } from "./install-from-github.js";
18
+
19
+ // Re-export the dep-injection type so callers can grab everything they need
20
+ // from one module rather than reaching into `install-from-github.js`.
21
+ export type { FetchLike } from "./install-from-github.js";
22
+
23
+ const PLUGIN_SOURCE_OWNER = "vellum-ai";
24
+ const PLUGIN_SOURCE_REPO = "vellum-assistant";
25
+ const PLUGIN_SOURCE_PATH_PREFIX = "experimental/plugins";
26
+
27
+ /** Entry shape returned by the GitHub Contents API for a directory listing. */
28
+ interface GitHubContentEntry {
29
+ readonly name: string;
30
+ readonly path: string;
31
+ readonly type: "file" | "dir" | "symlink" | "submodule";
32
+ readonly size: number;
33
+ readonly download_url: string | null;
34
+ }
35
+
36
+ /** Options that control the search. */
37
+ export interface SearchPluginsOptions {
38
+ /**
39
+ * ECMAScript regex pattern. Matched case-insensitively against directory
40
+ * names. Empty string matches everything.
41
+ */
42
+ readonly query: string;
43
+ /** Git ref to list from. Defaults to {@link DEFAULT_PLUGIN_REF}. */
44
+ readonly ref?: string;
45
+ }
46
+
47
+ /** Dependencies injected by the caller. */
48
+ export interface SearchPluginsDeps {
49
+ /** HTTP client. Production callers pass `globalThis.fetch.bind(globalThis)`. */
50
+ readonly fetch: FetchLike;
51
+ }
52
+
53
+ /** One matching plugin directory. */
54
+ export interface PluginSearchMatch {
55
+ /** Directory name under `experimental/plugins/`. */
56
+ readonly name: string;
57
+ /** Path within the repo (e.g. `experimental/plugins/<name>`). */
58
+ readonly path: string;
59
+ }
60
+
61
+ /** Search result envelope. */
62
+ export interface SearchPluginsResult {
63
+ readonly query: string;
64
+ readonly ref: string;
65
+ readonly matches: readonly PluginSearchMatch[];
66
+ }
67
+
68
+ /** Caller passed a query that doesn't compile as an ECMAScript regex. */
69
+ export class InvalidSearchPatternError extends Error {
70
+ constructor(pattern: string, cause: unknown) {
71
+ const detail = cause instanceof Error ? cause.message : String(cause);
72
+ super(`Invalid regex pattern ${JSON.stringify(pattern)}: ${detail}`);
73
+ this.name = "InvalidSearchPatternError";
74
+ }
75
+ }
76
+
77
+ /**
78
+ * List directories under `experimental/plugins/` at {@link opts.ref} and
79
+ * filter by {@link opts.query}.
80
+ *
81
+ * Only `type === "dir"` entries are returned — `experimental/plugins/`
82
+ * follows a convention where each plugin lives in its own directory, so
83
+ * loose files at the prefix are not plugins.
84
+ */
85
+ export async function searchPlugins(
86
+ opts: SearchPluginsOptions,
87
+ deps: SearchPluginsDeps,
88
+ ): Promise<SearchPluginsResult> {
89
+ const ref = opts.ref ?? DEFAULT_PLUGIN_REF;
90
+
91
+ // Compile the matcher up front so an invalid regex fails before we hit
92
+ // the network — keeps "user typo" cheap to recover from.
93
+ const matcher = buildMatcher(opts.query);
94
+
95
+ const entries = await listDir(PLUGIN_SOURCE_PATH_PREFIX, ref, deps.fetch);
96
+
97
+ const matches: PluginSearchMatch[] = [];
98
+ for (const entry of entries) {
99
+ if (entry.type !== "dir") continue;
100
+ if (!matcher(entry.name)) continue;
101
+ matches.push({ name: entry.name, path: entry.path });
102
+ }
103
+ matches.sort((a, b) => a.name.localeCompare(b.name));
104
+
105
+ return { query: opts.query, ref, matches };
106
+ }
107
+
108
+ function buildMatcher(query: string): (name: string) => boolean {
109
+ let re: RegExp;
110
+ try {
111
+ re = new RegExp(query, "i");
112
+ } catch (err) {
113
+ throw new InvalidSearchPatternError(query, err);
114
+ }
115
+ return (name) => re.test(name);
116
+ }
117
+
118
+ async function listDir(
119
+ apiPath: string,
120
+ ref: string,
121
+ fetchFn: FetchLike,
122
+ ): Promise<readonly GitHubContentEntry[]> {
123
+ const url =
124
+ `https://api.github.com/repos/${PLUGIN_SOURCE_OWNER}/${PLUGIN_SOURCE_REPO}` +
125
+ `/contents/${encodeURIComponent(apiPath).replaceAll("%2F", "/")}` +
126
+ `?ref=${encodeURIComponent(ref)}`;
127
+
128
+ const res = await githubFetch(url, fetchFn);
129
+ if (!res.ok) {
130
+ // Unlike `installPlugin`, where 404 on a specific plugin name is a
131
+ // legitimate "not found" outcome, 404 on the plugins prefix itself
132
+ // means the canonical source path is gone — surface it as an error
133
+ // rather than silently returning empty results.
134
+ throw new Error(
135
+ `GitHub contents listing failed for ${apiPath} @ ${ref}: HTTP ${res.status}`,
136
+ );
137
+ }
138
+
139
+ const body = (await res.json()) as unknown;
140
+ if (!Array.isArray(body)) {
141
+ // A non-array body for a /contents/<dir> path means the path is a
142
+ // file, not a directory — treat the prefix as empty rather than crash.
143
+ return [];
144
+ }
145
+ return body as readonly GitHubContentEntry[];
146
+ }
147
+
148
+ /**
149
+ * Wraps `fetchFn` with the headers we want to send to GitHub for every
150
+ * request. Unauthenticated — the canonical source is a public repo, mirroring
151
+ * `installPlugin` which uses the same envelope.
152
+ */
153
+ async function githubFetch(
154
+ url: string,
155
+ fetchFn: FetchLike,
156
+ ): Promise<Response> {
157
+ return fetchFn(url, {
158
+ headers: {
159
+ Accept: "application/vnd.github+json",
160
+ "User-Agent": "vellum-assistant-cli",
161
+ },
162
+ });
163
+ }
@@ -62,6 +62,20 @@ import { log } from "./logger.js";
62
62
  */
63
63
  export async function buildCliProgram(): Promise<Command> {
64
64
  await initFeatureFlagOverrides({ retryBackoffsMs: [], callTimeoutMs: 200 });
65
+ return buildCliProgramTree();
66
+ }
67
+
68
+ /**
69
+ * Synchronously build the CLI program tree without pre-populating the
70
+ * feature-flag cache. Use this from inside the daemon, where flags are
71
+ * already initialized — calling `buildCliProgram` from there would round-trip
72
+ * to the gateway unnecessarily.
73
+ *
74
+ * Same shape as `buildCliProgram` minus the async feature-flag init: registers
75
+ * the full subcommand set (conditionally gated on email / external-plugins
76
+ * flags via `getConfigReadOnly()`) and installs the workspace-preAction hook.
77
+ */
78
+ export function buildCliProgramTree(): Command {
65
79
  const program = new Command();
66
80
 
67
81
  program
@@ -110,20 +110,22 @@ function parseRegistryToDefaults(parsed: unknown): FeatureFlagDefaultsRegistry {
110
110
  }
111
111
 
112
112
  // ---------------------------------------------------------------------------
113
- // Override loading — reads from gateway IPC socket or local file
113
+ // Override loading — reads from gateway IPC socket
114
114
  // ---------------------------------------------------------------------------
115
115
 
116
116
  /**
117
- * Module-level cache of feature flag override values. Populated lazily on
118
- * first access, invalidated by `clearFeatureFlagOverridesCache()`.
117
+ * Module-level cache of feature flag override values. Populated by
118
+ * `initFeatureFlagOverrides()` at startup, invalidated by
119
+ * `clearFeatureFlagOverridesCache()`.
119
120
  */
120
121
  let cachedOverrides: Record<string, boolean> | null = null;
121
122
 
122
123
  /**
123
- * True when `cachedOverrides` was populated by the gateway IPC fetch (or
124
- * preseeded by a test). False/unset when the cache was populated by the sync
125
- * file fallback in `loadOverrides()`, which must not prevent a subsequent
126
- * authoritative gateway fetch from running.
124
+ * True when `cachedOverrides` was populated by the gateway IPC fetch or
125
+ * preseeded by a test via `_setOverridesForTesting()`. Guards
126
+ * `initFeatureFlagOverrides()` from clobbering an existing populated cache
127
+ * when called a second time (e.g. by a CLI entry point after the daemon
128
+ * has already initialized).
127
129
  */
128
130
  let cachedOverridesFromGateway = false;
129
131
 
@@ -247,59 +249,30 @@ function loadOverrides(): Record<string, boolean> {
247
249
  return cachedOverrides ?? {};
248
250
  }
249
251
 
250
- // ---------------------------------------------------------------------------
251
- // Remote values — platform-pushed flags cached in a local JSON file
252
- // ---------------------------------------------------------------------------
253
-
254
252
  /**
255
- * Module-level cache of remote feature flag values. Populated lazily on
256
- * first access, invalidated by `clearFeatureFlagOverridesCache()`.
257
- */
258
- let cachedRemoteValues: Record<string, boolean> | null = null;
259
-
260
- /**
261
- * Load remote values with module-level caching.
253
+ * Invalidate the cached overrides so the next call to
254
+ * `isAssistantFeatureFlagEnabled` re-reads from the gateway.
262
255
  *
263
- * Remote values are now always included in the gateway IPC response (merged
264
- * server-side), so this only returns the injected test cache. In production,
265
- * remote values flow through the overrides cache.
266
- */
267
- function loadRemoteValues(): Record<string, boolean> {
268
- return cachedRemoteValues ?? {};
269
- }
270
-
271
- /**
272
- * Invalidate the cached override and remote values so the next call to
273
- * `isAssistantFeatureFlagEnabled` re-reads from the source.
274
- *
275
- * Called by the config watcher when the feature-flags file changes.
256
+ * Used by tests between cases to reset module state.
276
257
  */
277
258
  export function clearFeatureFlagOverridesCache(): void {
278
259
  cachedOverrides = null;
279
260
  cachedOverridesFromGateway = false;
280
- cachedRemoteValues = null;
281
261
  }
282
262
 
283
263
  /**
284
264
  * Directly inject override values into the module-level cache.
285
265
  *
286
- * **Test-only** — bypasses file/gateway loading so unit tests can control
287
- * flag state without writing to disk. Production code should never call this;
288
- * use `clearFeatureFlagOverridesCache()` instead and let the resolver
289
- * re-read from the appropriate source.
290
- *
291
- * Forces `cachedRemoteValues` to an empty record (not `null`) so the resolver
292
- * does not fall through to reading `feature-flags-remote.json` from disk. This
293
- * matters because a developer's local remote-cache file can leak platform-set
294
- * values into the test environment (e.g. `email-channel: true`), defeating
295
- * test isolation.
266
+ * **Test-only** — bypasses the gateway IPC fetch so unit tests can control
267
+ * flag state without standing up a real gateway. Production code should
268
+ * never call this; use `clearFeatureFlagOverridesCache()` instead and let
269
+ * the resolver re-read from the gateway.
296
270
  */
297
271
  export function _setOverridesForTesting(
298
272
  overrides: Record<string, boolean>,
299
273
  ): void {
300
274
  cachedOverrides = { ...overrides };
301
275
  cachedOverridesFromGateway = true;
302
- cachedRemoteValues = {};
303
276
  }
304
277
 
305
278
  // ---------------------------------------------------------------------------
@@ -310,9 +283,11 @@ export function _setOverridesForTesting(
310
283
  * Resolve whether an assistant feature flag is enabled.
311
284
  *
312
285
  * Resolution order:
313
- * 1. Override from gateway IPC socket
314
- * 2. defaults registry `defaultEnabled` (for declared assistant-scope keys)
315
- * 3. `true` (for undeclared keys with no override)
286
+ * 1. Override from the gateway IPC fetch (includes platform-pushed remote
287
+ * values, which the gateway merges server-side: persisted > remote >
288
+ * registry)
289
+ * 2. Registry `defaultEnabled` (for declared assistant-scope keys)
290
+ * 3. `true` (for undeclared keys with no override)
316
291
  */
317
292
  export function isAssistantFeatureFlagEnabled(
318
293
  key: string,
@@ -322,18 +297,13 @@ export function isAssistantFeatureFlagEnabled(
322
297
  const declared = defaults[key];
323
298
  const overrides = loadOverrides();
324
299
 
325
- // 1. Check overrides from gateway / local file
300
+ // 1. Check overrides from the gateway IPC cache.
326
301
  const explicit = overrides[key];
327
302
  if (typeof explicit === "boolean") return explicit;
328
303
 
329
- // 2. Check remote values (platform-pushed, cached locally)
330
- const remote = loadRemoteValues();
331
- const remoteValue = remote[key];
332
- if (typeof remoteValue === "boolean") return remoteValue;
333
-
334
- // 3. For declared keys, use the registry default
304
+ // 2. For declared keys, use the registry default.
335
305
  if (declared) return declared.defaultEnabled;
336
306
 
337
- // 4. Undeclared keys with no persisted override default to enabled
307
+ // 3. Undeclared keys with no override default to enabled.
338
308
  return true;
339
309
  }
@@ -54,8 +54,114 @@ Each record is a JSON file at `<slug>/records/<uuid>.json` with shape:
54
54
 
55
55
  All new apps use `formatVersion: 2`: source files live under `src/` and compiled output lives under `dist/`. The build system compiles TSX to JS automatically when `app_refresh` is called.
56
56
 
57
+ ## Responsive Baseline & Mobile-First Mode
58
+
59
+ Every app must be responsive across the full width range — phone (~360px) to desktop (~1400px+). The conversation context's `<turn_context>` block carries an `interface:` field. Visual interfaces are `macos`, `ios`, and `web`; the field doesn't toggle responsiveness on or off — it shifts the **design priority**. Non-visual values like `phone` represent voice channels that can't render apps at all and don't need to be considered here.
60
+
61
+ - **`interface: ios`** (or any future mobile-web / android identifier) — mobile-first build. Design the narrow viewport first and progressively enhance upward at wider widths.
62
+ - **`interface: macos` / `web`** — desktop-first build. Design the larger composition first; the narrow-width fallback must still meet the universal baseline below but doesn't need to feel like a native mobile app.
63
+ - **Field absent or ambiguous** — default to desktop-first unless the user's request itself implies phone use ("for my iPhone home screen", "a tap-tracker I'll use on the go").
64
+
65
+ ### Universal baseline (every build, regardless of interface)
66
+
67
+ These rules aren't mobile-specific — they're touch / responsive a11y baselines that any user-resizable WebView needs.
68
+
69
+ **Viewport & safe areas**
70
+
71
+ - Viewport meta: `<meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover">`. Never set `user-scalable=no` — it blocks accessibility zoom.
72
+ - Pad the root container with `env(safe-area-inset-*)` so content clears the notch / home indicator when the app is opened on a notched device: `padding-top: max(var(--v-spacing-lg), env(safe-area-inset-top))`, mirrored for `-bottom`/`-left`/`-right`. On desktop the env vars resolve to `0` and the `max()` falls through to the design-system value — no-op.
73
+ - Use `100dvh` (dynamic viewport height), not `100vh`, for full-height containers. `100vh` creates a scroll-jump on every mobile browser regardless of build mode.
74
+
75
+ **Form controls**
76
+
77
+ - `<input>`, `<textarea>`, `<select>` must be `font-size: 16px` or larger, or iOS Safari will zoom on focus and break the layout. This applies to every build — anyone may open a desktop-built app on their phone.
78
+ - Add `inputmode` to text fields with structured input: `numeric` for integers, `decimal` for amounts, `email`, `tel`, `url`. Add matching `autocomplete` and `autocapitalize` hints where appropriate.
79
+
80
+ **Touch & hover**
81
+
82
+ - Interactive elements (buttons, list rows, nav items, toggles, icon buttons) must be at least 44×44 CSS px (44pt on iOS). `.v-button` already meets this; for custom controls, set `min-width: 44px; min-height: 44px` explicitly.
83
+ - Gate hover affordances behind `@media (hover: hover)` so they don't stick on touch devices visiting a desktop-built app.
84
+ - Disable text selection on app chrome (headers, nav, buttons) with `user-select: none; -webkit-user-select: none` so long-press doesn't pop the iOS selection menu over interactive elements.
85
+
86
+ **Layout fluidity**
87
+
88
+ - Fluid widths only — no fixed-pixel layouts. Use `%`, `fr`, `minmax`, `clamp()` instead of `px` on container widths.
89
+ - Horizontal-scroll tables don't work on narrow screens. At narrow widths, collapse rows into stacked cards with labels and values arranged vertically. (Mobile-first builds can use cards everywhere; desktop-first builds can keep the table at wide widths and switch to cards below a breakpoint.)
90
+ - `vellum.widgets.*` chart containers should be sized in `vw`/`%`, not fixed `px`. Prefer simpler chart types (sparkline, bar) at narrow widths — dense multi-series charts lose detail.
91
+
92
+ ### Mobile-first priorities (`interface: ios` or future mobile identifier)
93
+
94
+ These are the **design priority differences** that mobile-first builds adopt on top of the universal baseline. They reflect "narrow viewport is the primary experience, wider widths progressively enhance."
95
+
96
+ **Typography**
97
+
98
+ - Default body text to `--v-font-size-lg` (17px), not `--v-font-size-base` (14px) — the desktop base is too small to read comfortably on a phone. At wider widths the same 17px reads fine.
99
+
100
+ **Spacing**
101
+
102
+ - Bump default vertical rhythm one step (e.g. `--v-spacing-md` → `--v-spacing-lg` between cards and sections) so users can comfortably scroll-stop on each item.
103
+
104
+ **Layout**
105
+
106
+ - One column as the **default**, not as a narrow-width fallback. `flex-direction: column` first; opt into a multi-column grid only above a width breakpoint (`@media (min-width: 720px)`). No side rails, no two-pane master/detail, no fixed-width sidebars in the default view.
107
+ - Bottom-anchor the primary action (e.g. "Add", "Save") so the thumb can reach it: `position: sticky; bottom: env(safe-area-inset-bottom)` over the scrolling list. On wider widths you may re-flow it back inline.
108
+ - Replace side modals and popovers with bottom sheets that animate up from the bottom edge.
109
+
110
+ **Interaction**
111
+
112
+ - Skip the Tab/Enter/Esc keyboard pattern from "Interaction Standards" as the primary affordance — on mobile, focus comes from taps, submit from the soft keyboard's `return`, dismissal from a swipe down on bottom sheets. Keyboard support is still allowed (external-keyboard users exist on iPad) but isn't the design driver.
113
+
114
+ ### Desktop-first priorities (`interface: macos` / `web`)
115
+
116
+ The default behaviour the rest of this skill describes — multi-column composition, hover-rich affordances, denser information, side modals, inline primary actions. The universal baseline above is the floor: the narrow-width view must still work and follow the touch / responsive a11y rules, but it doesn't need to feel native to mobile.
117
+
118
+ Everything else in this skill applies unchanged.
119
+
57
120
  ## Workflow
58
121
 
122
+ ### 0. Preflight — Pin to a high-quality model
123
+
124
+ App building is design-heavy judgment work — color palettes, layout decisions, component architecture, micro-interactions. A stronger model produces meaningfully better apps: more creative visual directions, cleaner component boundaries, fewer generic patterns. Before building, check whether the conversation is already pinned to the quality profile:
125
+
126
+ ```
127
+ assistant inference session list
128
+ ```
129
+
130
+ If no session is active, check the current default profile:
131
+
132
+ ```
133
+ assistant config get llm.default.profile
134
+ ```
135
+
136
+ If an active session or the default profile is already `quality-optimized`, skip the rest of this step and proceed to Step 1.
137
+
138
+ **If the active profile is `balanced`, `cost-optimized`, or any non-quality profile, you MUST ask the user for permission before switching. Do NOT open an inference session without explicit user confirmation.** Use `assistant ui confirm`:
139
+
140
+ ```
141
+ assistant ui confirm --message "App building works best with a high-quality model — it makes better design decisions, writes cleaner components, and produces more visually polished results. Switch to the quality profile for this build? (You can switch back after.)"
142
+ ```
143
+
144
+ If `assistant ui confirm` isn't available on this binary, ask the user directly in conversation instead. **Either way, wait for the user's answer before proceeding.**
145
+
146
+ **Only if the user confirms**, open an inference session:
147
+
148
+ ```
149
+ assistant inference session open quality-optimized --ttl 1h
150
+ ```
151
+
152
+ If `quality-optimized` isn't a profile name on this workspace, list the available profiles and open against the highest-quality one:
153
+
154
+ ```
155
+ assistant config get llm.profiles
156
+ assistant inference session open <profile-name> --ttl 1h
157
+ ```
158
+
159
+ The `--ttl 1h` gives comfortable headroom for a typical app build without leaving a forever-pinned session if the close in Step 6 is skipped.
160
+
161
+ **If the user declines, do not switch profiles.** Proceed with the current profile — the build still works, the model just won't be pinned. Skip the close in Step 6 too.
162
+
163
+ If `assistant inference session` isn't available on this binary, proceed without it.
164
+
59
165
  ### 1. Gather Requirements
60
166
 
61
167
  **Default: just build.** When a user says "build me a habit tracker," don't ask what colors they want or how many fields to include. Immediately:
@@ -351,6 +457,16 @@ After making all file changes, call `app_refresh(app_id)` once to compile and re
351
457
 
352
458
  Apps should have multiple source files under `src/` (`styles.css`, components, helpers, etc.). Import CSS and modules from TSX so esbuild includes them in the compiled output.
353
459
 
460
+ ### 6. Close the inference session
461
+
462
+ If you opened an inference session in Step 0, close it now:
463
+
464
+ ```
465
+ assistant inference session close
466
+ ```
467
+
468
+ If you skipped the open in Step 0 (because the user declined, the CLI didn't have the command, or the profile was already `quality-optimized`), skip this step too.
469
+
354
470
  ## Interaction Standards
355
471
 
356
472
  Every app must meet these baselines:
@@ -359,7 +475,7 @@ Every app must meet these baselines:
359
475
  - **Confirmation for destructive actions:** Use `window.vellum.confirm(title, message)` before deleting or resetting. Returns `Promise<boolean>`.
360
476
  - **Form validation:** Validate before submit, show errors inline, disable submit during async operations.
361
477
  - **Loading states:** Never show a blank screen while data loads. Use skeleton shimmer or spinners.
362
- - **Keyboard navigation:** `Tab` between elements, `Enter` to submit, `Escape` to close/cancel.
478
+ - **Keyboard navigation:** `Tab` between elements, `Enter` to submit, `Escape` to close/cancel. *(De-prioritised on mobile-first builds — see [Responsive Baseline & Mobile-First Mode](#responsive-baseline--mobile-first-mode).)*
363
479
 
364
480
  ## Presentation Slide Design
365
481
 
@@ -33,7 +33,7 @@ Follow the steps below to ensure everything is prepared to make and receive phon
33
33
 
34
34
  ## Step 1: Twilio Setup
35
35
 
36
- Load the `twilio-setup` skill to determine whether Twilio has been fully configured and set it up if not. This is a prerequisite to all subsequent steps.
36
+ Immediately load the `twilio-setup` skill to begin setup. That skill marks Twilio setup as started before its read-only checks, which gives managed deployments a chance to open the Velay tunnel WebSocket while the user finishes entering credentials and choosing a number. Twilio setup is a prerequisite to all subsequent steps.
37
37
 
38
38
  ## Step 2: Enable Calls
39
39
 
@@ -0,0 +1,105 @@
1
+ import { type LLMCallSite } from "./schemas/llm.js";
2
+
3
+ type CallSiteDefaultConfig = {
4
+ profile: string;
5
+ maxTokens?: number;
6
+ effort?: "none" | "low" | "medium" | "high" | "xhigh" | "max";
7
+ temperature?: number | null;
8
+ thinking?: { enabled?: boolean; streamThinking?: boolean };
9
+ contextWindow?: { maxInputTokens?: number };
10
+ };
11
+
12
+ export const CALL_SITE_DEFAULTS: Record<LLMCallSite, CallSiteDefaultConfig> = {
13
+ mainAgent: { profile: "balanced" },
14
+ subagentSpawn: { profile: "balanced" },
15
+ compactionAgent: { profile: "balanced" },
16
+ analyzeConversation: { profile: "balanced" },
17
+ patternScan: { profile: "balanced" },
18
+ narrativeRefinement: { profile: "balanced" },
19
+ callAgent: { profile: "balanced" },
20
+ proactiveArtifactBuild: { profile: "balanced" },
21
+ memoryConsolidation: { profile: "balanced" },
22
+ identityIntro: { profile: "balanced" },
23
+ emptyStateGreeting: { profile: "balanced" },
24
+
25
+ memoryRouter: {
26
+ profile: "balanced",
27
+ contextWindow: { maxInputTokens: 1000000 },
28
+ },
29
+ recall: {
30
+ profile: "balanced",
31
+ maxTokens: 4096,
32
+ effort: "low",
33
+ thinking: { enabled: false, streamThinking: false },
34
+ temperature: 0,
35
+ },
36
+ conversationStarters: {
37
+ profile: "balanced",
38
+ effort: "low",
39
+ thinking: { enabled: false },
40
+ },
41
+
42
+ filingAgent: { profile: "cost-optimized" },
43
+ proactiveArtifactDecision: { profile: "cost-optimized" },
44
+ memoryExtraction: { profile: "cost-optimized" },
45
+ memoryRetrieval: { profile: "cost-optimized" },
46
+ memoryRetrospective: { profile: "cost-optimized" },
47
+ memoryV2Migration: { profile: "cost-optimized" },
48
+ memoryV2Sweep: { profile: "cost-optimized" },
49
+ memoryV2Consolidation: { profile: "balanced" },
50
+ conversationSummarization: { profile: "cost-optimized" },
51
+ conversationTitle: { profile: "cost-optimized" },
52
+ approvalCopy: { profile: "cost-optimized" },
53
+ approvalConversation: { profile: "cost-optimized" },
54
+ trustRuleSuggestion: { profile: "cost-optimized" },
55
+ styleAnalyzer: { profile: "cost-optimized" },
56
+ meetConsentMonitor: { profile: "cost-optimized" },
57
+ meetChatOpportunity: { profile: "cost-optimized" },
58
+ inference: { profile: "cost-optimized" },
59
+
60
+ heartbeatAgent: {
61
+ profile: "cost-optimized",
62
+ },
63
+ commitMessage: {
64
+ profile: "cost-optimized",
65
+ maxTokens: 120,
66
+ temperature: 0.2,
67
+ effort: "low",
68
+ thinking: { enabled: false },
69
+ },
70
+ replySuggestion: {
71
+ profile: "cost-optimized",
72
+ effort: "low",
73
+ thinking: { enabled: false },
74
+ },
75
+ guardianQuestionCopy: {
76
+ profile: "cost-optimized",
77
+ effort: "low",
78
+ thinking: { enabled: false },
79
+ },
80
+ notificationDecision: {
81
+ profile: "cost-optimized",
82
+ effort: "low",
83
+ thinking: { enabled: false },
84
+ },
85
+ preferenceExtraction: {
86
+ profile: "cost-optimized",
87
+ effort: "low",
88
+ thinking: { enabled: false },
89
+ },
90
+ interactionClassifier: {
91
+ profile: "cost-optimized",
92
+ effort: "low",
93
+ thinking: { enabled: false },
94
+ },
95
+ inviteInstructionGenerator: {
96
+ profile: "cost-optimized",
97
+ effort: "low",
98
+ thinking: { enabled: false },
99
+ },
100
+ skillCategoryInference: {
101
+ profile: "cost-optimized",
102
+ effort: "low",
103
+ thinking: { enabled: false },
104
+ },
105
+ };