@vellumai/assistant 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/ARCHITECTURE.md +11 -12
  2. package/docker-entrypoint.sh +13 -1
  3. package/docker-init-apt-root.sh +79 -6
  4. package/openapi.yaml +336 -21
  5. package/package.json +1 -1
  6. package/src/__tests__/agent-loop-exit-reason.test.ts +272 -0
  7. package/src/__tests__/agent-loop-provider-error-recording.test.ts +195 -0
  8. package/src/__tests__/compactor-tail-resolution.test.ts +107 -1
  9. package/src/__tests__/config-get-vision-flag.test.ts +136 -0
  10. package/src/__tests__/config-loader-backfill.test.ts +115 -18
  11. package/src/__tests__/context-token-estimator.test.ts +30 -65
  12. package/src/__tests__/conversation-agent-loop.test.ts +57 -1
  13. package/src/__tests__/conversation-media-retry.test.ts +19 -8
  14. package/src/__tests__/conversation-runtime-assembly.test.ts +26 -4
  15. package/src/__tests__/date-context.test.ts +45 -0
  16. package/src/__tests__/external-plugin-loader.test.ts +91 -19
  17. package/src/__tests__/guardian-action-no-hardcoded-copy.test.ts +0 -1
  18. package/src/__tests__/guardian-dispatch.test.ts +1 -0
  19. package/src/__tests__/heartbeat-service.test.ts +24 -164
  20. package/src/__tests__/helpers/channel-test-adapter.ts +0 -2
  21. package/src/__tests__/host-app-control-proxy.test.ts +241 -0
  22. package/src/__tests__/host-proxy-preactivation.test.ts +200 -13
  23. package/src/__tests__/injector-background-turn.test.ts +153 -0
  24. package/src/__tests__/injector-chain.test.ts +5 -0
  25. package/src/__tests__/lifecycle-memory-v2-seed.test.ts +9 -2
  26. package/src/__tests__/llm-callsite-catalog.test.ts +25 -0
  27. package/src/__tests__/llm-catalog-parity.test.ts +3 -0
  28. package/src/__tests__/llm-request-log-agent-loop-exit-reason.test.ts +116 -0
  29. package/src/__tests__/llm-request-log-error-payload.test.ts +138 -0
  30. package/src/__tests__/llm-request-log-source-clickhouse.test.ts +2 -0
  31. package/src/__tests__/llm-resolver.test.ts +255 -2
  32. package/src/__tests__/managed-profile-guard.test.ts +10 -0
  33. package/src/__tests__/notification-decision-fallback.test.ts +0 -91
  34. package/src/__tests__/notification-decision-strategy.test.ts +14 -31
  35. package/src/__tests__/notification-deep-link.test.ts +15 -0
  36. package/src/__tests__/notification-guardian-path.test.ts +1 -2
  37. package/src/__tests__/notification-platform-adapter.test.ts +5 -4
  38. package/src/__tests__/notification-telegram-adapter.test.ts +1 -0
  39. package/src/__tests__/notification-vellum-adapter.test.ts +113 -0
  40. package/src/__tests__/openai-provider.test.ts +218 -3
  41. package/src/__tests__/openai-responses-cutover-guard.test.ts +3 -3
  42. package/src/__tests__/openrouter-provider-only.test.ts +51 -3
  43. package/src/__tests__/openrouter-token-estimation.test.ts +34 -25
  44. package/src/__tests__/platform-proxy-context.test.ts +6 -1
  45. package/src/__tests__/plugin-tool-contribution.test.ts +3 -3
  46. package/src/__tests__/plugin-types.test.ts +2 -2
  47. package/src/__tests__/provider-catalog-visibility.test.ts +16 -0
  48. package/src/__tests__/provider-platform-proxy-integration.test.ts +27 -25
  49. package/src/__tests__/secret-routes-platform-proxy.test.ts +1 -1
  50. package/src/__tests__/system-prompt.test.ts +6 -73
  51. package/src/__tests__/workspace-migration-087-memory-router-balanced-profile.test.ts +228 -0
  52. package/src/a2a/__tests__/agent-card.test.ts +98 -0
  53. package/src/a2a/__tests__/e2e-a2a-channel.test.ts +597 -0
  54. package/src/a2a/__tests__/protocol-helpers.test.ts +113 -0
  55. package/src/a2a/__tests__/task-store.test.ts +246 -0
  56. package/src/a2a/agent-card.ts +58 -0
  57. package/src/a2a/feature-gate.ts +8 -0
  58. package/src/a2a/protocol-constants.ts +21 -0
  59. package/src/a2a/protocol-errors.ts +50 -0
  60. package/src/a2a/protocol-types.ts +162 -0
  61. package/src/a2a/task-store.ts +168 -0
  62. package/src/agent/loop.ts +167 -18
  63. package/src/channels/config.ts +9 -0
  64. package/src/channels/types.ts +14 -0
  65. package/src/cli/{__tests__ → commands/__tests__}/notifications.test.ts +201 -28
  66. package/src/cli/commands/__tests__/schedules.test.ts +469 -0
  67. package/src/cli/commands/notifications.ts +65 -35
  68. package/src/cli/commands/plugins.ts +67 -0
  69. package/src/cli/commands/schedules.ts +297 -5
  70. package/src/cli/lib/__tests__/search-plugins.test.ts +261 -0
  71. package/src/cli/lib/install-from-github.ts +8 -9
  72. package/src/cli/lib/search-plugins.ts +163 -0
  73. package/src/cli/program.ts +14 -0
  74. package/src/config/assistant-feature-flags.ts +24 -54
  75. package/src/config/bundled-skills/app-builder/SKILL.md +117 -1
  76. package/src/config/bundled-skills/phone-calls/SKILL.md +1 -1
  77. package/src/config/call-site-defaults.ts +105 -0
  78. package/src/config/feature-flag-registry.json +21 -29
  79. package/src/config/llm-resolver.ts +52 -1
  80. package/src/config/schema.ts +2 -0
  81. package/src/config/schemas/__tests__/memory-v2.test.ts +3 -3
  82. package/src/config/schemas/channels.ts +9 -0
  83. package/src/config/schemas/conversations.ts +10 -0
  84. package/src/config/schemas/heartbeat.ts +14 -0
  85. package/src/config/schemas/llm.ts +1 -3
  86. package/src/config/schemas/memory-retrospective.ts +1 -1
  87. package/src/config/schemas/memory-v2.ts +4 -4
  88. package/src/config/schemas/memory.ts +3 -1
  89. package/src/config/seed-inference-profiles.ts +99 -29
  90. package/src/context/compactor.ts +72 -12
  91. package/src/context/token-estimator.ts +32 -34
  92. package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +3 -22
  93. package/src/daemon/conversation-agent-loop-handlers.ts +78 -0
  94. package/src/daemon/conversation-agent-loop.ts +29 -2
  95. package/src/daemon/conversation-runtime-assembly.ts +9 -0
  96. package/src/daemon/conversation.ts +0 -7
  97. package/src/daemon/date-context.ts +40 -0
  98. package/src/daemon/guardian-action-generators.ts +1 -125
  99. package/src/daemon/handlers/__tests__/config-a2a-complete.test.ts +248 -0
  100. package/src/daemon/handlers/__tests__/config-a2a-invite.test.ts +154 -0
  101. package/src/daemon/handlers/__tests__/config-a2a-redeem.test.ts +133 -0
  102. package/src/daemon/handlers/__tests__/config-a2a.test.ts +95 -0
  103. package/src/daemon/handlers/config-a2a.ts +289 -0
  104. package/src/daemon/handlers/conversations.ts +1 -0
  105. package/src/daemon/host-app-control-proxy.ts +69 -18
  106. package/src/daemon/host-proxy-preactivation.ts +85 -18
  107. package/src/daemon/lifecycle.ts +49 -61
  108. package/src/daemon/memory-v2-startup.ts +49 -13
  109. package/src/daemon/message-types/notifications.ts +21 -0
  110. package/src/daemon/pkb-reminder-builder.test.ts +10 -53
  111. package/src/daemon/pkb-reminder-builder.ts +4 -19
  112. package/src/daemon/process-message.ts +3 -0
  113. package/src/daemon/skill-memory-refresh.ts +5 -1
  114. package/src/daemon/wake-target-adapter.ts +2 -0
  115. package/src/export/__tests__/transcript-formatter.test.ts +121 -0
  116. package/src/export/transcript-formatter.ts +54 -20
  117. package/src/heartbeat/__tests__/heartbeat-service.test.ts +44 -0
  118. package/src/heartbeat/heartbeat-service.ts +34 -191
  119. package/src/home/__tests__/feed-types.test.ts +40 -0
  120. package/src/home/feed-types.ts +14 -2
  121. package/src/ipc/cli-client.ts +147 -45
  122. package/src/memory/__tests__/conversation-queries.test.ts +220 -0
  123. package/src/memory/__tests__/memory-retrospective-enqueue.test.ts +2 -50
  124. package/src/memory/__tests__/memory-retrospective-job.test.ts +87 -4
  125. package/src/memory/conversation-queries.ts +87 -1
  126. package/src/memory/conversation-title-service.ts +26 -4
  127. package/src/memory/db-init.ts +6 -0
  128. package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +84 -3
  129. package/src/memory/graph/conversation-graph-memory.ts +18 -6
  130. package/src/memory/graph/tools.ts +6 -37
  131. package/src/memory/invite-store.ts +53 -0
  132. package/src/memory/llm-request-log-source-clickhouse.ts +7 -2
  133. package/src/memory/llm-request-log-store.ts +92 -1
  134. package/src/memory/memory-retrospective-enqueue.ts +1 -20
  135. package/src/memory/memory-retrospective-job.ts +33 -6
  136. package/src/memory/migrations/250-provider-connection-base-url-and-models.ts +28 -0
  137. package/src/memory/migrations/251-a2a-tasks.ts +49 -0
  138. package/src/memory/migrations/252-llm-request-log-agent-loop-exit-reason.ts +32 -0
  139. package/src/memory/migrations/index.ts +3 -0
  140. package/src/memory/migrations/registry.ts +8 -0
  141. package/src/memory/schema/a2a.ts +15 -0
  142. package/src/memory/schema/index.ts +1 -0
  143. package/src/memory/schema/inference.ts +2 -0
  144. package/src/memory/schema/infrastructure.ts +1 -0
  145. package/src/memory/v2/__tests__/activation-store.test.ts +25 -23
  146. package/src/memory/v2/__tests__/cli-command-store.test.ts +404 -0
  147. package/src/memory/v2/__tests__/frontmatter-sweep.test.ts +25 -4
  148. package/src/memory/v2/__tests__/injection.test.ts +190 -3
  149. package/src/memory/v2/__tests__/static-context.test.ts +12 -1
  150. package/src/memory/v2/activation-store.ts +14 -16
  151. package/src/memory/v2/cli-command-content.ts +19 -0
  152. package/src/memory/v2/cli-command-store.ts +304 -0
  153. package/src/memory/v2/frontmatter-sweep.ts +7 -1
  154. package/src/memory/v2/injection.ts +49 -20
  155. package/src/memory/v2/page-index.ts +38 -13
  156. package/src/memory/v2/static-context.ts +4 -4
  157. package/src/memory/v2/types.ts +23 -0
  158. package/src/messaging/providers/a2a/__tests__/deliver.test.ts +274 -0
  159. package/src/messaging/providers/a2a/deliver.ts +156 -0
  160. package/src/messaging/providers/gmail/client.ts +9 -2
  161. package/src/messaging/providers/index.ts +11 -2
  162. package/src/notifications/__tests__/broadcaster.test.ts +203 -0
  163. package/src/notifications/__tests__/decision-engine.test.ts +283 -0
  164. package/src/notifications/__tests__/deterministic-checks.test.ts +286 -0
  165. package/src/notifications/__tests__/emit-signal-home-feed.test.ts +1 -0
  166. package/src/notifications/__tests__/home-feed-side-effect.test.ts +430 -7
  167. package/src/notifications/adapters/macos.ts +12 -2
  168. package/src/notifications/broadcaster.ts +29 -4
  169. package/src/notifications/copy-composer.ts +17 -64
  170. package/src/notifications/decision-engine.ts +111 -44
  171. package/src/notifications/deterministic-checks.ts +96 -0
  172. package/src/notifications/emit-signal.ts +1 -0
  173. package/src/notifications/home-feed-side-effect.ts +85 -6
  174. package/src/notifications/signal.ts +0 -4
  175. package/src/notifications/types.ts +8 -0
  176. package/src/oauth/platform-connection.test.ts +43 -3
  177. package/src/oauth/platform-connection.ts +13 -4
  178. package/src/plugins/defaults/injectors.ts +38 -19
  179. package/src/plugins/external-plugin-loader.ts +82 -10
  180. package/src/plugins/types.ts +16 -7
  181. package/src/prompts/__tests__/system-prompt.test.ts +6 -51
  182. package/src/prompts/__tests__/task-progress-hint-section.test.ts +4 -8
  183. package/src/prompts/system-prompt.ts +0 -8
  184. package/src/prompts/templates/BOOTSTRAP.md +5 -5
  185. package/src/prompts/templates/system-sections.ts +0 -9
  186. package/src/providers/__tests__/inference.test.ts +2 -0
  187. package/src/providers/call-site-routing.ts +24 -6
  188. package/src/providers/connection-resolution.ts +63 -13
  189. package/src/providers/inference/__tests__/adapter-factory-openai-compatible.test.ts +74 -0
  190. package/src/providers/inference/__tests__/connections-openai-compatible.test.ts +175 -0
  191. package/src/providers/inference/__tests__/connections-status-label.test.ts +15 -0
  192. package/src/providers/inference/adapter-factory.ts +9 -20
  193. package/src/providers/inference/auth.ts +12 -0
  194. package/src/providers/inference/backfill.ts +14 -1
  195. package/src/providers/inference/connections.ts +85 -5
  196. package/src/providers/inference/resolve-auth.ts +2 -0
  197. package/src/providers/model-catalog.ts +199 -244
  198. package/src/providers/model-intents.ts +3 -3
  199. package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +235 -0
  200. package/src/providers/openai/chat-completions-provider.ts +159 -6
  201. package/src/providers/openrouter/client.ts +42 -4
  202. package/src/providers/platform-proxy/constants.ts +3 -4
  203. package/src/providers/provider-catalog-visibility.ts +3 -1
  204. package/src/providers/provider-send-message.ts +27 -12
  205. package/src/providers/registry.ts +30 -1
  206. package/src/runtime/agent-wake.ts +61 -1
  207. package/src/runtime/auth/route-policy.ts +13 -0
  208. package/src/runtime/http-server.ts +7 -16
  209. package/src/runtime/http-types.ts +0 -47
  210. package/src/runtime/routes/__tests__/consolidation-routes.test.ts +258 -0
  211. package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +66 -4
  212. package/src/runtime/routes/__tests__/inference-provider-connection-routes.test.ts +275 -44
  213. package/src/runtime/routes/__tests__/llm-call-sites-routes.test.ts +12 -0
  214. package/src/runtime/routes/channel-availability-routes.ts +5 -0
  215. package/src/runtime/routes/consolidation-routes.ts +100 -0
  216. package/src/runtime/routes/conversation-query-routes.ts +70 -11
  217. package/src/runtime/routes/conversation-routes.ts +7 -0
  218. package/src/runtime/routes/index.ts +2 -0
  219. package/src/runtime/routes/inference-provider-connection-routes.ts +134 -1
  220. package/src/runtime/routes/integrations/a2a.ts +235 -0
  221. package/src/runtime/routes/llm-call-sites-routes.ts +11 -1
  222. package/src/runtime/routes/subagents-routes.ts +41 -0
  223. package/src/subagent/manager.ts +2 -0
  224. package/src/tools/memory/register.ts +1 -9
  225. package/src/tools/registry.ts +2 -2
  226. package/src/tools/types.ts +37 -2
  227. package/src/workspace/migrations/087-memory-router-balanced-profile.ts +91 -0
  228. package/src/workspace/migrations/registry.ts +2 -0
  229. package/src/__tests__/guardian-action-conversation-turn.test.ts +0 -441
  230. package/src/memory/graph/__tests__/remember-description.test.ts +0 -55
  231. package/src/runtime/guardian-action-conversation-turn.ts +0 -99
@@ -0,0 +1,235 @@
1
+ import { describe, expect, test } from "bun:test";
2
+
3
+ import { OpenAIChatCompletionsProvider } from "../chat-completions-provider.js";
4
+
5
/**
 * One entry of a streamed `reasoning_details` array. The tests in this file
 * use the `type` values "reasoning.text", "reasoning.summary" and
 * "reasoning.encrypted".
 */
interface ReasoningDetail {
  type?: string;
  summary?: string | null;
  text?: string | null;
}

/**
 * The `delta` payload of a mocked streaming chunk. Besides regular `content`
 * it carries the three reasoning field spellings exercised by the tests.
 */
interface MockChunkDelta {
  content?: string | null;
  reasoning?: string | null;
  reasoning_content?: string | null;
  reasoning_details?: ReasoningDetail[] | null;
}

/**
 * Minimal stand-in for one streamed chat-completions chunk: a list of choices
 * plus optional model name and token usage.
 */
interface MockChunk {
  choices: Array<{ delta: MockChunkDelta; finish_reason?: string | null }>;
  model?: string;
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
  };
}
26
+
27
/**
 * Expose an in-memory array as an `AsyncIterable`, mimicking a streamed
 * response.
 *
 * Generic over the element type so the helper is not tied to one chunk shape.
 * The returned object creates a fresh async generator on every
 * `[Symbol.asyncIterator]()` call, so the same stream can be iterated more
 * than once.
 *
 * @param chunks Items to yield, in order. The array is only read.
 * @returns An async iterable that yields each item of `chunks` once per pass.
 */
function makeStream<T>(chunks: readonly T[]): AsyncIterable<T> {
  return {
    async *[Symbol.asyncIterator]() {
      for (const chunk of chunks) {
        yield chunk;
      }
    },
  };
}
34
+
35
/**
 * Build a provider whose SDK client is replaced by a stub that streams the
 * given chunks, together with an empty array for collecting emitted events.
 *
 * @param chunks Chunks the stubbed `chat.completions.create` call will stream.
 * @returns The stubbed provider and a fresh, empty events array that the
 *   caller passes on to `runStream`.
 */
function stubProvider(chunks: MockChunk[]): {
  provider: OpenAIChatCompletionsProvider;
  events: Array<{ type: string; thinking?: string; text?: string }>;
} {
  const provider = new OpenAIChatCompletionsProvider("test-key", "test-model");
  // Swap the SDK client for a stub whose chat.completions.create returns our
  // canned async iterable. The double cast is needed only to reach the
  // provider's `client` field from the test.
  (provider as unknown as { client: unknown }).client = {
    chat: {
      completions: {
        create: async () => makeStream(chunks),
      },
    },
  };
  // The events array is handed back to the caller directly; it is not stashed
  // on the provider, since nothing reads it from there.
  return { provider, events: [] };
}
53
+
54
/**
 * Send a single "hi" user message through the provider, recording every
 * streamed event, and report the text of the response's thinking block.
 *
 * @param provider Provider under test (normally one built by `stubProvider`).
 * @param events Array that receives each event passed to `onEvent`.
 * @returns The `thinking` text of the first "thinking" content block in the
 *   response, or an empty string when the response has none.
 */
async function runStream(
  provider: OpenAIChatCompletionsProvider,
  events: Array<{ type: string; thinking?: string; text?: string }>,
): Promise<{
  thinking: string;
}> {
  const { content } = await provider.sendMessage(
    [{ role: "user", content: [{ type: "text", text: "hi" }] }],
    undefined,
    undefined,
    {
      onEvent: (event) => {
        events.push(
          event as { type: string; thinking?: string; text?: string },
        );
      },
    },
  );

  // Only the first thinking block counts; later ones are ignored.
  for (const block of content) {
    if (block.type === "thinking") {
      const { thinking } = block as { type: "thinking"; thinking: string };
      return { thinking: thinking ?? "" };
    }
  }
  return { thinking: "" };
}
75
+
76
describe("OpenAIChatCompletionsProvider reasoning parsing", () => {
  /** Wrap one delta in the single-choice chunk envelope. */
  const deltaChunk = (delta: MockChunkDelta): MockChunk => ({
    choices: [{ delta }],
  });

  /** Terminal chunk: empty delta, `finish_reason: "stop"`, plus usage. */
  const stopChunk = (completionTokens: number): MockChunk => ({
    choices: [{ delta: {}, finish_reason: "stop" }],
    usage: { prompt_tokens: 1, completion_tokens: completionTokens },
  });

  // Each scenario lists the streamed chunks, the `thinking_delta` payloads
  // expected in order, and the text expected in the final thinking block.
  const scenarios: Array<{
    name: string;
    chunks: MockChunk[];
    deltas: string[];
    thinking: string;
  }> = [
    {
      name: "emits flat reasoning_content once (Fireworks/DeepSeek/Together/Groq shape)",
      chunks: [
        deltaChunk({ reasoning_content: "hello " }),
        deltaChunk({ reasoning_content: "world" }),
        stopChunk(2),
      ],
      deltas: ["hello ", "world"],
      thinking: "hello world",
    },
    {
      name: "emits flat reasoning once (OpenRouter non-Kimi shape)",
      chunks: [
        deltaChunk({ reasoning: "step " }),
        deltaChunk({ reasoning: "two" }),
        stopChunk(2),
      ],
      deltas: ["step ", "two"],
      thinking: "step two",
    },
    {
      name: "emits reasoning_details once when only details present",
      chunks: [
        deltaChunk({
          reasoning_details: [{ type: "reasoning.text", text: "alpha " }],
        }),
        deltaChunk({
          reasoning_details: [{ type: "reasoning.summary", summary: "beta" }],
        }),
        stopChunk(2),
      ],
      deltas: ["alpha ", "beta"],
      thinking: "alpha beta",
    },
    {
      name: "skips reasoning.encrypted entries entirely",
      chunks: [
        deltaChunk({
          reasoning_details: [{ type: "reasoning.encrypted", text: "opaque" }],
        }),
        stopChunk(2),
      ],
      deltas: [],
      thinking: "",
    },
    {
      name: "falls back to flat reasoning when details carry only encrypted entries",
      chunks: [
        deltaChunk({
          reasoning: "visible ",
          reasoning_details: [{ type: "reasoning.encrypted", text: "opaque" }],
        }),
        stopChunk(2),
      ],
      deltas: ["visible "],
      thinking: "visible ",
    },
    {
      // OpenRouter Kimi K2.6 with `reasoning.summary` set sends the same token
      // in both `delta.reasoning` and `delta.reasoning_details[].text`. The
      // structured field is preferred and the flat field is skipped, so each
      // token appears exactly once in the output stream.
      name: "does NOT double-emit when Kimi K2.6 mirrors text into both fields",
      chunks: [
        deltaChunk({
          reasoning: "it ",
          reasoning_details: [{ type: "reasoning.text", text: "it " }],
        }),
        deltaChunk({
          reasoning: "worked",
          reasoning_details: [{ type: "reasoning.text", text: "worked" }],
        }),
        deltaChunk({
          reasoning: "!",
          reasoning_details: [{ type: "reasoning.text", text: "!" }],
        }),
        stopChunk(3),
      ],
      deltas: ["it ", "worked", "!"],
      thinking: "it worked!",
    },
  ];

  for (const scenario of scenarios) {
    test(scenario.name, async () => {
      const { provider, events } = stubProvider(scenario.chunks);
      const { thinking } = await runStream(provider, events);
      const streamed = events
        .filter((e) => e.type === "thinking_delta")
        .map((e) => e.thinking);
      expect(streamed).toEqual(scenario.deltas);
      expect(thinking).toBe(scenario.thinking);
    });
  }
});
@@ -69,6 +69,10 @@ export interface OpenAIChatCompletionsProviderOptions {
69
69
  * document `low|medium|high` (e.g. Fireworks) should set this to "high" so
70
70
  * Vellum's `xhigh`/`max` tiers don't 4xx upstream. */
71
71
  maxReasoningEffort?: "high" | "xhigh";
72
+ /** Parse `<think>...</think>` tags from the content stream into thinking
73
+ * blocks. MiniMax and similar providers embed reasoning inside XML-style
74
+ * tags in the regular content field rather than using `reasoning_content`. */
75
+ parseThinkTags?: boolean;
72
76
  }
73
77
 
74
78
  /** Map our internal effort values to OpenAI's reasoning_effort parameter.
@@ -76,7 +80,7 @@ export interface OpenAIChatCompletionsProviderOptions {
76
80
  * passed through explicitly because OpenAI defaults `reasoning_effort` to
77
81
  * "medium" when the field is omitted — the user's opt-out is only honored
78
82
  * when we send it on the wire. */
79
- const EFFORT_TO_REASONING_EFFORT: Record<
83
+ export const EFFORT_TO_REASONING_EFFORT: Record<
80
84
  string,
81
85
  NonNullable<
82
86
  OpenAI.Chat.Completions.ChatCompletionCreateParams["reasoning_effort"]
@@ -97,6 +101,13 @@ const OPENAI_SUPPORTED_IMAGE_TYPES = new Set([
97
101
  "image/webp",
98
102
  ]);
99
103
 
104
/**
 * Length of the longest proper prefix of `tag` that `text` ends with.
 *
 * A non-zero result means the tail of `text` could be the start of `tag`
 * that has not fully arrived yet. A complete `tag` at the end of `text` is
 * not counted, because only prefixes shorter than `tag` are tried.
 *
 * @param text Text whose tail is inspected.
 * @param tag Tag to look for the beginning of (e.g. `"<think>"`).
 * @returns The number of trailing characters of `text` that match the start
 *   of `tag`, or 0 when there is no such overlap.
 */
function partialTagSuffix(text: string, tag: string): number {
  // Try the longest candidate first so the first hit is the maximal overlap.
  let candidate = Math.min(text.length, tag.length - 1);
  while (candidate > 0) {
    if (text.endsWith(tag.slice(0, candidate))) {
      return candidate;
    }
    candidate -= 1;
  }
  return 0;
}
110
+
100
111
  /**
101
112
  * OpenAI-compatible chat-completions transport.
102
113
  *
@@ -113,6 +124,7 @@ export class OpenAIChatCompletionsProvider implements Provider {
113
124
  private extraCreateParams: Record<string, unknown>;
114
125
  private maxReasoningEffort: "high" | "xhigh";
115
126
  private requestHeaders: Record<string, string>;
127
+ private parseThinkTags: boolean;
116
128
 
117
129
  constructor(
118
130
  apiKey: string,
@@ -130,6 +142,7 @@ export class OpenAIChatCompletionsProvider implements Provider {
130
142
  this.extraCreateParams = options.extraCreateParams ?? {};
131
143
  this.maxReasoningEffort = options.maxReasoningEffort ?? "xhigh";
132
144
  this.requestHeaders = options.requestHeaders ?? {};
145
+ this.parseThinkTags = options.parseThinkTags ?? false;
133
146
  }
134
147
 
135
148
  async sendMessage(
@@ -163,10 +176,17 @@ export class OpenAIChatCompletionsProvider implements Provider {
163
176
  params.max_completion_tokens = maxTokens;
164
177
  }
165
178
 
179
+ // Subclasses (OpenRouter) may already have nested effort under
180
+ // `reasoning.effort` via `buildExtraCreateParams`. Skip the flat
181
+ // `reasoning_effort` assignment in that case to avoid sending both forms,
182
+ // which OpenRouter rejects on reasoning models.
183
+ const nestedReasoningEffort = (
184
+ params as { reasoning?: { effort?: unknown } }
185
+ ).reasoning?.effort;
166
186
  const reasoningEffort = effort
167
187
  ? EFFORT_TO_REASONING_EFFORT[effort]
168
188
  : undefined;
169
- if (reasoningEffort) {
189
+ if (reasoningEffort && typeof nestedReasoningEffort !== "string") {
170
190
  params.reasoning_effort =
171
191
  reasoningEffort === "xhigh" && this.maxReasoningEffort === "high"
172
192
  ? "high"
@@ -189,6 +209,68 @@ export class OpenAIChatCompletionsProvider implements Provider {
189
209
 
190
210
  // Accumulate the response from chunks
191
211
  let contentText = "";
212
+ let reasoningText = "";
213
+ let insideThinkBlock = false;
214
+ let pendingContent = "";
215
+
216
/**
 * Drain `pendingContent`, routing text outside `<think>...</think>` to the
 * regular content stream and text inside it to the thinking stream.
 *
 * Which tag is searched for depends on `insideThinkBlock`: the opening tag
 * while outside a block, the closing tag while inside one. Text before a
 * found tag is emitted, the tag itself is dropped, and the state flips.
 * When no tag is found, everything is emitted except a trailing fragment
 * that could be the start of the tag; that fragment stays in
 * `pendingContent` for the next call.
 *
 * @param final When true nothing is held back: any trailing partial tag is
 *   emitted as ordinary text for the current state.
 */
const flushPendingContent = (final: boolean): void => {
  // Append to the accumulator for the current state and notify the listener.
  // Empty pieces are skipped so no empty delta events are produced.
  const emit = (piece: string): void => {
    if (!piece) return;
    if (insideThinkBlock) {
      reasoningText += piece;
      onEvent?.({ type: "thinking_delta", thinking: piece });
    } else {
      contentText += piece;
      onEvent?.({ type: "text_delta", text: piece });
    }
  };

  while (pendingContent.length > 0) {
    const tag = insideThinkBlock ? "</think>" : "<think>";
    const tagIdx = pendingContent.indexOf(tag);

    if (tagIdx < 0) {
      // No complete tag buffered: emit what is safe, keep a possible
      // partial tag for the next chunk, and stop.
      const heldBack = final ? 0 : partialTagSuffix(pendingContent, tag);
      const safeLen = pendingContent.length - heldBack;
      emit(pendingContent.slice(0, safeLen));
      pendingContent = heldBack > 0 ? pendingContent.slice(safeLen) : "";
      return;
    }

    // Emit under the current state BEFORE flipping it, then skip the tag.
    emit(pendingContent.slice(0, tagIdx));
    insideThinkBlock = !insideThinkBlock;
    pendingContent = pendingContent.slice(tagIdx + tag.length);
  }
};
273
+
192
274
  const toolCallMap = new Map<
193
275
  number,
194
276
  { id: string; name: string; args: string }
@@ -216,8 +298,62 @@ export class OpenAIChatCompletionsProvider implements Provider {
216
298
  const choice = chunk.choices[0];
217
299
  if (choice) {
218
300
  if (choice.delta.content) {
219
- contentText += choice.delta.content;
220
- onEvent?.({ type: "text_delta", text: choice.delta.content });
301
+ if (this.parseThinkTags) {
302
+ pendingContent += choice.delta.content;
303
+ flushPendingContent(false);
304
+ } else {
305
+ contentText += choice.delta.content;
306
+ onEvent?.({ type: "text_delta", text: choice.delta.content });
307
+ }
308
+ }
309
+
310
+ // Compatibility providers disagree on the field name: Fireworks /
311
+ // DeepSeek / Together / Groq stream `reasoning_content`; OpenRouter
312
+ // (per its ChatAssistantMessage spec) streams `reasoning`, and for
313
+ // reasoning summaries (e.g. Kimi K2.6) also populates
314
+ // `delta.reasoning_details[]` (entries are `reasoning.summary`,
315
+ // `reasoning.text`, or opaque `reasoning.encrypted`).
316
+ //
317
+ // Kimi K2.6 mirrors the same token into BOTH `delta.reasoning` and
318
+ // `delta.reasoning_details[].text` per chunk — prefer details when
319
+ // they carry visible text, otherwise fall through to the flat
320
+ // field. The encrypted-only case must fall through too, so the
321
+ // flat `reasoning` field isn't silently dropped.
322
+ const deltaWithReasoning = choice.delta as {
323
+ reasoning?: string | null;
324
+ reasoning_content?: string | null;
325
+ reasoning_details?: Array<{
326
+ type?: string;
327
+ summary?: string | null;
328
+ text?: string | null;
329
+ }> | null;
330
+ };
331
+
332
+ let sawVisibleDetail = false;
333
+ const reasoningDetails = deltaWithReasoning.reasoning_details;
334
+ if (Array.isArray(reasoningDetails)) {
335
+ for (const entry of reasoningDetails) {
336
+ if (entry.type === "reasoning.encrypted") continue;
337
+ const piece = entry.summary ?? entry.text;
338
+ if (piece) {
339
+ sawVisibleDetail = true;
340
+ reasoningText += piece;
341
+ onEvent?.({ type: "thinking_delta", thinking: piece });
342
+ }
343
+ }
344
+ }
345
+
346
+ if (!sawVisibleDetail) {
347
+ const reasoningContent =
348
+ deltaWithReasoning.reasoning_content ??
349
+ deltaWithReasoning.reasoning;
350
+ if (reasoningContent) {
351
+ reasoningText += reasoningContent;
352
+ onEvent?.({
353
+ type: "thinking_delta",
354
+ thinking: reasoningContent,
355
+ });
356
+ }
221
357
  }
222
358
 
223
359
  if (choice.delta.tool_calls) {
@@ -260,10 +396,27 @@ export class OpenAIChatCompletionsProvider implements Provider {
260
396
  cleanupTimeout();
261
397
  }
262
398
 
399
+ if (this.parseThinkTags && pendingContent) {
400
+ flushPendingContent(true);
401
+ }
402
+
263
403
  // Build content blocks
404
+ const finalReasoning = this.parseThinkTags
405
+ ? reasoningText.trim()
406
+ : reasoningText;
407
+ const finalContent = this.parseThinkTags
408
+ ? contentText.trim()
409
+ : contentText;
264
410
  const content: ContentBlock[] = [];
265
- if (contentText) {
266
- content.push({ type: "text", text: contentText });
411
+ if (finalReasoning) {
412
+ content.push({
413
+ type: "thinking",
414
+ thinking: finalReasoning,
415
+ signature: "",
416
+ });
417
+ }
418
+ if (finalContent) {
419
+ content.push({ type: "text", text: finalContent });
267
420
  }
268
421
  for (const [, tc] of toolCallMap) {
269
422
  let input: Record<string, unknown>;
@@ -1,6 +1,9 @@
1
1
  import { ProviderError } from "../../util/errors.js";
2
2
  import { AnthropicProvider } from "../anthropic/client.js";
3
- import { OpenAIChatCompletionsProvider } from "../openai/chat-completions-provider.js";
3
+ import {
4
+ EFFORT_TO_REASONING_EFFORT,
5
+ OpenAIChatCompletionsProvider,
6
+ } from "../openai/chat-completions-provider.js";
4
7
  import { isThinkingConfigEnabled } from "../thinking-config.js";
5
8
  import type {
6
9
  Message,
@@ -53,6 +56,25 @@ export function extractOnlyList(config: unknown): string[] {
53
56
  return only.filter((x): x is string => typeof x === "string" && x.length > 0);
54
57
  }
55
58
 
59
+ // OpenRouter's `reasoning.summary` field controls whether reasoning models emit
60
+ // a human-readable summary alongside (or instead of) encrypted reasoning blocks.
61
+ // Models like Kimi K2.6 return only encrypted `reasoning_details` unless a
62
+ // summary level is requested, so the stream carries no visible thinking content.
63
+ // Default to "detailed" so users see thinking by default; allow per-call
64
+ // override via `config.openrouter.reasoning.summary`. Per OpenRouter's
65
+ // ChatRequestReasoning schema, valid values are "auto" | "concise" | "detailed".
66
+ const VALID_REASONING_SUMMARIES = new Set(["auto", "concise", "detailed"]);
67
+
68
+ function extractReasoningSummaryOverride(config: unknown): string | undefined {
69
+ const cfg = config as
70
+ | { openrouter?: { reasoning?: { summary?: unknown } } }
71
+ | undefined;
72
+ const summary = cfg?.openrouter?.reasoning?.summary;
73
+ return typeof summary === "string" && VALID_REASONING_SUMMARIES.has(summary)
74
+ ? summary
75
+ : undefined;
76
+ }
77
+
56
78
  /**
57
79
  * Rewrite `options.config` for the Anthropic-compat path so OpenRouter's
58
80
  * `provider: { only: [...] }` body field travels through `AnthropicProvider`'s
@@ -160,14 +182,30 @@ export class OpenRouterProvider extends OpenAIChatCompletionsProvider {
160
182
  // OpenRouter's unified `reasoning` parameter controls extended thinking on
161
183
  // its OpenAI-compatible endpoint. Anthropic models skip this path entirely and
162
184
  // go through AnthropicProvider, which receives the native `thinking` object.
185
+ //
186
+ // `effort` nests under `reasoning` here (rather than flat `reasoning_effort`)
187
+ // because OpenRouter's documented `ChatRequestReasoning` shape is the union of
188
+ // { effort, summary }. `summary` is required for models like Kimi K2.6 that
189
+ // would otherwise return only encrypted reasoning blocks; we default to
190
+ // "detailed" and let callers override via `config.openrouter.reasoning.summary`.
163
191
  protected override buildExtraCreateParams(
164
192
  options?: SendMessageOptions,
165
193
  ): Record<string, unknown> {
166
194
  const config = options?.config as Record<string, unknown> | undefined;
167
195
  const thinkingEnabled = isThinkingConfigEnabled(config?.thinking);
168
- const extras: Record<string, unknown> = {
169
- reasoning: { enabled: thinkingEnabled },
170
- };
196
+ const effort = config?.effort as string | undefined;
197
+ const mappedEffort = effort
198
+ ? EFFORT_TO_REASONING_EFFORT[effort]
199
+ : undefined;
200
+ const summaryOverride = extractReasoningSummaryOverride(config);
201
+ const reasoning: Record<string, unknown> = { enabled: thinkingEnabled };
202
+ if (mappedEffort) {
203
+ reasoning.effort = mappedEffort;
204
+ }
205
+ if (thinkingEnabled) {
206
+ reasoning.summary = summaryOverride ?? "detailed";
207
+ }
208
+ const extras: Record<string, unknown> = { reasoning };
171
209
  const only = extractOnlyList(config);
172
210
  if (only.length > 0) {
173
211
  const existingProvider = (config?.provider ?? {}) as Record<
@@ -43,14 +43,13 @@ export const PLATFORM_PROVIDER_META: Record<string, ManagedProviderMeta> = {
43
43
  },
44
44
  fireworks: {
45
45
  name: "fireworks",
46
- managed: false,
46
+ managed: true,
47
+ proxyPath: "/v1/runtime-proxy/fireworks",
47
48
  },
48
49
  openrouter: {
49
50
  name: "openrouter",
50
51
  managed: false,
51
52
  },
52
53
  ollama: { name: "ollama", managed: false },
53
- zai: { name: "zai", managed: false },
54
- deepseek: { name: "deepseek", managed: false },
55
- minimax: { name: "minimax", managed: false },
54
+ "openai-compatible": { name: "openai-compatible", managed: false },
56
55
  };
@@ -32,5 +32,7 @@ export function getVisibleProviderCatalog(
32
32
  if (visibleModels.length === entry.models.length) return entry;
33
33
  return { ...entry, models: visibleModels };
34
34
  })
35
- .filter((entry) => entry.models.length > 0);
35
+ .filter(
36
+ (entry) => entry.models.length > 0 || entry.defaultModel === "",
37
+ );
36
38
  }
@@ -7,8 +7,10 @@
7
7
  import { resolveCallSiteConfig } from "../config/llm-resolver.js";
8
8
  import { getConfig } from "../config/loader.js";
9
9
  import type { LLMCallSite } from "../config/schemas/llm.js";
10
+ import { getDb } from "../memory/db-connection.js";
10
11
  import { getLogger } from "../util/logger.js";
11
12
  import { tryResolveProviderForConnectionName } from "./connection-resolution.js";
13
+ import { listConnections } from "./inference/connections.js";
12
14
  import { initializeProviders, listProviders } from "./registry.js";
13
15
  import type {
14
16
  ContentBlock,
@@ -110,22 +112,35 @@ export async function resolveConfiguredProvider(
110
112
 
111
113
  const resolved = resolveCallSiteConfig(callSite, config.llm, opts);
112
114
  const inferenceProvider = resolved.provider;
113
- const connectionName = resolved.provider_connection;
115
+ let connectionName = resolved.provider_connection;
114
116
 
115
117
  // Connection-aware path: every dispatch goes through `provider_connection`.
116
118
  // The boot-time backfill ensures every profile has one in production.
117
- // When unset (test envs that skip backfill, freshly-installed configs
118
- // not yet backfilled, or users who manually cleared the field), we
119
- // return null so callsites with deterministic fallbacks (invite
120
- // instructions, telegram username resolution, etc.) keep working.
121
- // Hard config errors — connection lookup failure, provider mismatch —
122
- // still throw via `tryResolveProviderForConnectionName` below.
119
+ // When unset (profile set provider with "Any active" connection, test envs
120
+ // that skip backfill, freshly-installed configs not yet backfilled, or
121
+ // users who manually cleared the field), try to auto-resolve from the
122
+ // provider before falling back to null.
123
123
  if (!connectionName) {
124
- log.debug(
125
- { callSite, inferenceProvider },
126
- "resolveCallSiteConfig yielded no provider_connection — returning null so callsite can fall back",
127
- );
128
- return null;
124
+ if (inferenceProvider) {
125
+ try {
126
+ const candidates = listConnections(getDb(), {
127
+ provider: inferenceProvider,
128
+ });
129
+ const active = candidates.find((c) => c.status === "active");
130
+ if (active) {
131
+ connectionName = active.name;
132
+ }
133
+ } catch {
134
+ // DB not available — fall through to the existing null-return path.
135
+ }
136
+ }
137
+ if (!connectionName) {
138
+ log.debug(
139
+ { callSite, inferenceProvider },
140
+ "resolveCallSiteConfig yielded no provider_connection — returning null so callsite can fall back",
141
+ );
142
+ return null;
143
+ }
129
144
  }
130
145
 
131
146
  const connectionProvider = await tryResolveProviderForConnectionName(