@inbrowser/model 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/AGENTS.md +44 -18
  2. package/README.md +128 -20
  3. package/dist/contract.d.ts +104 -0
  4. package/dist/contract.d.ts.map +1 -0
  5. package/dist/contract.js +13 -0
  6. package/dist/contract.js.map +1 -0
  7. package/dist/engine-client.d.ts +44 -0
  8. package/dist/engine-client.d.ts.map +1 -0
  9. package/dist/engine-client.js +136 -0
  10. package/dist/engine-client.js.map +1 -0
  11. package/dist/engine.d.ts.map +1 -1
  12. package/dist/engine.js +20 -10
  13. package/dist/engine.js.map +1 -1
  14. package/dist/index.d.ts +23 -8
  15. package/dist/index.d.ts.map +1 -1
  16. package/dist/index.js +44 -8
  17. package/dist/index.js.map +1 -1
  18. package/dist/presets.d.ts +10 -0
  19. package/dist/presets.d.ts.map +1 -1
  20. package/dist/presets.js +21 -0
  21. package/dist/presets.js.map +1 -1
  22. package/dist/providers/anthropic.d.ts +45 -0
  23. package/dist/providers/anthropic.d.ts.map +1 -0
  24. package/dist/providers/anthropic.js +217 -0
  25. package/dist/providers/anthropic.js.map +1 -0
  26. package/dist/providers/claude-cli.d.ts +135 -0
  27. package/dist/providers/claude-cli.d.ts.map +1 -0
  28. package/dist/providers/claude-cli.js +270 -0
  29. package/dist/providers/claude-cli.js.map +1 -0
  30. package/dist/providers/claude-code.d.ts +188 -0
  31. package/dist/providers/claude-code.d.ts.map +1 -0
  32. package/dist/providers/claude-code.js +182 -0
  33. package/dist/providers/claude-code.js.map +1 -0
  34. package/dist/providers/gemini.d.ts +32 -0
  35. package/dist/providers/gemini.d.ts.map +1 -0
  36. package/dist/providers/gemini.js +441 -0
  37. package/dist/providers/gemini.js.map +1 -0
  38. package/dist/providers/llama-server.d.ts +15 -0
  39. package/dist/providers/llama-server.d.ts.map +1 -0
  40. package/dist/providers/llama-server.js +51 -0
  41. package/dist/providers/llama-server.js.map +1 -0
  42. package/dist/providers/oai-compat.d.ts +100 -0
  43. package/dist/providers/oai-compat.d.ts.map +1 -0
  44. package/dist/providers/oai-compat.js +206 -0
  45. package/dist/providers/oai-compat.js.map +1 -0
  46. package/dist/providers/ollama.d.ts +15 -0
  47. package/dist/providers/ollama.d.ts.map +1 -0
  48. package/dist/providers/ollama.js +51 -0
  49. package/dist/providers/ollama.js.map +1 -0
  50. package/dist/providers/openrouter-oauth.d.ts +67 -0
  51. package/dist/providers/openrouter-oauth.d.ts.map +1 -0
  52. package/dist/providers/openrouter-oauth.js +84 -0
  53. package/dist/providers/openrouter-oauth.js.map +1 -0
  54. package/dist/providers/openrouter.d.ts +13 -0
  55. package/dist/providers/openrouter.d.ts.map +1 -0
  56. package/dist/providers/openrouter.js +218 -0
  57. package/dist/providers/openrouter.js.map +1 -0
  58. package/dist/providers/types.d.ts +50 -0
  59. package/dist/providers/types.d.ts.map +1 -0
  60. package/dist/providers/types.js +2 -0
  61. package/dist/providers/types.js.map +1 -0
  62. package/dist/sse.d.ts +20 -0
  63. package/dist/sse.d.ts.map +1 -0
  64. package/dist/sse.js +47 -0
  65. package/dist/sse.js.map +1 -0
  66. package/dist/types.d.ts +2 -13
  67. package/dist/types.d.ts.map +1 -1
  68. package/dist/with-retry.d.ts +27 -0
  69. package/dist/with-retry.d.ts.map +1 -0
  70. package/dist/with-retry.js +55 -0
  71. package/dist/with-retry.js.map +1 -0
  72. package/dist/worker.d.ts +1 -1
  73. package/dist/worker.js +1 -1
  74. package/package.json +9 -29
  75. package/dist/adapters/agent.d.ts +0 -19
  76. package/dist/adapters/agent.d.ts.map +0 -1
  77. package/dist/adapters/agent.js +0 -96
  78. package/dist/adapters/agent.js.map +0 -1
  79. package/dist/adapters/relay.d.ts +0 -17
  80. package/dist/adapters/relay.d.ts.map +0 -1
  81. package/dist/adapters/relay.js +0 -90
  82. package/dist/adapters/relay.js.map +0 -1
@@ -0,0 +1,182 @@
1
+ import { renderPrompt } from './claude-cli.js';
2
/**
 * Translate the relay's `reasoningEffort` value into the SDK's `effort`
 * option. Only `low`, `medium` and `high` are forwarded; anything else
 * (including the relay's `off` sentinel) yields `undefined` so the caller
 * can omit the field and leave the model's default in place.
 *
 * @param {unknown} effort - The requested reasoning effort.
 * @returns {'low' | 'medium' | 'high' | undefined} The effort to pass on, if any.
 */
function toEffort(effort) {
    switch (effort) {
        case 'low':
        case 'medium':
        case 'high':
            return effort;
        default:
            return undefined;
    }
}
9
/**
 * Build a Claude Code SDK `ModelClient`. `claudeCodeModelClient({ model })`
 * with no other options uses the host's ambient subscription credentials:
 *
 * ```ts
 * const relay = createRelay({
 *   store,
 *   providers: { 'claude-code': (c) => claudeCodeModelClient(c) },
 * });
 * // client request: { provider: 'claude-code', model: 'claude-opus-4-8',
 * //                   messages, tools: [], apiKey: '' }
 * ```
 *
 * Construction values (model, SDK options) come in the config; per-call
 * values (messages, reasoning) come in the `ModelRequest`.
 *
 * Config fields read here: `model`, `loadSdk` (optional override for how
 * the SDK module is obtained), `oauthToken` and `env`.
 *
 * The returned client's `chat(req, signal)` is an async generator that
 * yields events of kind `text`, `thinking`, `usage` or `error`. Failures
 * are reported as `error` events rather than thrown, and an aborted
 * `signal` ends the generator silently.
 */
export function claudeCodeModelClient(config) {
    const loadSdk = config.loadSdk ??
        (async () => {
            // Non-literal specifier so the browser build (which reaches this module
            // through the root barrel) doesn't resolve the Node-only SDK at build
            // time; it loads at runtime only when this provider is actually used.
            const sdkSpecifier = '@anthropic-ai/claude-agent-sdk';
            const mod = (await import(sdkSpecifier));
            return { query: mod.query };
        });
    return {
        id: `claude-code:${config.model}`,
        supportsTools: false,
        async *chat(req, signal) {
            // Already cancelled before we started: produce nothing.
            if (signal?.aborted)
                return;
            // Reject tool-bearing requests up front instead of silently dropping
            // the tools.
            if (req.tools.length > 0) {
                yield {
                    kind: 'error',
                    message: 'claude-code provider does not support caller-defined tools — the bare-model SDK configuration has no tool-registration surface. Send `tools: []` (or use an API provider for tool calling).',
                };
                return;
            }
            // `renderPrompt` (from ./claude-cli.js) splits the message list into a
            // system prompt and a prompt string; an empty prompt is an error.
            const { system, prompt } = renderPrompt(req.messages);
            if (!prompt) {
                yield { kind: 'error', message: 'claude-code provider: no user message to send.' };
                return;
            }
            // Load the SDK lazily; a load failure becomes an `error` event.
            let sdk;
            try {
                sdk = await loadSdk();
            }
            catch (e) {
                yield {
                    kind: 'error',
                    message: `claude-code: failed to load @anthropic-ai/claude-agent-sdk (install it as a peer dep): ${e instanceof Error ? e.message : String(e)}`,
                };
                return;
            }
            // Compose the subprocess env. The SDK's auth precedence is
            // ANTHROPIC_API_KEY → CLAUDE_CODE_OAUTH_TOKEN → ~/.claude/.credentials.json.
            // We strip ANTHROPIC_API_KEY so subscription always wins; we
            // optionally inject CLAUDE_CODE_OAUTH_TOKEN for explicit-token
            // hosts.
            const env = { ...process.env };
            delete env.ANTHROPIC_API_KEY;
            if (config.oauthToken)
                env.CLAUDE_CODE_OAUTH_TOKEN = config.oauthToken;
            // Caller-supplied env is applied last, so it can override the token
            // injected above.
            if (config.env)
                Object.assign(env, config.env);
            // Belt-and-suspenders: if the caller's config.env tried to set
            // ANTHROPIC_API_KEY back, strip it again.
            delete env.ANTHROPIC_API_KEY;
            // Bridge the caller's AbortSignal onto a controller handed to the SDK.
            // The listener is removed in the `finally` below.
            const abortController = new AbortController();
            const onAbort = () => abortController.abort();
            signal?.addEventListener('abort', onAbort, { once: true });
            const effort = toEffort(req.reasoningEffort);
            // SDK options: empty tools / setting sources / MCP servers, partial
            // messages enabled so text can stream as deltas. `model` and `effort`
            // are only included when set.
            const sdkOptions = {
                ...(config.model ? { model: config.model } : {}),
                systemPrompt: system,
                tools: [],
                settingSources: [],
                mcpServers: {},
                strictMcpConfig: true,
                permissionMode: 'bypassPermissions',
                includePartialMessages: true,
                ...(effort ? { effort } : {}),
                abortController,
                env,
            };
            let promptTokens = 0;
            let outputTokens = 0;
            let cachedTokens;
            // `sawText`: at least one text delta was streamed.
            // `sawResult`: a terminal `result` message was received.
            // `fallbackText`: text collected from whole `assistant` messages.
            let sawText = false;
            let sawResult = false;
            let fallbackText = '';
            try {
                for await (const msg of sdk.query({ prompt, options: sdkOptions })) {
                    if (signal?.aborted)
                        return;
                    // Incremental deltas: forward text and thinking as they arrive.
                    if (msg.type === 'stream_event' && msg.event?.type === 'content_block_delta') {
                        const delta = msg.event.delta;
                        if (delta?.type === 'text_delta' && typeof delta.text === 'string') {
                            sawText = true;
                            yield { kind: 'text', text: delta.text };
                        }
                        else if (delta?.type === 'thinking_delta' && typeof delta.thinking === 'string') {
                            yield { kind: 'thinking', text: delta.thinking };
                        }
                        continue;
                    }
                    // Buffer the assistant message's full text in case partial
                    // streaming wasn't honored — we fall back to it on terminal
                    // result if no text deltas streamed.
                    if (msg.type === 'assistant' && msg.message?.content) {
                        for (const block of msg.message.content) {
                            if (block.type === 'text' && typeof block.text === 'string') {
                                fallbackText += block.text;
                            }
                        }
                        continue;
                    }
                    if (msg.type === 'result') {
                        sawResult = true;
                        // Error result: report it (detail capped at 400 chars) and stop;
                        // no usage event is emitted on this path.
                        if (msg.is_error || (msg.subtype && msg.subtype !== 'success')) {
                            yield {
                                kind: 'error',
                                message: `claude-code SDK reported ${msg.subtype ?? 'error'}: ${typeof msg.result === 'string' && msg.result
                                    ? msg.result.slice(0, 400)
                                    : '(no detail)'}`,
                            };
                            return;
                        }
                        // Defensive fallback: terminal result text when no deltas
                        // streamed (some SDK paths skip partial events).
                        if (!sawText) {
                            const text = typeof msg.result === 'string' && msg.result ? msg.result : fallbackText;
                            if (text)
                                yield { kind: 'text', text };
                        }
                        promptTokens = msg.usage?.input_tokens ?? promptTokens;
                        outputTokens = msg.usage?.output_tokens ?? outputTokens;
                        if (typeof msg.usage?.cache_read_input_tokens === 'number') {
                            cachedTokens = msg.usage.cache_read_input_tokens;
                        }
                        yield {
                            kind: 'usage',
                            usage: {
                                promptTokens,
                                outputTokens,
                                ...(typeof cachedTokens === 'number' ? { cachedTokens } : {}),
                                // costUsd intentionally omitted — subscription is N/A.
                            },
                        };
                        return;
                    }
                    // system / compact_boundary / rate_limit_event / unknown — skip.
                }
            }
            catch (e) {
                // An exception after the caller aborted is treated as the
                // cancellation itself, not reported as an error.
                if (signal?.aborted)
                    return;
                yield { kind: 'error', message: e instanceof Error ? e.message : String(e) };
                return;
            }
            finally {
                signal?.removeEventListener('abort', onAbort);
            }
            // The loop only falls through to here when the stream finished without
            // a `result` message (every `result` path returns above).
            if (!sawResult) {
                yield {
                    kind: 'error',
                    message: 'claude-code SDK stream ended without a result message.',
                };
            }
        },
    };
}
182
+ //# sourceMappingURL=claude-code.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude-code.js","sourceRoot":"","sources":["../../src/providers/claude-code.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAmG/C;;;+BAG+B;AAC/B,SAAS,QAAQ,CAAC,MAA0B;IAC1C,OAAO,MAAM,KAAK,KAAK,IAAI,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;AAC3F,CAAC;AAkED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAAwB;IAC5D,MAAM,OAAO,GACX,MAAM,CAAC,OAAO;QACd,CAAC,KAAK,IAAkC,EAAE;YACxC,wEAAwE;YACxE,sEAAsE;YACtE,sEAAsE;YACtE,MAAM,YAAY,GAAG,gCAAgC,CAAC;YACtD,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,YAAY,CAAC,CAAwB,CAAC;YAChE,OAAO,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,CAAC;QAC9B,CAAC,CAAC,CAAC;IAEL,OAAO;QACL,EAAE,EAAE,eAAe,MAAM,CAAC,KAAK,EAAE;QACjC,aAAa,EAAE,KAAK;QACpB,KAAK,CAAC,CAAC,IAAI,CAAC,GAAiB,EAAE,MAAmB;YAChD,IAAI,MAAM,EAAE,OAAO;gBAAE,OAAO;YAE5B,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,MAAM;oBACJ,IAAI,EAAE,OAAO;oBACb,OAAO,EACL,6LAA6L;iBAChM,CAAC;gBACF,OAAO;YACT,CAAC;YAED,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACtD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,gDAAgD,EAAE,CAAC;gBACnF,OAAO;YACT,CAAC;YAED,IAAI,GAAwB,CAAC;YAC7B,IAAI,CAAC;gBACH,GAAG,GAAG,MAAM,OAAO,EAAE,CAAC;YACxB,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,MAAM;oBACJ,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,0FACP,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC3C,EAAE;iBACH,CAAC;gBACF,OAAO;YACT,CAAC;YAED,2DAA2D;YAC3D,6EAA6E;YAC7E,6DAA6D;YAC7D,+DAA+D;YAC/D,SAAS;YACT,MAAM,GAAG,GAAuC,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;YACnE,OAAO,GAAG,CAAC,iBAAiB,CAAC;YAC7B,IAAI,MAAM,CAAC,UAAU;gBAAE,GAAG,CAAC,uBAAuB,GAAG,MAAM,CAAC,UAAU,CAAC;YACvE,IAAI,MAAM,CAAC,GAAG;gBAAE,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC;YAC/C,+DAA+D;YAC/D,0CAA0C;YAC1C,OAAO,GAAG,CAAC,iBAAiB,CAAC;YAE7B,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;YAC9C,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;YAC9C,MAAM,EAAE,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;YAE3D,MAAM,
MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;YAC7C,MAAM,UAAU,GAAe;gBAC7B,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChD,YAAY,EAAE,MAAM;gBACpB,KAAK,EAAE,EAAE;gBACT,cAAc,EAAE,EAAE;gBAClB,UAAU,EAAE,EAAE;gBACd,eAAe,EAAE,IAAI;gBACrB,cAAc,EAAE,mBAAmB;gBACnC,sBAAsB,EAAE,IAAI;gBAC5B,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7B,eAAe;gBACf,GAAG;aACJ,CAAC;YAEF,IAAI,YAAY,GAAG,CAAC,CAAC;YACrB,IAAI,YAAY,GAAG,CAAC,CAAC;YACrB,IAAI,YAAgC,CAAC;YACrC,IAAI,OAAO,GAAG,KAAK,CAAC;YACpB,IAAI,SAAS,GAAG,KAAK,CAAC;YACtB,IAAI,YAAY,GAAG,EAAE,CAAC;YAEtB,IAAI,CAAC;gBACH,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;oBACnE,IAAI,MAAM,EAAE,OAAO;wBAAE,OAAO;oBAE5B,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,IAAI,GAAG,CAAC,KAAK,EAAE,IAAI,KAAK,qBAAqB,EAAE,CAAC;wBAC7E,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC;wBAC9B,IAAI,KAAK,EAAE,IAAI,KAAK,YAAY,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;4BACnE,OAAO,GAAG,IAAI,CAAC;4BACf,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC;wBAC3C,CAAC;6BAAM,IAAI,KAAK,EAAE,IAAI,KAAK,gBAAgB,IAAI,OAAO,KAAK,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;4BAClF,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC;wBACnD,CAAC;wBACD,SAAS;oBACX,CAAC;oBAED,2DAA2D;oBAC3D,4DAA4D;oBAC5D,qCAAqC;oBACrC,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,CAAC;wBACrD,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;4BACxC,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gCAC5D,YAAY,IAAI,KAAK,CAAC,IAAI,CAAC;4BAC7B,CAAC;wBACH,CAAC;wBACD,SAAS;oBACX,CAAC;oBAED,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;wBAC1B,SAAS,GAAG,IAAI,CAAC;wBACjB,IAAI,GAAG,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC,OAAO,KAAK,SAAS,CAAC,EAAE,CAAC;4BAC/D,MAAM;gCACJ,IAAI,EAAE,OAAO;gCACb,OAAO,EAAE,4BAA4B,GAAG,CAAC,OAAO,IAAI,OAAO,KACzD,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM;oCAC1C,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,
EAAE,GAAG,CAAC;oCAC1B,CAAC,CAAC,aACN,EAAE;6BACH,CAAC;4BACF,OAAO;wBACT,CAAC;wBACD,0DAA0D;wBAC1D,iDAAiD;wBACjD,IAAI,CAAC,OAAO,EAAE,CAAC;4BACb,MAAM,IAAI,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC;4BACtF,IAAI,IAAI;gCAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;wBACzC,CAAC;wBACD,YAAY,GAAG,GAAG,CAAC,KAAK,EAAE,YAAY,IAAI,YAAY,CAAC;wBACvD,YAAY,GAAG,GAAG,CAAC,KAAK,EAAE,aAAa,IAAI,YAAY,CAAC;wBACxD,IAAI,OAAO,GAAG,CAAC,KAAK,EAAE,uBAAuB,KAAK,QAAQ,EAAE,CAAC;4BAC3D,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC;wBACnD,CAAC;wBACD,MAAM;4BACJ,IAAI,EAAE,OAAO;4BACb,KAAK,EAAE;gCACL,YAAY;gCACZ,YAAY;gCACZ,GAAG,CAAC,OAAO,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gCAC7D,uDAAuD;6BACxD;yBACF,CAAC;wBACF,OAAO;oBACT,CAAC;oBACD,iEAAiE;gBACnE,CAAC;YACH,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,MAAM,EAAE,OAAO;oBAAE,OAAO;gBAC5B,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7E,OAAO;YACT,CAAC;oBAAS,CAAC;gBACT,MAAM,EAAE,mBAAmB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAChD,CAAC;YAED,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,MAAM;oBACJ,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,wDAAwD;iBAClE,CAAC;YACJ,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,32 @@
1
+ import type { ModelClient, ModelEvent, ModelRequest } from '../contract.js';
2
+ import type { CloudProviderConfig } from './types.js';
3
+ /** Construction-time configuration for the Gemini provider (currently identical to `CloudProviderConfig`); per-request values travel on the `ModelRequest`. */
4
+ export interface GeminiConfig extends CloudProviderConfig {
5
+ }
6
+ /**
7
+ * Build the upstream Gemini Request — URL + headers + body — without
8
+ * executing it. The returned Request carries no AbortSignal; the
9
+ * caller adds one at fetch time. Construction values (apiKey, model)
10
+ * come from the provider config; per-call values (messages, tools,
11
+ * sampling) from the `ModelRequest`.
12
+ */
13
+ export declare function buildGeminiRequest(config: CloudProviderConfig, req: ModelRequest): Request;
14
+ /**
15
+ * Parse an already-fetched Gemini SSE response into `ModelEvent`s.
16
+ * `signal` is optional — the relay passes the consumer's signal in,
17
+ * the page-direct caller passes its own.
18
+ *
19
+ * Function calls are accumulated across chunks (Gemini re-sends the
20
+ * growing parts list) and flushed as one `tool_call` per logical call
21
+ * at stream end, so a turn with N calls yields exactly N events with
22
+ * complete args and signatures.
23
+ */
24
+ export declare function geminiEventsFromResponse(response: Response, signal?: AbortSignal): AsyncGenerator<ModelEvent>;
25
+ /**
26
+ * Build a Gemini `ModelClient`. Construction values (apiKey, model)
27
+ * come in the config; per-call values (messages, tools, sampling) come
28
+ * in the `ModelRequest`.
29
+ */
30
+ export declare function geminiModelClient(config: GeminiConfig): ModelClient;
31
+ export declare function sanitizeGeminiSchema(node: unknown): unknown;
32
+ //# sourceMappingURL=gemini.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gemini.d.ts","sourceRoot":"","sources":["../../src/providers/gemini.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAY,MAAM,gBAAgB,CAAC;AAEtF,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAoBtD,sEAAsE;AACtE,MAAM,WAAW,YAAa,SAAQ,mBAAmB;CAAG;AA4I5D;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,mBAAmB,EAAE,GAAG,EAAE,YAAY,GAAG,OAAO,CAY1F;AA0BD;;;;;;;;;GASG;AACH,wBAAuB,wBAAwB,CAC7C,QAAQ,EAAE,QAAQ,EAClB,MAAM,CAAC,EAAE,WAAW,GACnB,cAAc,CAAC,UAAU,CAAC,CAwJ5B;AAsCD;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,YAAY,GAAG,WAAW,CA8CnE;AAuBD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,CAa3D"}
@@ -0,0 +1,441 @@
1
+ import { readSseDataLines } from '../sse.js';
2
+ /**
3
+ * Gemini provider — raw fetch against the Generative Language REST
4
+ * API, parsing SSE directly. The `@google/genai` SDK is intentionally
5
+ * NOT used here: dropping it lets the same code run unchanged
6
+ * page-side and inside the relay (no SDK transport quirks), and lets
7
+ * both built-in providers (Gemini + OpenRouter) be treated
8
+ * symmetrically — both speak fetch + SSE.
9
+ *
10
+ * Endpoint: POST .../models/{model}:streamGenerateContent?alt=sse
11
+ *
12
+ * The chunk shape and thoughtSignature placement match what the SDK
13
+ * produced. Streamed function calls are accumulated across chunks and
14
+ * emitted exactly once (see `geminiEventsFromResponse`): Gemini
15
+ * re-sends the growing `content.parts[]` list every chunk, so emitting
16
+ * a `tool_call` per chunk would duplicate every call.
17
+ */
18
const ENDPOINT_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
/**
 * Translate a provider-neutral `ModelRequest` into the JSON body for
 * Gemini's `streamGenerateContent` endpoint.
 *
 * Message mapping:
 *  - `system`    → concatenated (blank-line separated) into `systemInstruction`.
 *  - `user`      → a `user` turn with one text part.
 *  - `assistant` → a `model` turn with its text and `functionCall` parts
 *                  (omitted entirely when it has neither).
 *  - `tool`      → a `functionResponse` part on a `user` turn. Consecutive
 *                  tool results are grouped into ONE turn, which is the
 *                  shape Gemini documents for answering parallel function
 *                  calls; emitting one turn per result splits the answers
 *                  to a single model turn across several turns.
 *
 * @param {{ temperature?: number }} config - Provider config; only the
 *   default `temperature` is read here.
 * @param {object} req - The request: `messages`, `tools`, and optional
 *   `temperature` / `topP` / `topK`.
 * @returns {object} Body with `contents`, `generationConfig`, and, when
 *   applicable, `systemInstruction` and `tools`.
 */
function toGeminiBody(config, req) {
    const contents = [];
    let systemText = '';
    // The `user` turn currently collecting functionResponse parts. It is only
    // appended to while it is still the last entry in `contents`, i.e. while
    // no other turn has been emitted since.
    let toolTurn = null;
    for (const m of req.messages) {
        if (m.role === 'system') {
            systemText += (systemText ? '\n\n' : '') + (m.text ?? '');
            continue;
        }
        if (m.role === 'user') {
            contents.push({ role: 'user', parts: [{ text: m.text ?? '' }] });
            continue;
        }
        if (m.role === 'assistant') {
            const parts = [];
            if (m.text)
                parts.push({ text: m.text });
            for (const c of m.toolCalls ?? []) {
                // Gemini 3: `thoughtSignature` is a sibling of `functionCall`
                // on the part, NOT a child. Echoing it on a different field
                // returns INVALID_ARGUMENT.
                const part = {
                    functionCall: {
                        name: c.name,
                        args: c.args ?? {},
                    },
                };
                if (c.signature)
                    part.thoughtSignature = c.signature;
                parts.push(part);
            }
            if (parts.length > 0)
                contents.push({ role: 'model', parts });
            continue;
        }
        if (m.role === 'tool') {
            // Prefer the parsed JSON value; fall back to the raw string when the
            // payload is not valid JSON, and to `null` when there is no payload.
            let parsed = null;
            try {
                if (m.resultJson)
                    parsed = JSON.parse(m.resultJson);
            }
            catch {
                parsed = m.resultJson;
            }
            const part = {
                functionResponse: {
                    name: m.name ?? 'tool',
                    response: { result: parsed },
                },
            };
            if (toolTurn !== null && contents[contents.length - 1] === toolTurn) {
                toolTurn.parts.push(part);
            }
            else {
                toolTurn = { role: 'user', parts: [part] };
                contents.push(toolTurn);
            }
        }
    }
    const body = { contents };
    if (systemText)
        body.systemInstruction = { parts: [{ text: systemText }] };
    if (req.tools.length > 0) {
        const functionDeclarations = req.tools.map((t) => ({
            name: t.function.name,
            description: t.function.description,
            parameters: sanitizeGeminiSchema(t.function.parameters),
        }));
        body.tools = [{ functionDeclarations }];
    }
    const gen = {
        thinkingConfig: { includeThoughts: true },
        // Generous output budget. Left unset, the model can truncate a
        // large tool-call argument — writeApp/writeCode emit whole source
        // files as a string arg — and a truncated call is exactly what
        // Gemini then rejects as MALFORMED_FUNCTION_CALL. 65536 is the
        // Gemini 3 family max, so this never *reduces* a model's default;
        // a model that somehow doesn't support it fails loudly with a
        // 400, not silently.
        maxOutputTokens: 65536,
    };
    // Per-request temperature wins; otherwise fall back to the
    // construction-time default (the docs agent pins 0.2; the relay sets
    // neither, preserving "send only what the client did").
    const temperature = req.temperature ?? config.temperature;
    if (typeof temperature === 'number')
        gen.temperature = temperature;
    if (typeof req.topP === 'number')
        gen.topP = req.topP;
    if (typeof req.topK === 'number')
        gen.topK = req.topK;
    body.generationConfig = gen;
    return body;
}
110
/**
 * Assemble (but do not send) the upstream Gemini `Request`: streaming
 * endpoint URL, headers and JSON body. No AbortSignal is attached; the
 * caller supplies one when it fetches.
 *
 * @param {object} config - Provider config; `model` and `apiKey` are read
 *   here and the whole object is passed on to `toGeminiBody`.
 * @param {object} req - The per-call `ModelRequest`.
 * @returns {Request} A POST request for `:streamGenerateContent?alt=sse`.
 */
export function buildGeminiRequest(config, req) {
    const modelSegment = encodeURIComponent(config.model);
    const headers = {
        // Relay guarantees a resolved key before the provider runs
        // (BYOK 400s if missing; server-managed injects).
        'x-goog-api-key': config.apiKey ?? '',
        'Content-Type': 'application/json',
    };
    const payload = JSON.stringify(toGeminiBody(config, req));
    return new Request(`${ENDPOINT_BASE}/${modelSegment}:streamGenerateContent?alt=sse`, {
        method: 'POST',
        headers,
        body: payload,
    });
}
130
/**
 * Render a thrown value as the message of a `{kind:'error'}` event,
 * appending `error.cause` in parentheses when one is present.
 *
 * Node's `fetch` (undici) reports network failures as a bare
 * `TypeError: fetch failed` and keeps the real reason
 * (`UND_ERR_HEADERS_TIMEOUT`, `ECONNRESET`, …) on `.cause`; without the
 * cause every such failure would read identically.
 *
 * @param {unknown} e - The caught value.
 * @returns {string} `String(e)` for non-Errors, `e.message` when there is
 *   no cause, otherwise `"<message> (<cause>)"`.
 */
function describeError(e) {
    if (!(e instanceof Error))
        return String(e);
    const reason = e.cause;
    if (reason === null || reason === undefined)
        return e.message;
    // Error causes show "Name: message" (or just the name); plain objects
    // carrying a `code` show that code; everything else is stringified.
    const detail = reason instanceof Error
        ? (reason.message ? `${reason.name}: ${reason.message}` : reason.name)
        : (typeof reason === 'object' && 'code' in reason ? String(reason.code) : String(reason));
    return `${e.message} (${detail})`;
}
157
/**
 * Parse an already-fetched Gemini SSE response into `ModelEvent`s.
 * `signal` is optional — the relay passes the consumer's signal in,
 * the page-direct caller passes its own.
 *
 * Function calls are accumulated across chunks (Gemini re-sends the
 * growing parts list) and flushed as one `tool_call` per logical call
 * at stream end, so a turn with N calls yields exactly N events with
 * complete args and signatures.
 *
 * Event order: `thinking` / `text` events as chunks arrive, then the
 * `tool_call` events, then one `usage` event. A non-OK response, a
 * stream failure, or a stream with no visible output yields a single
 * `error` event instead, and no `usage` event follows it.
 *
 * @param {Response} response - The fetched response; its body is read as SSE.
 * @param {AbortSignal} [signal] - When aborted, the generator stops silently.
 */
export async function* geminiEventsFromResponse(response, signal) {
    if (!response.ok) {
        // Include up to 240 chars of the error body; fall back to the status
        // text if the body cannot be read.
        const text = await response.text().catch(() => response.statusText);
        yield { kind: 'error', message: `Gemini ${response.status}: ${text.slice(0, 240)}` };
        return;
    }
    let promptTokens = 0;
    let completionTokens = 0;
    let cachedTokens = 0;
    // Diagnostics for the "thinking-only, no output" case: Gemini can
    // end a response after the thinking phase having produced nothing
    // visible. `finishReason` on the last chunk names why (MAX_TOKENS /
    // SAFETY / RECITATION); a missing one means the stream was simply
    // truncated. `sawOutput` tracks whether any *visible* output (text
    // or a tool call — not thinking) actually came through.
    let sawOutput = false;
    let lastFinishReason;
    // Function calls accumulate here and are flushed once, after the
    // stream closes (see the per-part merge below and the flush loop).
    // Keyed by the call's ordinal position among the turn's function
    // calls — the one correlation key Gemini's stream offers (parts carry
    // no id or index).
    const pending = new Map();
    try {
        for await (const payload of readSseDataLines(response.body)) {
            if (signal?.aborted)
                return;
            // Payloads that are not valid JSON are skipped, not treated as errors.
            let chunk;
            try {
                chunk = JSON.parse(payload);
            }
            catch {
                continue;
            }
            // Only the first candidate is read.
            const parts = chunk.candidates?.[0]?.content?.parts ?? [];
            // Position of a functionCall part *among the functionCall parts in
            // this chunk* — NOT its raw index in `parts[]`. Gemini re-sends the
            // whole accumulating parts list each chunk, but the leading text
            // deltas drop out of later chunks, so a call's raw array index
            // shifts between chunks while its ordinal among function calls
            // stays put. That ordinal is the stable correlation key.
            //
            // Invariant this relies on: across re-sends Gemini only ever
            // appends function-call parts, never dropping or reordering an
            // earlier one, so the k-th call keeps ordinal k. That holds for the
            // relay's custom-function tools — it advertises no built-in Google
            // tools whose parts could interleave and shift the ordinals.
            let fnOrdinal = 0;
            for (const p of parts) {
                if (typeof p.text === 'string' && p.text.length > 0) {
                    // `thought === true` marks reasoning text; everything else is
                    // visible answer text.
                    if (p.thought === true) {
                        yield { kind: 'thinking', text: p.text };
                    }
                    else {
                        sawOutput = true;
                        yield { kind: 'text', text: p.text };
                    }
                }
                if (p.functionCall) {
                    sawOutput = true;
                    const slot = fnOrdinal++;
                    let call = pending.get(slot);
                    if (!call) {
                        call = { name: '', args: {} };
                        pending.set(slot, call);
                    }
                    // Merge re-sends into the slot. Name and args fill in over
                    // successive chunks; Gemini sends the complete args object each
                    // time, so the latest non-empty snapshot is the full one
                    // (replace, don't concatenate). An all-empty re-send never
                    // clobbers args already captured.
                    //
                    // This reads the default `functionCall.args` shape. Gemini 3's
                    // opt-in argument-streaming mode instead emits `partialArgs`
                    // fragments with `willContinue` — the relay never requests that
                    // mode (`toGeminiBody` sends no `functionCallingConfig`), so
                    // args always arrive whole. If that ever changes, reconstruct
                    // from `partialArgs` here; until then such args would stay `{}`.
                    if (p.functionCall.name)
                        call.name = p.functionCall.name;
                    const incomingArgs = p.functionCall.args;
                    if (incomingArgs &&
                        typeof incomingArgs === 'object' &&
                        !Array.isArray(incomingArgs) &&
                        Object.keys(incomingArgs).length > 0) {
                        call.args = incomingArgs;
                    }
                    // thoughtSignature is a sibling of functionCall on the part and
                    // frequently arrives on a later chunk than the args. Capturing
                    // it per-slot is what keeps the signature attached to its call
                    // for Gemini-3 replay.
                    if (p.thoughtSignature)
                        call.signature = p.thoughtSignature;
                }
            }
            const finishReason = chunk.candidates?.[0]?.finishReason;
            if (finishReason)
                lastFinishReason = finishReason;
            // Usage counters keep the latest reported value; a chunk that omits a
            // counter leaves the previous value in place.
            const usage = chunk.usageMetadata;
            if (usage) {
                promptTokens = usage.promptTokenCount ?? promptTokens;
                completionTokens = usage.candidatesTokenCount ?? completionTokens;
                if (typeof usage.cachedContentTokenCount === 'number') {
                    cachedTokens = usage.cachedContentTokenCount;
                }
            }
        }
    }
    catch (e) {
        // A failure after the caller aborted is the cancellation itself.
        if (signal?.aborted)
            return;
        yield { kind: 'error', message: describeError(e) };
        return;
    }
    // Stream ended cleanly but the model never produced visible output —
    // only thinking. Surface why: a non-STOP `finishReason` names it,
    // `none` means the stream was truncated before one arrived.
    if (!sawOutput) {
        yield {
            kind: 'error',
            message: `Gemini produced no output — finishReason=${lastFinishReason ?? 'none'} (response ended after thinking only)`,
        };
        return;
    }
    // Flush the accumulated function calls — exactly one `tool_call` per
    // logical call, carrying its complete args and thoughtSignature. The
    // `id` is the call's stable ordinal (not a per-chunk random value), so
    // re-parsing the same stream is deterministic and the id is identical
    // across the args/signature that arrived on different chunks.
    for (const [slot, call] of pending) {
        // Skip a slot that never got a name — a stray empty partial, never
        // dispatchable — so a turn with N real calls still yields exactly N
        // events. A named call with empty args is legitimate (a no-arg tool)
        // and is kept.
        if (!call.name)
            continue;
        yield {
            kind: 'tool_call',
            id: `gem_${slot}`,
            name: call.name,
            args: call.args,
            ...(call.signature ? { signature: call.signature } : {}),
        };
    }
    // `cachedTokens` is only reported when it is greater than zero.
    yield {
        kind: 'usage',
        usage: {
            promptTokens,
            outputTokens: completionTokens,
            ...(cachedTokens > 0 ? { cachedTokens } : {}),
        },
    };
}
320
/**
 * Retry policy for a single Gemini call.
 *
 * Three failure classes are retried: all are transient, all leave the
 * turn without usable output, and the same prompt often succeeds on the
 * next attempt:
 *
 *  - `MALFORMED_FUNCTION_CALL` — Gemini 3 intermittently emits a
 *    function call its own API then rejects.
 *  - `finishReason=STOP` with no visible output — the model thought and
 *    then said nothing. Common in long-context turns.
 *  - `finishReason=none` — the stream was cut off before a finishReason
 *    arrived; usually a transient transport blip.
 *
 * Deterministic failures (`finishReason=SAFETY` / `RECITATION` /
 * `MAX_TOKENS`) are NOT retried: the input determines them, so a retry
 * only burns API calls.
 *
 * A retried attempt is a *fresh* generation whose thinking is streamed
 * too, so a recovered turn shows the earlier (discarded) thinking ahead
 * of the real answer. That is a cosmetic cost on what was otherwise a
 * hard failure.
 */
/** Total attempts per call (first try included). */
const MAX_GEMINI_ATTEMPTS = 3;
/** Pause between attempts, in milliseconds. */
const RETRY_DELAY_MS = 500;
/** Substrings that mark an error message as retryable. Matched against
 *  `ModelEvent.message` from `geminiEventsFromResponse`. */
const RETRYABLE_ERROR_MARKERS = [
    'MALFORMED_FUNCTION_CALL',
    'finishReason=STOP',
    'finishReason=none',
];
/**
 * Report whether an error message names one of the retryable failures.
 *
 * @param {string} message - The error event's message.
 * @returns {boolean} `true` when any retryable marker occurs in `message`.
 */
function isRetryableError(message) {
    for (const marker of RETRYABLE_ERROR_MARKERS) {
        if (message.includes(marker))
            return true;
    }
    return false;
}
353
/**
 * Build a Gemini `ModelClient`. Construction values (apiKey, model)
 * come in the config; per-call values (messages, tools, sampling) come
 * in the `ModelRequest`.
 *
 * `chat(req, signal)` makes up to `MAX_GEMINI_ATTEMPTS` attempts. Each
 * attempt builds a fresh request, fetches it, and forwards the parsed
 * events. A retryable `error` event (see `isRetryableError`) on any
 * attempt but the last is swallowed and followed by a short pause and a
 * new attempt; every other event is passed through. An aborted `signal`
 * ends the generator silently, including during the pause between
 * attempts, so cancellation is not held up by the back-off.
 *
 * @param {object} config - Provider config (`model`, `apiKey`, …).
 * @returns {{ id: string, supportsTools: boolean, chat: Function }} The client.
 */
export function geminiModelClient(config) {
    return {
        id: `gemini:${config.model}`,
        supportsTools: true,
        async *chat(req, signal) {
            for (let attempt = 1; attempt <= MAX_GEMINI_ATTEMPTS; attempt++) {
                if (signal?.aborted)
                    return;
                const request = buildGeminiRequest(config, req);
                let response;
                try {
                    response = await fetch(request, signal ? { signal } : {});
                }
                catch (e) {
                    if (signal?.aborted)
                        return;
                    yield { kind: 'error', message: describeError(e) };
                    return;
                }
                let retry = false;
                for await (const evt of geminiEventsFromResponse(response, signal)) {
                    // Swallow retryable errors so the next attempt can recover.
                    // The non-retryable kinds (SAFETY, RECITATION, MAX_TOKENS,
                    // network/parse failures) fall straight through and surface.
                    // Final attempt always yields whatever it produces.
                    if (evt.kind === 'error' &&
                        isRetryableError(evt.message) &&
                        attempt < MAX_GEMINI_ATTEMPTS) {
                        retry = true;
                        break;
                    }
                    yield evt;
                }
                if (!retry)
                    return;
                try {
                    await response.body?.cancel();
                }
                catch {
                    /* already released — fine */
                }
                if (signal?.aborted)
                    return;
                // Back off before the next attempt, but wake immediately if the
                // caller aborts; the check at the top of the loop then returns.
                await new Promise((resolve) => {
                    const finish = () => {
                        clearTimeout(timer);
                        signal?.removeEventListener('abort', finish);
                        resolve();
                    };
                    const timer = setTimeout(finish, RETRY_DELAY_MS);
                    signal?.addEventListener('abort', finish, { once: true });
                });
            }
        },
    };
}
406
/**
 * Strip JSON-Schema keywords Gemini's `function_declarations[].parameters`
 * validator rejects. The validator is a narrow subset of OpenAPI 3.0
 * Schema — anything `zodToJsonSchema` (or hand-written JSON Schema)
 * emits beyond that subset 400s with `Unknown name "<key>"`.
 *
 * Keys stripped:
 *  - `additionalProperties` — emitted by `zodToJsonSchema` on every
 *    object; Gemini rejects it outright.
 *  - `$schema`, `$ref`, `$defs`, `definitions` — JSON-Schema-isms not
 *    supported in OpenAPI 3.0 Schema.
 *
 * The keys of a `properties` map are parameter names, not schema
 * keywords, so they are never stripped: a tool parameter that happens to
 * be called `definitions` or `$ref` is kept (it may also be listed in
 * `required`), and only its schema is sanitized.
 *
 * OpenRouter's adapter accepts the standard JSON Schema unchanged —
 * no equivalent sanitizer there.
 *
 * Implementation: deep-clone walk so we never mutate the caller's
 * schema object (the same `parameters` reference is held by the
 * ToolRegistry and shared across providers).
 *
 * @param {unknown} node - A schema, a fragment of one, or any JSON value.
 * @returns {unknown} A sanitized deep copy; primitives are returned as-is.
 */
const STRIP_KEYS = new Set(['additionalProperties', '$schema', '$ref', '$defs', 'definitions']);
export function sanitizeGeminiSchema(node) {
    if (Array.isArray(node)) {
        return node.map((item) => sanitizeGeminiSchema(item));
    }
    if (!node || typeof node !== 'object') {
        return node;
    }
    const out = {};
    for (const [k, v] of Object.entries(node)) {
        if (STRIP_KEYS.has(k))
            continue;
        const isPropertyMap = k === 'properties' && v !== null && typeof v === 'object' && !Array.isArray(v);
        out[k] = isPropertyMap ? sanitizePropertyMap(v) : sanitizeGeminiSchema(v);
    }
    return out;
}
/** Sanitize each property's schema while keeping every property name. */
function sanitizePropertyMap(properties) {
    const out = {};
    for (const [name, schema] of Object.entries(properties)) {
        out[name] = sanitizeGeminiSchema(schema);
    }
    return out;
}
441
+ //# sourceMappingURL=gemini.js.map