shmakk 1.2.3 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/.env.example +11 -0
  2. package/README.md +75 -1
  3. package/docs/index.html +154 -16
  4. package/docs/mcp.md +78 -0
  5. package/docs/ssh.md +82 -0
  6. package/docs/vibedit-analysis.md +375 -0
  7. package/docs/vim.md +110 -0
  8. package/docs/voice.md +4 -0
  9. package/package.json +9 -5
  10. package/scripts/test-vibedit.js +45 -0
  11. package/scripts/vibedit-demo.sh +52 -0
  12. package/skills/shmakk-skill-creator.md +269 -0
  13. package/src/_check.js +7 -0
  14. package/src/_check_schema.js +5 -0
  15. package/src/_cleanup.js +18 -0
  16. package/src/_fix.js +9 -0
  17. package/src/_test_import.js +15 -0
  18. package/src/agent.js +11 -4
  19. package/src/browser-daemon.js +209 -0
  20. package/src/browser.js +10 -0
  21. package/src/cli/browserDaemon.js +60 -0
  22. package/src/cli/connectBrowser.js +137 -0
  23. package/src/cli.js +235 -8
  24. package/src/completions.js +8 -0
  25. package/src/control.js +273 -1
  26. package/src/core/browserConnector.js +523 -0
  27. package/src/correction.js +6 -0
  28. package/src/electron.js +305 -0
  29. package/src/endpoints.js +74 -9
  30. package/src/index.js +24 -1
  31. package/src/llm.js +501 -61
  32. package/src/mobile.js +307 -0
  33. package/src/notify.js +51 -3
  34. package/src/orchestrator.js +35 -1
  35. package/src/pty.js +11 -6
  36. package/src/review.js +45 -11
  37. package/src/self-commands.js +153 -0
  38. package/src/session-convert.js +508 -0
  39. package/src/session-search.js +31 -0
  40. package/src/session.js +392 -46
  41. package/src/skills/browserActions.ts +984 -0
  42. package/src/skills.js +451 -24
  43. package/src/system-prompt.js +31 -25
  44. package/src/tools.js +81 -0
  45. package/src/vibedit/control.js +534 -0
  46. package/src/vibedit/electron.js +108 -0
  47. package/src/vibedit/files.js +171 -0
  48. package/src/vibedit/index.js +298 -0
  49. package/src/vibedit/overlay.js +1482 -0
  50. package/src/vibedit/prompts.js +245 -0
  51. package/src/vibedit/state.js +32 -0
  52. package/src/vim.js +410 -0
package/src/llm.js CHANGED
@@ -4,7 +4,7 @@ try { OpenAI = require('openai'); } catch { OpenAI = null; }
4
4
  const path = require('path');
5
5
  const os = require('os');
6
6
  const fs = require('fs');
7
- const { getCurrentEndpoint, getCurrentEndpointName, getModelRegistry, supportsVision } = require('./endpoints');
7
+ const { getCurrentEndpoint, getCurrentEndpointName, getModelRegistry, supportsVision, getVisionSupport } = require('./endpoints');
8
8
 
9
9
  function parseHeaders(s) {
10
10
  const out = {};
@@ -27,6 +27,78 @@ function buildHeaders(customHeaders, registry) {
27
27
  return headers;
28
28
  }
29
29
 
30
+ // ── Retry helper ───────────────────────────────────────────────────────────
31
+ // Shared retry with exponential backoff + jitter for 429 / 502 / 503 / 504.
32
+ // Also enforces a minimum gap between requests within this process so that
33
+ // rapid tool-call loops don't pile onto the rate limit immediately.
34
+
35
+ const RETRYABLE = new Set([429, 503, 502, 504]);
36
+ const MAX_RETRIES = 4;
37
+ const BASE_DELAY_MS = 1000;
38
+ const MAX_DELAY_MS = 30000;
39
+ const MIN_GAP_MS = 600; // floor between subsequent fetches in this process
40
+
41
+ let _lastReq = 0;
42
+
43
/**
 * Promise-based delay built on `setTimeout`.
 *
 * @param {number} ms - Milliseconds handed to `setTimeout`.
 * @returns {Promise<void>} Settles with no value once the timer fires.
 */
function sleepMs(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
46
+
47
/**
 * Decide how long to wait before the next retry attempt.
 *
 * A usable `Retry-After` header takes priority over the computed backoff.
 * Both header forms are understood: a number of seconds ("120") and an
 * HTTP-date ("Wed, 21 Oct 2015 07:28:00 GMT"). A header that is missing,
 * unparseable, zero, negative, or a date in the past falls back to
 * exponential backoff with jitter.
 *
 * @param {number} attempt - Zero-based attempt counter; doubles the base delay each step.
 * @param {string|null|undefined} retryAfterHeader - Raw `Retry-After` header value, if any.
 * @returns {number} Delay in milliseconds, never more than MAX_DELAY_MS.
 */
function retryDelay(attempt, retryAfterHeader) {
  if (retryAfterHeader) {
    const seconds = Number(retryAfterHeader);
    if (!Number.isNaN(seconds)) {
      if (seconds > 0) return Math.min(seconds * 1000, MAX_DELAY_MS);
    } else {
      // Not numeric: try the HTTP-date form and wait until that instant.
      // Date.parse yields NaN for garbage, which fails the `> 0` test below.
      const msUntil = Date.parse(retryAfterHeader) - Date.now();
      if (msUntil > 0) return Math.min(msUntil, MAX_DELAY_MS);
    }
  }

  const ceiling = Math.min(BASE_DELAY_MS * Math.pow(2, attempt), MAX_DELAY_MS);
  // Jitter: pick a point in the upper half (50%-100%) of the exponential step.
  return Math.round(ceiling * (0.5 + Math.random() * 0.5));
}
56
+
57
/**
 * `fetch` wrapper that paces requests and retries transient failures.
 *
 * - Each request reserves a send slot at least MIN_GAP_MS after the previous
 *   reservation. The slot is claimed synchronously, so callers running
 *   concurrently are spaced out too instead of all reading the same
 *   timestamp and firing together.
 * - Network-level failures (TypeError, ECONNRESET, ETIMEDOUT) and statuses
 *   in RETRYABLE are retried up to MAX_RETRIES times, waiting `retryDelay()`
 *   between attempts.
 * - Every wait is cut short with an AbortError as soon as `init.signal`
 *   aborts, so cancelling never has to sit out a full backoff delay.
 *
 * @param {string} url - Request URL.
 * @param {object} [init] - Options passed straight to `fetch`; `init.signal` is honoured.
 * @param {string} providerLabel - Name used in log lines and error messages.
 * @returns {Promise<Response>} The first response whose `ok` flag is set.
 * @throws {Error} AbortError when the signal aborts; the original error for
 *   non-retryable or exhausted network failures; `"<label> API <status>: <body>"`
 *   for non-retryable or exhausted HTTP failures.
 */
async function fetchWithBackoff(url, init, providerLabel) {
  const signal = init?.signal;

  const makeAbortError = () => {
    const err = new Error('aborted');
    err.name = 'AbortError';
    return err;
  };

  // Like sleepMs(), but rejects the moment the caller aborts.
  const pause = (ms) => {
    if (!signal) return sleepMs(ms);
    if (signal.aborted) return Promise.reject(makeAbortError());
    return new Promise((resolve, reject) => {
      let timer = null;
      const onAbort = () => {
        clearTimeout(timer);
        reject(makeAbortError());
      };
      timer = setTimeout(() => {
        signal.removeEventListener('abort', onAbort);
        resolve();
      }, ms);
      signal.addEventListener('abort', onAbort, { once: true });
    });
  };

  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    if (signal?.aborted) throw makeAbortError();

    // Reserve the send slot before awaiting anything.
    const now = Date.now();
    const slot = Math.max(now, _lastReq + MIN_GAP_MS);
    _lastReq = slot;
    if (slot > now) await pause(slot - now);

    let res;
    try {
      res = await fetch(url, init);
    } catch (e) {
      const transient = e.name === 'TypeError' || e.code === 'ECONNRESET' || e.code === 'ETIMEDOUT';
      if (attempt < MAX_RETRIES && transient) {
        await pause(retryDelay(attempt, null));
        continue;
      }
      throw e;
    }

    if (res.ok) return res;

    const status = res.status;
    if (RETRYABLE.has(status) && attempt < MAX_RETRIES) {
      // Read and discard the body so the failed response is fully consumed
      // before the next attempt; its content is not needed.
      await res.text().catch(() => '');
      const delay = retryDelay(attempt, res.headers.get('retry-after'));
      process.stderr.write(`[shmakk] ${providerLabel} ${status} (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${(delay / 1000).toFixed(1)}s…\n`);
      await pause(delay);
      continue;
    }

    const errText = await res.text().catch(() => '');
    throw new Error(`${providerLabel} API ${status}: ${errText.slice(0, 500)}`);
  }
}
101
+
30
102
  function envForProvider() {
31
103
  // Check active endpoint first (allows hotswap)
32
104
  const activeEndpoint = getCurrentEndpoint();
@@ -56,25 +128,87 @@ function envForProvider() {
56
128
  function isConfigured() {
57
129
  const cfg = envForProvider();
58
130
  if (recommendationMode()) return Object.keys(getModelRegistry().models).length > 0;
59
- if (cfg.provider === 'anthropic') return !!cfg.apiKey;
131
+ if (cfg.provider === 'anthropic') return true; // claude-proxy handles auth via OAuth
60
132
  if (cfg.provider === 'codex') return true; // codex-proxy handles auth via OAuth
133
+ if (cfg.provider === 'nvidia') return !!cfg.apiKey && !!OpenAI;
61
134
  return (!!cfg.baseURL || cfg.provider === 'openai') && !!OpenAI;
62
135
  }
63
136
 
137
/**
 * Built-in base URL for providers that have one.
 *
 * @param {string} provider - Provider identifier from the active config.
 * @returns {string|undefined} The default URL for 'openai' or 'nvidia';
 *   `undefined` for every other provider.
 */
function getDefaultBaseURL(provider) {
  switch (provider) {
    case 'openai':
      return 'https://local:8095/v1';
    case 'nvidia':
      return 'https://integrate.api.nvidia.com/v1';
    default:
      return undefined;
  }
}
142
+
64
143
  function makeOpenAIClient(cfg) {
65
144
  if (!OpenAI) throw new Error('openai sdk not installed');
66
- const baseURL = cfg.baseURL || (cfg.provider === 'openai' ? 'https://local:8095/v1' : undefined);
145
+ const baseURL = cfg.baseURL || getDefaultBaseURL(cfg.provider);
67
146
  if (!baseURL) throw new Error('SHMAKK_BASE_URL is required for OpenAI-compatible providers');
68
- return new OpenAI({
147
+ const client = new OpenAI({
69
148
  baseURL,
70
149
  apiKey: cfg.apiKey || process.env.OPENAI_API_KEY || 'not-needed',
71
150
  defaultHeaders: buildHeaders(cfg.headers, cfg.registry),
72
151
  });
152
+ const rawCreate = client.chat.completions.create.bind(client.chat.completions);
153
+ client.chat.completions.create = async (params, options = {}) => {
154
+ try {
155
+ return await rawCreate(params, options);
156
+ } catch (e) {
157
+ if (!hasVisionContent(params?.messages) || !isImageUrlSchemaError(e)) throw e;
158
+ process.stderr.write('[shmakk] endpoint rejected image_url blocks; retrying with image metadata as text\n');
159
+ return rawCreate({ ...params, messages: downgradeVisionMessages(params.messages) }, options);
160
+ }
161
+ };
162
+ return client;
163
+ }
164
+
165
/**
 * Tell whether any message carries an image part.
 *
 * Only messages whose `content` is an array are inspected. A part counts as
 * an image when it is an object with `type === 'image_url'` or with a truthy
 * `image_url` property.
 *
 * @param {Array<object>|null|undefined} messages - Chat messages; a missing list is treated as empty.
 * @returns {boolean}
 */
function hasVisionContent(messages) {
  for (const message of messages || []) {
    const parts = message?.content;
    if (!Array.isArray(parts)) continue;
    for (const part of parts) {
      if (!part || typeof part !== 'object') continue;
      if (part.type === 'image_url' || part.image_url) return true;
    }
  }
  return false;
}
172
+
173
/**
 * Produce a one-line text placeholder for an image part.
 *
 * The URL is read from `part.image_url.url` or `part.url`. For base64 data
 * URLs the placeholder includes the MIME type and the length of the base64
 * payload; otherwise the type is reported as plain "image". A `detail` hint
 * is appended when the part has one.
 *
 * @param {object|null|undefined} part - Image content part.
 * @returns {string} Text of the form `[Image omitted: <mime>[, base64=<n> chars][, detail=<d>]]`.
 */
function imageUrlSummary(part) {
  const url = String(part?.image_url?.url || part?.url || '');

  const mimeMatch = /^data:([^;]+);base64,/.exec(url);
  const payloadMatch = /^data:[^;]+;base64,(.*)$/.exec(url);
  const mime = (mimeMatch && mimeMatch[1]) || 'image';
  const payload = (payloadMatch && payloadMatch[1]) || '';

  let summary = `[Image omitted: ${mime}`;
  if (payload) summary += `, base64=${payload.length} chars`;
  const detail = part?.image_url?.detail || part?.detail;
  if (detail) summary += `, detail=${detail}`;
  return `${summary}]`;
}
181
+
182
/**
 * Flatten a multi-part message content array into plain text.
 *
 * Strings pass through, text parts contribute their `text`, image parts
 * become an `imageUrlSummary()` placeholder, and any other object is
 * JSON-serialised. Parts that produce nothing are dropped; the rest are
 * joined with newlines.
 *
 * @param {Array<*>} content - Message content parts.
 * @returns {string}
 */
function contentArrayToText(content) {
  const pieces = [];
  for (const part of content) {
    let text;
    if (typeof part === 'string') {
      text = part;
    } else if (!part || typeof part !== 'object') {
      text = '';
    } else if (part.type === 'text') {
      text = String(part.text || '');
    } else if (part.type === 'image_url' || part.image_url) {
      text = imageUrlSummary(part);
    } else {
      text = JSON.stringify(part);
    }
    if (text) pieces.push(text);
  }
  return pieces.join('\n');
}
191
+
192
/**
 * Convert multi-part messages to text-only messages.
 *
 * Messages whose `content` is an array are shallow-copied with the content
 * replaced by `contentArrayToText(content)`; all other messages are returned
 * as the same object.
 *
 * @param {Array<object>|null|undefined} messages - Chat messages; a missing list is treated as empty.
 * @returns {Array<object>} A new array of the same length.
 */
function downgradeVisionMessages(messages) {
  const source = messages || [];
  return source.map((message) => (
    Array.isArray(message?.content)
      ? { ...message, content: contentArrayToText(message.content) }
      : message
  ));
}
201
+
202
/**
 * Heuristic: did the endpoint reject the request because it cannot parse
 * `image_url` content parts?
 *
 * True when the error carries a 4xx status and its text both mentions
 * `image_url` and contains a schema-failure keyword (unknown variant,
 * expected, deserialize, invalid).
 *
 * All available text sources are searched together: `err.message`,
 * `err.error.message` and `err.response.data`. Non-string values (for
 * example a parsed JSON error body) are JSON-serialised first so their
 * content is searchable instead of collapsing to "[object Object]".
 *
 * @param {*} err - Whatever the client call rejected with.
 * @returns {boolean}
 */
function isImageUrlSchemaError(err) {
  const status = err?.status || err?.response?.status || 0;
  if (!(status >= 400 && status < 500)) return false;

  const toText = (value) => {
    if (typeof value === 'string') return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      // Circular structures and similar: fall back to the default string form.
      return String(value);
    }
  };

  const haystack = [err?.message, err?.error?.message, err?.response?.data]
    .filter((value) => value != null && value !== '')
    .map(toText)
    .join('\n');

  return /\bimage_url\b/i.test(haystack) && /(unknown variant|expected|deserialize|invalid)/i.test(haystack);
}
74
207
 
75
208
  function makeProviderClient(cfg) {
76
209
  if (cfg.provider === 'anthropic') return makeAnthropicCompatClient(cfg);
77
210
  if (cfg.provider === 'codex') return makeCodexCompatClient(cfg);
211
+ if (cfg.provider === 'nvidia') return makeOpenAIClient(cfg);
78
212
  return makeOpenAIClient(cfg);
79
213
  }
80
214
 
@@ -84,6 +218,18 @@ function makeClient() {
84
218
  return makeProviderClient(cfg);
85
219
  }
86
220
 
221
/**
 * Build a client for a specific entry of the model registry.
 *
 * `name` may be the alias 'main' or 'fast' (resolved through the registry's
 * `main` / `fast` fields) or a registry key. Only the registry's own keys
 * are accepted, so names such as "toString" or "constructor" that exist on
 * every object's prototype are reported as unknown rather than resolving to
 * an inherited function.
 *
 * @param {string} name - 'main', 'fast', or a key of `registry.models`.
 * @returns {{name: string, model: string, client: object}|null} The resolved
 *   entry name, the model id to request (falls back to the entry name) and
 *   the client; `null` when the registry has no such entry.
 */
function makeClientForEndpoint(name) {
  const registry = getModelRegistry();
  const models = registry?.models;
  if (!models) return null;

  const selected = name === 'main' ? registry.main : name === 'fast' ? registry.fast : name;
  if (!selected) return null;

  const isOwnEntry = Object.prototype.hasOwnProperty.call(models, selected);
  if (!isOwnEntry || !models[selected]) return null;

  const cfg = configFromModelEntry(selected, models[selected]);
  return {
    name: selected,
    model: cfg.model || selected,
    client: makeProviderClient(cfg),
  };
}
232
+
87
233
  function modelFor() {
88
234
  if (recommendationMode()) return process.env._SHMAKK_LAST_MODEL || 'model-recommendation';
89
235
  const activeEndpoint = getCurrentEndpoint();
@@ -208,7 +354,7 @@ async function ensureModelRuntime() {}
208
354
 
209
355
  // ── Codex (Responses API) compat client ────────────────────────────────────
210
356
  // Translates OpenAI chat.completions format to/from the Codex Responses API
211
- // via the codex-proxy (mitmdump on :8095 -> chatgpt.com/backend-api/codex/responses).
357
+ // via the anthprox FastAPI (:8256) -> mitmdump (:8095) -> chatgpt.com.
212
358
 
213
359
  function splitCodexSystem(messages) {
214
360
  let instructions = '';
@@ -265,38 +411,122 @@ function codexToolChoice(choice) {
265
411
  return 'auto';
266
412
  }
267
413
 
268
- function fromCodexResponse(model, data) {
269
- const message = { role: 'assistant', content: '', tool_calls: undefined };
270
- const calls = [];
271
- for (const item of data.output || []) {
272
- if (item.type === 'message') {
273
- const content = item.content || [];
274
- if (typeof content === 'string') {
275
- message.content += content;
276
- } else if (Array.isArray(content)) {
277
- for (const part of content) {
278
- if (part.type === 'output_text') message.content += part.text || '';
279
- }
280
- }
281
- }
282
- if (item.type === 'function_call') {
283
- calls.push({
284
- id: item.call_id,
285
- type: 'function',
286
- function: { name: item.name, arguments: item.arguments || '{}' },
287
- });
288
- }
414
+
415
+ // ── SSE parsing helpers (shared by streaming + buffered paths) ──────────
416
+
417
/**
 * Create an empty accumulator for one Codex SSE response.
 *
 * @returns {{content: string, callMap: Map<string, {call_id: string, name: string, arguments: string}>}}
 *   `content` collects assistant text; `callMap` holds function calls keyed
 *   by the stream's item id.
 */
function codexSSEParseState() {
  const callMap = new Map(); // item_id -> { call_id, name, arguments }
  return { content: '', callMap };
}
423
+
424
/**
 * Apply one SSE data payload (without the 'data: ' prefix) to the state.
 *
 * Handled events:
 *   - response.output_text.delta: appends to `state.content`
 *   - response.output_item.added (function_call items): registers the call
 *   - response.function_call_arguments.delta: appends to a registered call
 *   - response.function_call_arguments.done: replaces the call's arguments
 *     when the event supplies them
 * Everything else is ignored. This includes payloads that are not JSON and
 * JSON payloads that are not objects (e.g. `null`), which would otherwise
 * crash on the `type` lookup.
 *
 * @param {{content: string, callMap: Map}} state - Accumulator from codexSSEParseState().
 * @param {string} line - Raw payload text.
 * @returns {string|null} The text delta for output_text events (possibly
 *   an empty string), otherwise null.
 */
function codexSSEFeed(state, line) {
  if (!line) return null;

  let evt;
  try {
    evt = JSON.parse(line);
  } catch {
    return null;
  }
  if (evt === null || typeof evt !== 'object') return null;

  switch (evt.type) {
    case 'response.output_text.delta': {
      const text = evt.delta || '';
      state.content += text;
      return text;
    }
    case 'response.output_item.added': {
      if (evt.item?.type === 'function_call') {
        state.callMap.set(evt.item.id, {
          call_id: evt.item.call_id,
          name: evt.item.name,
          arguments: evt.item.arguments || '',
        });
      }
      return null;
    }
    case 'response.function_call_arguments.delta': {
      const entry = evt.item_id ? state.callMap.get(evt.item_id) : undefined;
      if (entry) entry.arguments += evt.delta || '';
      return null;
    }
    case 'response.function_call_arguments.done': {
      const entry = evt.item_id ? state.callMap.get(evt.item_id) : undefined;
      // Keep what was accumulated if the final event carries no arguments.
      if (entry) entry.arguments = evt.arguments || entry.arguments;
      return null;
    }
    default:
      return null;
  }
}
450
+
451
/**
 * Turn an accumulated Codex SSE state into an OpenAI-style chat completion.
 *
 * Tool-call arguments are always emitted as a JSON string. A call that
 * streamed no arguments is emitted as '{}' rather than an empty string, so
 * consumers that JSON.parse the arguments do not fail on parameterless
 * calls.
 *
 * @param {string} model - Model id to report in the completion.
 * @param {{content: string, callMap: Map}} state - Accumulator filled by codexSSEFeed().
 * @returns {object} `{ id, object: 'chat.completion', model, choices }` with a
 *   single assistant message; `tool_calls` is undefined when there are none.
 *   The id is synthesised from the current time.
 */
function codexSSEBuildCompletion(model, state) {
  const serializeArgs = (args) => {
    if (typeof args === 'string') return args || '{}';
    return JSON.stringify(args ?? {});
  };

  const calls = [...state.callMap.values()].map((c) => ({
    id: c.call_id,
    type: 'function',
    function: { name: c.name, arguments: serializeArgs(c.arguments) },
  }));

  const message = { role: 'assistant', content: state.content, tool_calls: undefined };
  if (calls.length) message.tool_calls = calls;

  return {
    id: 'codex-' + Date.now(),
    object: 'chat.completion',
    model,
    choices: [{ index: 0, message, finish_reason: 'stop' }],
  };
}
299
466
 
467
/**
 * Build OpenAI-format streaming chunks for the accumulated tool calls.
 *
 * One chunk is produced per call, in insertion order, with `tool_calls[0].index`
 * set to the call's position. Arguments are always a JSON string; a call
 * with no arguments is sent as '{}' (matching the Anthropic counterpart) so
 * the concatenated arguments stay parseable.
 *
 * @param {{callMap: Map}} state - Accumulator filled by codexSSEFeed().
 * @returns {Array<object>} Chunk objects; empty when no calls were collected.
 */
function codexSSEBuildToolCallChunks(state) {
  const serializeArgs = (args) => {
    if (typeof args === 'string') return args || '{}';
    return JSON.stringify(args ?? {});
  };

  return [...state.callMap.values()].map((c, i) => ({
    choices: [{
      index: 0,
      delta: {
        tool_calls: [{
          index: i,
          id: c.call_id,
          type: 'function',
          function: { name: c.name, arguments: serializeArgs(c.arguments) },
        }],
      },
      finish_reason: null,
    }],
  }));
}
486
+
487
+ // ── Streaming SSE iterator ─────────────────────────────────────────────
488
+
489
/**
 * Convert a Codex SSE response body into OpenAI-format streaming chunks.
 *
 * Text deltas are yielded as they are parsed. After the body ends (or the
 * signal aborts), one chunk per collected tool call is yielded, followed by
 * a final chunk with `finish_reason: 'stop'`.
 *
 * Details:
 *   - A final data line that arrives without a trailing newline is parsed
 *     like any other line, and its text delta is yielded rather than being
 *     silently accumulated.
 *   - When reading stops before the body is exhausted (abort, or the
 *     consumer leaving its loop early), the stream is cancelled rather than
 *     only unlocked, so it is not left open with nobody reading it.
 *
 * @param {ReadableStream<Uint8Array>} body - Response body to read.
 * @param {string} model - Unused here; kept for signature parity with callers.
 * @param {AbortSignal} [signal] - Checked before each read; stops reading when aborted.
 * @returns {AsyncGenerator<object>} OpenAI-style chunk objects.
 */
async function* codexStreamIterator(body, model, signal) {
  const state = codexSSEParseState();
  const reader = body.getReader();
  const decoder = new TextDecoder();

  const textChunk = (content) => ({ choices: [{ index: 0, delta: { content }, finish_reason: null }] });
  const feedLine = (line) => {
    if (!line.startsWith('data: ')) return null;
    // Strip the \r left behind by \r\n line endings.
    return codexSSEFeed(state, line.slice(6).replace(/\r$/, ''));
  };

  let buffer = '';
  let exhausted = false;

  try {
    while (!signal?.aborted) {
      const { done, value } = await reader.read();
      if (done) {
        exhausted = true;
        break;
      }

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() || ''; // keep incomplete final line

      for (const line of lines) {
        const delta = feedLine(line);
        if (delta) yield textChunk(delta);
      }
    }

    // Flush: emit any bytes the decoder is still holding, then process the
    // unterminated tail exactly like a complete line.
    buffer += decoder.decode();
    const tail = feedLine(buffer);
    if (tail) yield textChunk(tail);
  } finally {
    if (!exhausted) {
      try { await reader.cancel(); } catch {}
    }
    try { reader.releaseLock(); } catch {}
  }

  // Yield tool calls then stop
  for (const chunk of codexSSEBuildToolCallChunks(state)) yield chunk;
  yield { choices: [{ index: 0, delta: {}, finish_reason: 'stop' }] };
}
527
+
528
+ // ── Codex compat client ─────────────────────────────────────────────────
529
+
300
530
  function makeCodexCompatClient(cfg) {
301
531
  return {
302
532
  chat: {
@@ -309,19 +539,17 @@ function makeCodexCompatClient(cfg) {
309
539
  instructions,
310
540
  input,
311
541
  store: false,
312
- stream: false, // always collect, then fake-stream if caller wants it
313
- max_output_tokens: params.max_tokens || 4096,
542
+ stream: true, // Codex API requires stream: true
314
543
  };
315
- if (params.temperature != null) body.temperature = params.temperature;
316
- if (params.top_p != null) body.top_p = params.top_p;
317
544
  if (tools.length) {
318
545
  body.tools = tools;
319
546
  const tc = codexToolChoice(params.tool_choice);
320
547
  if (tc) body.tool_choice = tc;
321
548
  }
322
549
 
323
- const base = (cfg.baseURL || 'https://local:8095').replace(/\/+$/, '');
324
- const res = await fetch(`${base}/backend-api/codex/responses`, {
550
+ // Default to the anthprox codex-api FastAPI, not the raw mitmdump.
551
+ const base = (cfg.baseURL || 'http://localhost:8256').replace(/\/+$/, '');
552
+ const res = await fetchWithBackoff(`${base}/responses`, {
325
553
  method: 'POST',
326
554
  signal: options.signal,
327
555
  headers: {
@@ -330,11 +558,22 @@ function makeCodexCompatClient(cfg) {
330
558
  ...buildHeaders(cfg.headers, cfg.registry),
331
559
  },
332
560
  body: JSON.stringify(body),
333
- });
334
- if (!res.ok) throw new Error(`Codex API ${res.status}: ${await res.text().slice(0, 500)}`);
335
- const completion = fromCodexResponse(body.model, await res.json());
336
- if (params.stream) return fakeOpenAIStreamFromCompletion(completion);
337
- return completion;
561
+ }, 'Codex');
562
+
563
+ // Streaming: return an async iterable that yields OpenAI-format chunks
564
+ // as SSE events arrive from the codex-api.
565
+ if (params.stream) {
566
+ return codexStreamIterator(res.body, body.model, options.signal);
567
+ }
568
+
569
+ // Non-streaming: buffer and parse the SSE response into a completion.
570
+ const raw = await res.text();
571
+ const state = codexSSEParseState();
572
+ for (const line of raw.split('\n')) {
573
+ if (line.startsWith('data: ')) codexSSEFeed(state, line.slice(6).replace(/\r$/, ''));
574
+ }
575
+ if (!state.content && !state.callMap.size) throw new Error('Codex API: no response data');
576
+ return codexSSEBuildCompletion(body.model, state);
338
577
  },
339
578
  },
340
579
  },
@@ -418,29 +657,143 @@ function toOpenAICompletion(model, data) {
418
657
  return { id: data.id, object: 'chat.completion', model, choices: [{ index: 0, message, finish_reason: data.stop_reason || 'stop' }] };
419
658
  }
420
659
 
421
- async function* fakeOpenAIStreamFromCompletion(completion) {
422
- const message = completion.choices?.[0]?.message || {};
423
- if (message.content) {
424
- yield { choices: [{ index: 0, delta: { content: message.content }, finish_reason: null }] };
660
+ // ── Anthropic SSE helpers ──────────────────────────────────────────────────
661
+ // Anthropic streaming SSE format (via anthprox proxy):
662
+ // event: content_block_start / content_block_delta / content_block_stop
663
+ // event: message_start / message_delta / message_stop
664
+ // event: ping
665
+
666
/**
 * Create an empty accumulator for one Anthropic SSE response.
 *
 * @returns {{content: string, blocks: Map, blockOrder: Array, stopReason: (string|null), model: (string|null)}}
 *   `blocks` maps a content-block index to `{ type, id?, name?, text, input_json }`;
 *   `blockOrder` records the indices in arrival order.
 */
function anthropicSSEParseState() {
  const blocks = new Map(); // index -> { type, id?, name?, text, input_json }
  const blockOrder = [];
  return {
    content: '',
    blocks,
    blockOrder,
    stopReason: null,
    model: null,
  };
}
675
+
676
/**
 * Apply one Anthropic SSE data payload to the state.
 *
 * The event kind is taken from the payload's own `type` field:
 *   - message_start: records the model name
 *   - content_block_start: registers a block under its index
 *   - content_block_delta: appends text (text_delta) or partial tool input
 *     JSON (input_json_delta) to the registered block
 *   - message_delta: records the stop reason when the event carries one; an
 *     event without a stop reason leaves a previously recorded value alone
 * Other events, payloads that are not JSON, and JSON payloads that are not
 * objects (e.g. `null`) are ignored.
 *
 * @param {object} state - Accumulator from anthropicSSEParseState().
 * @param {string} eventName - Name from the preceding `event:` line; not
 *   consulted, since the payload's `type` is used instead.
 * @param {string} data - Raw payload text (without the 'data: ' prefix).
 * @returns {string|null} The text delta for text_delta events (possibly an
 *   empty string), otherwise null.
 */
function anthropicSSEFeed(state, eventName, data) {
  let evt;
  try {
    evt = JSON.parse(data);
  } catch {
    return null;
  }
  if (evt === null || typeof evt !== 'object') return null;

  switch (evt.type) {
    case 'message_start':
      state.model = evt.message?.model;
      return null;

    case 'content_block_start': {
      const block = evt.content_block || {};
      state.blocks.set(evt.index, { type: block.type, id: block.id, name: block.name, text: '', input_json: '' });
      state.blockOrder.push(evt.index);
      return null;
    }

    case 'content_block_delta': {
      const block = state.blocks.get(evt.index);
      if (!block) return null;
      const delta = evt.delta || {};
      if (delta.type === 'text_delta') {
        const text = delta.text || '';
        block.text += text;
        return text;
      }
      if (delta.type === 'input_json_delta') {
        block.input_json += delta.partial_json || '';
      }
      return null;
    }

    case 'message_delta': {
      const reason = evt.delta?.stop_reason;
      if (reason) state.stopReason = reason;
      return null;
    }

    default:
      return null;
  }
}
704
+
705
/**
 * Turn an accumulated Anthropic SSE state into an OpenAI-style chat completion.
 *
 * Blocks are visited in arrival order. Text blocks (and blocks without a
 * type) are concatenated into the message content; tool_use blocks become
 * function tool calls whose arguments default to '{}' when no input JSON
 * was streamed.
 *
 * @param {object} state - Accumulator filled by anthropicSSEFeed().
 * @returns {object} `{ id, object: 'chat.completion', model, choices }`; the
 *   model falls back to 'claude' and the finish reason to 'stop'. The id is
 *   synthesised from the current time.
 */
function anthropicSSEBuildCompletion(state) {
  let text = '';
  const toolCalls = [];

  state.blockOrder.forEach((idx) => {
    const block = state.blocks.get(idx);
    if (!block) return;

    if (block.type === 'tool_use') {
      toolCalls.push({
        id: block.id,
        type: 'function',
        function: { name: block.name, arguments: block.input_json || '{}' },
      });
      return;
    }
    if (block.type === 'text' || !block.type) {
      text += block.text;
    }
  });

  const message = {
    role: 'assistant',
    content: text,
    tool_calls: toolCalls.length ? toolCalls : undefined,
  };
  const finishReason = state.stopReason || 'stop';

  return {
    id: 'ant-' + Date.now(),
    object: 'chat.completion',
    model: state.model || 'claude',
    choices: [{ index: 0, message, finish_reason: finishReason }],
  };
}
729
+
730
/**
 * Build OpenAI-format streaming chunks for the accumulated tool_use blocks.
 *
 * One chunk is produced per tool_use block, in arrival order. Each call gets
 * its own `tool_calls[0].index` (0, 1, 2, ...). Consumers of OpenAI-style
 * streams group tool-call deltas by that index, so a shared index would
 * merge separate calls into one.
 *
 * @param {object} state - Accumulator filled by anthropicSSEFeed().
 * @returns {Array<object>} Chunk objects; empty when there are no tool calls.
 */
function anthropicSSEBuildToolCallChunks(state) {
  const chunks = [];
  for (const idx of state.blockOrder) {
    const block = state.blocks.get(idx);
    if (!block || block.type !== 'tool_use') continue;
    chunks.push({
      choices: [{
        index: 0,
        delta: {
          tool_calls: [{
            // Position among tool calls only; text blocks do not count.
            index: chunks.length,
            id: block.id,
            type: 'function',
            function: { name: block.name, arguments: block.input_json || '{}' },
          }],
        },
        finish_reason: null,
      }],
    });
  }
  return chunks;
}
752
+
753
/**
 * Convert an Anthropic SSE response body into OpenAI-format streaming chunks.
 *
 * Text deltas are yielded as they are parsed. After the body ends (or the
 * signal aborts), one chunk per collected tool call is yielded, followed by
 * a final chunk whose `finish_reason` is the recorded stop reason, or 'stop'
 * when none was recorded.
 *
 * Details:
 *   - A final line that arrives without a trailing newline is parsed like
 *     any other line, and its text delta is yielded rather than being
 *     silently accumulated.
 *   - When reading stops before the body is exhausted (abort, or the
 *     consumer leaving its loop early), the stream is cancelled rather than
 *     only unlocked, so it is not left open with nobody reading it.
 *
 * @param {ReadableStream<Uint8Array>} body - Response body to read.
 * @param {string} model - Unused here; kept for signature parity with callers.
 * @param {AbortSignal} [signal] - Checked before each read; stops reading when aborted.
 * @returns {AsyncGenerator<object>} OpenAI-style chunk objects.
 */
async function* anthropicStreamIterator(body, model, signal) {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  const state = anthropicSSEParseState();

  const textChunk = (content) => ({ choices: [{ index: 0, delta: { content }, finish_reason: null }] });

  let buffer = '';
  let eventName = '';
  let exhausted = false;

  // Handles one SSE line: remembers "event:" names, feeds "data:" payloads.
  const consumeLine = (line) => {
    if (line.startsWith('event: ')) {
      eventName = line.slice(7).trim();
      return null;
    }
    if (!line.startsWith('data: ')) return null;
    // Trim trailing \r that comes from \r\n line endings
    return anthropicSSEFeed(state, eventName, line.slice(6).replace(/\r$/, ''));
  };

  try {
    while (!signal?.aborted) {
      const { done, value } = await reader.read();
      if (done) {
        exhausted = true;
        break;
      }

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      buffer = lines.pop() || ''; // keep incomplete final line

      for (const line of lines) {
        const text = consumeLine(line);
        if (text) yield textChunk(text);
      }
    }

    // Flush: emit any bytes the decoder is still holding, then process the
    // unterminated tail exactly like a complete line.
    buffer += decoder.decode();
    const tail = consumeLine(buffer);
    if (tail) yield textChunk(tail);
  } finally {
    if (!exhausted) {
      try { await reader.cancel(); } catch {}
    }
    try { reader.releaseLock(); } catch {}
  }

  // Yield tool calls then stop
  for (const chunk of anthropicSSEBuildToolCallChunks(state)) yield chunk;
  yield { choices: [{ index: 0, delta: {}, finish_reason: state.stopReason || 'stop' }] };
}
445
798
 
446
799
  function makeAnthropicCompatClient(cfg) {
@@ -448,13 +801,13 @@ function makeAnthropicCompatClient(cfg) {
448
801
  chat: {
449
802
  completions: {
450
803
  create: async (params, options = {}) => {
451
- if (!cfg.apiKey) throw new Error('Anthropic api_key is required');
452
804
  const { system, messages } = splitAnthropicSystem(params.messages || []);
453
805
  const tools = params.tool_choice === 'none' ? [] : anthropicTools(params.tools);
454
806
  const body = {
455
807
  model: params.model || cfg.model,
456
808
  max_tokens: params.max_tokens || 4096,
457
809
  temperature: params.temperature ?? 0,
810
+ stream: !!params.stream,
458
811
  messages,
459
812
  };
460
813
  if (system) body.system = system;
@@ -464,22 +817,28 @@ function makeAnthropicCompatClient(cfg) {
464
817
  if (toolChoice) body.tool_choice = toolChoice;
465
818
  }
466
819
 
467
- const base = (cfg.baseURL || 'https://local:8083').replace(/\/+$/, '');
468
- const res = await fetch(`${base}/v1/messages`, {
820
+ // Default to the anthprox claude-api FastAPI, not the raw mitmdump.
821
+ const base = (cfg.baseURL || 'http://localhost:8083').replace(/\/+$/, '');
822
+ const res = await fetchWithBackoff(`${base}/v1/messages`, {
469
823
  method: 'POST',
470
824
  signal: options.signal,
471
825
  headers: {
472
826
  'content-type': 'application/json',
473
- 'x-api-key': cfg.apiKey,
827
+ 'x-api-key': cfg.apiKey || '',
474
828
  'anthropic-version': '2023-06-01',
475
829
  ...buildHeaders(cfg.headers, cfg.registry),
476
830
  },
477
831
  body: JSON.stringify(body),
478
- });
479
- if (!res.ok) throw new Error(`Anthropic API ${res.status}: ${await res.text()}`);
480
- const completion = toOpenAICompletion(body.model, await res.json());
481
- if (params.stream) return fakeOpenAIStreamFromCompletion(completion);
482
- return completion;
832
+ }, 'Anthropic');
833
+
834
+ // Streaming: read SSE in real-time via Anthropic SSE parser
835
+ if (params.stream) {
836
+ return anthropicStreamIterator(res.body, body.model, options.signal);
837
+ }
838
+
839
+ // Non-streaming: buffer and convert
840
+ const data = await res.json();
841
+ return toOpenAICompletion(body.model, data);
483
842
  },
484
843
  },
485
844
  },
@@ -536,4 +895,85 @@ function getDeepSeekOptions(taskType) {
536
895
  };
537
896
  }
538
897
 
539
- module.exports = { makeClient, modelFor, isConfigured, ensureModelRuntime, getDeepSeekOptions, isDeepSeekProvider, supportsVision };
898
+ // ── Vision fallback: describe images via a vision-capable endpoint ────────
899
+ // When the current endpoint doesn't support vision but a tool returned images,
900
+ // we call the dedicated visionSupport endpoint (from endpoints.json) to
901
+ // describe them as text for the non-vision model.
902
+
903
/**
 * Ask a vision-capable endpoint to describe images as text.
 *
 * The endpoint comes from `getVisionSupport()`; when that returns nothing,
 * the first model-registry entry flagged `vision` is used instead. The
 * images are sent as base64 data URLs in a single user message.
 *
 * Best-effort: every failure except an abort is logged to stderr and
 * reported as `null` so the caller can continue without a description.
 *
 * @param {Array<{data: string, mimeType?: string, dataLength?: number, truncated?: boolean}>} images
 *   Images to describe; entries without `data` are skipped.
 * @param {AbortSignal} [signal] - Forwarded to the client call.
 * @returns {Promise<string|null>} "[Vision description via <model>]:\n<text>",
 *   or null when there is nothing to describe, no vision endpoint is
 *   available, or the call fails or returns no text.
 * @throws {Error} Re-throws AbortError so cancellation propagates.
 */
async function describeImages(images, signal) {
  // Filter to images with actual base64 data
  const valid = (images || []).filter((img) => img && img.data);
  if (!valid.length) return null;

  let visionCfg = getVisionSupport();

  // No explicit visionSupport config: fall back to the first registry entry
  // that is flagged as vision-capable.
  if (!visionCfg) {
    const models = getModelRegistry()?.models;
    const hit = models ? Object.entries(models).find(([, entry]) => entry?.vision) : undefined;
    if (hit) visionCfg = { name: hit[0], ...hit[1], vision: true };
  }
  if (!visionCfg) return null;

  const cfg = configFromModelEntry('visionSupport', visionCfg);
  let client;
  try {
    client = makeProviderClient(cfg);
  } catch {
    return null;
  }
  if (!client) return null;

  const label = cfg.model || 'visionSupport';
  // Assumption: images without a declared type are PNG. Without a fallback
  // the data URL would literally read "data:undefined;base64,...".
  const mimeOf = (img) => img.mimeType || 'image/png';
  // Prefer the reported length; otherwise measure the base64 text itself so
  // the log line does not claim 0 bytes.
  const encodedLength = (img) => (Number.isFinite(img.dataLength) ? img.dataLength : String(img.data).length);

  const desc = valid.map((img, i) =>
    `[Image #${i + 1}: ${mimeOf(img)}, ${(encodedLength(img) * 0.75) | 0} decoded bytes${img.truncated ? ', truncated' : ''}]`
  ).join(', ');

  try {
    const resp = await client.chat.completions.create({
      model: cfg.model,
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: 'Describe these images concisely. Focus on what is visible: UI elements, text, layout, key content. If there are multiple images, describe each one labeled by number. Keep it under 500 words.' },
          ...valid.map((img) => ({
            type: 'image_url',
            image_url: { url: `data:${mimeOf(img)};base64,${img.data}`, detail: 'auto' },
          })),
        ],
      }],
      max_tokens: 600,
    }, { signal });

    const content = resp?.choices?.[0]?.message?.content;
    const text = typeof content === 'string' ? content.trim() : '';
    if (!text) return null;

    process.stderr.write(`[shmakk] vision fallback described ${valid.length} image(s): ${desc}\n`);
    return `[Vision description via ${label}]:\n${text}`;
  } catch (e) {
    if (e?.name === 'AbortError') throw e;
    // `e` may be any thrown value, including null or a string.
    process.stderr.write(`[shmakk] vision fallback (${label}) failed: ${e?.message ?? e}\n`);
    return null;
  }
}
967
+
968
+ module.exports = {
969
+ makeClient,
970
+ makeClientForEndpoint,
971
+ modelFor,
972
+ isConfigured,
973
+ ensureModelRuntime,
974
+ getDeepSeekOptions,
975
+ isDeepSeekProvider,
976
+ supportsVision,
977
+ describeImages,
978
+ _test: { downgradeVisionMessages, hasVisionContent, isImageUrlSchemaError },
979
+ };