voyageai-cli 1.30.2 → 1.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4517,24 +4517,39 @@ select:focus { outline: none; border-color: var(--accent); }
4517
4517
  cursor: help;
4518
4518
  }
4519
4519
  .wf-validation-bar {
4520
- position: fixed;
4521
- top: 44px;
4522
- left: 260px;
4523
- right: 0;
4520
+ position: absolute;
4521
+ top: 52px;
4522
+ left: 12px;
4523
+ right: 12px;
4524
4524
  height: 28px;
4525
4525
  background: var(--bg-surface);
4526
- border-bottom: 1px solid var(--border);
4526
+ border: 1px solid var(--border);
4527
+ border-radius: 6px;
4527
4528
  display: none;
4528
4529
  align-items: center;
4529
- padding: 0 16px;
4530
+ padding: 0 12px;
4530
4531
  font-size: 12px;
4531
- z-index: 50;
4532
+ z-index: 9;
4532
4533
  cursor: pointer;
4533
4534
  transition: background-color 0.15s;
4534
4535
  }
4535
4536
  .wf-validation-bar:hover {
4536
4537
  background: var(--bg-card);
4537
4538
  }
/* Dismiss ("×") button rendered inside the workflow validation bar. */
.wf-validation-bar .wf-validation-close {
  /* Auto left margin pushes the button to the trailing edge
     (presumably the bar is shown as a flex row; it sets align-items). */
  margin-left: auto;
  padding: 0 4px;
  border: none;
  background: none;
  /* Take the text colour of the bar it sits in. */
  color: inherit;
  font-size: 14px;
  line-height: 1;
  /* Dimmed until hovered. */
  opacity: 0.6;
  cursor: pointer;
}
.wf-validation-bar .wf-validation-close:hover {
  opacity: 1;
}
4538
4553
  .wf-validation-bar.warning {
4539
4554
  color: #FFB74D;
4540
4555
  border-bottom-color: #FFB74D;
@@ -6687,12 +6702,12 @@ Semantic search understands meaning beyond keyword matching</textarea>
6687
6702
  <div class="tab-panel" id="tab-multimodal" role="tabpanel" aria-labelledby="tab-btn-multimodal" tabindex="0">
6688
6703
  <div class="page-header">
6689
6704
  <h2 class="page-header-title">Multimodal</h2>
6690
- <p class="page-header-subtitle">Compare images and text in the same vector space</p>
6691
- <p class="page-header-hint">Voyage AI's multimodal models embed images and text into a unified vector space, so you can compare them directly with cosine similarity.</p>
6705
+ <p class="page-header-subtitle">Compare images, video, and text in the same vector space</p>
6706
+ <p class="page-header-hint">Voyage AI's multimodal models embed images, video, and text into a unified vector space, so you can compare them directly with cosine similarity.</p>
6692
6707
  <a class="page-header-docs" href="https://docs.vaicli.com/docs/commands/embeddings/embed" target="_blank" rel="noopener" title="Multimodal embedding documentation"><svg width="14" height="14" viewBox="0 0 16 16" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M3 1h7l3 3v11H3z"/><path d="M10 1v3h3"/></svg>Docs</a>
6693
6708
  </div>
6694
6709
 
6695
- <!-- Section A: Image ↔ Text Similarity -->
6710
+ <!-- Section A: Image / Video ↔ Text Similarity -->
6696
6711
  <div class="mm-grid">
6697
6712
  <div class="card">
6698
6713
  <div class="card-title">Image</div>
@@ -6708,9 +6723,23 @@ Semantic search understands meaning beyond keyword matching</textarea>
6708
6723
  <button class="mm-clear-btn" onclick="clearMultimodalImage()">✕ Clear</button>
6709
6724
  </div>
6710
6725
  </div>
<!-- Video input card: drop zone + hidden file input + preview shown once a video is loaded -->
<div class="card">
  <div class="card-title">Video</div>
  <div class="mm-drop-zone" id="mmVideoDropZone">
    <div class="mm-drop-icon">🎬</div>
    <div class="mm-drop-text">Drop a video here or click to browse</div>
    <!-- Keep this list in sync with the accept attribute below and the type check in handleMultimodalVideo() -->
    <div class="mm-drop-hint">MP4, WebM, MOV, AVI, MKV, max 20 MB</div>
  </div>
  <input type="file" id="mmVideoFileInput" accept="video/mp4,video/webm,video/quicktime,video/x-msvideo,video/x-matroska" style="display:none">
  <div class="mm-preview" id="mmVideoPreview">
    <video id="mmPreviewVideo" controls style="max-width:100%;max-height:240px;border-radius:8px;background:#000;"></video>
    <div class="mm-file-info" id="mmVideoFileInfo"></div>
    <button class="mm-clear-btn" onclick="clearMultimodalVideo()">✕ Clear</button>
  </div>
</div>
6711
6740
  <div class="card">
6712
6741
  <div class="card-title">Text</div>
6713
- <textarea id="mmText" rows="8" placeholder="Describe what you see, or enter any text to compare against the image..."></textarea>
6742
+ <textarea id="mmText" rows="8" placeholder="Describe what you see, or enter any text to compare against the media..."></textarea>
6714
6743
  </div>
6715
6744
  </div>
6716
6745
 
@@ -7355,6 +7384,9 @@ Reranking models rescore initial search results to improve relevance ordering.</
7355
7384
  <button onclick="wfZoom(1)" title="Zoom in">+</button>
7356
7385
  <button onclick="wfZoom(-1)" title="Zoom out">&minus;</button>
7357
7386
  <button onclick="wfFitToView()" title="Fit to view">&#8862;</button>
<!-- Icon-only toolbar button: aria-label supplies the accessible name; the SVG is decorative -->
<button onclick="wfRelayout()" title="Auto-layout (reorder nodes to minimize crossings)" aria-label="Auto-layout (reorder nodes to minimize crossings)">
  <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" focusable="false"><rect x="3" y="3" width="7" height="7"/><rect x="14" y="3" width="7" height="7"/><rect x="14" y="14" width="7" height="7"/><rect x="3" y="14" width="7" height="7"/><line x1="10" y1="6.5" x2="14" y2="6.5"/><line x1="10" y1="17.5" x2="14" y2="17.5"/></svg>
</button>
7358
7390
  <button onclick="wfResetExecution()" title="Reset">&#8635;</button>
7359
7391
  <span class="wf-toolbar-sep"></span>
7360
7392
  <button class="wf-plan-btn" onclick="wfDryRun()" id="wfDryRunBtn" disabled title="Dry run: show execution plan">&#9881; Plan</button>
@@ -7374,6 +7406,7 @@ Reranking models rescore initial search results to improve relevance ordering.</
7374
7406
  <!-- Draft validation status bar -->
7375
7407
  <div class="wf-validation-bar" id="wfValidationBar">
7376
7408
  <span id="wfValidationBarText"></span>
7409
+ <button class="wf-validation-close" onclick="event.stopPropagation(); document.getElementById('wfValidationBar').style.display='none';" title="Dismiss">&times;</button>
7377
7410
  </div>
7378
7411
  <!-- Edge handles for collapsed panels -->
7379
7412
  <div class="wf-edge-handle wf-edge-handle--left" id="wfEdgeHandleLeft" onclick="wfToggleLibrary()" title="Expand library">&#x203A;</div>
@@ -12062,7 +12095,23 @@ function checkForAppUpdate() {
12062
12095
  downloadBtn.style.display = 'none';
12063
12096
  bannerText.style.display = 'none';
12064
12097
  progressWrap.style.display = 'flex';
12098
+ // Timeout: if no progress after 30s, offer manual download
12099
+ let gotProgress = false;
12100
+ const dlTimeout = setTimeout(() => {
12101
+ if (!gotProgress) {
12102
+ progressWrap.style.display = 'none';
12103
+ bannerText.innerHTML = 'Download stalled. <a href="#" onclick="window.vai.updates.openRelease(\'' + result.releaseUrl + '\');return false;" style="color:var(--accent);">Download manually</a>';
12104
+ bannerText.style.display = '';
12105
+ }
12106
+ }, 30000);
12107
+ // Listen for first progress event to cancel timeout
12108
+ const unsub = window.vai.updates.onEvent((d) => {
12109
+ if (d.event === 'download-progress') { gotProgress = true; clearTimeout(dlTimeout); unsub(); }
12110
+ if (d.event === 'update-error') { clearTimeout(dlTimeout); unsub(); }
12111
+ if (d.event === 'update-downloaded') { clearTimeout(dlTimeout); unsub(); }
12112
+ });
12065
12113
  window.vai.updates.download().catch(() => {
12114
+ clearTimeout(dlTimeout);
12066
12115
  // Fallback to manual download
12067
12116
  window.vai.updates.openRelease(result.releaseUrl);
12068
12117
  });
@@ -12341,6 +12390,7 @@ function initOnboarding() {
12341
12390
 
12342
12391
  // ── Multimodal Tab ──
12343
12392
  let mmImageData = null; // base64 data URL of the uploaded image
12393
+ let mmVideoData = null; // base64 data URL of the uploaded video
12344
12394
  let mmGalleryImages = []; // array of { dataUrl, name, size }
12345
12395
  let mmSearchMode = 'text';
12346
12396
  let mmSearchImageIndex = -1;
@@ -12411,6 +12461,59 @@ function initMultimodal() {
12411
12461
  }
12412
12462
  });
12413
12463
 
12464
+ // Video drop zone — the hidden <input type="file"> lives inside the same
12465
+ // card, so its programmatic .click() bubbles back up through the DOM and
12466
+ // re-triggers the dropzone click handler, creating an infinite dialog loop.
12467
+ // Fix: stopPropagation on the input so its click never reaches the dropzone,
12468
+ // plus a re-trigger guard identical to the image dropzone.
12469
+ const videoDropZone = document.getElementById('mmVideoDropZone');
12470
+ const videoFileInput = document.getElementById('mmVideoFileInput');
12471
+
12472
+ videoFileInput.addEventListener('click', (e) => e.stopPropagation());
12473
+
12474
+ let videoDialogOpen = false;
12475
+ videoDropZone.addEventListener('click', async () => {
12476
+ if (videoDialogOpen) return;
12477
+ videoDialogOpen = true;
12478
+ try {
12479
+ if (window.vai && window.vai.isElectron && window.vai.openVideoDialog) {
12480
+ const result = await window.vai.openVideoDialog();
12481
+ if (!result.canceled && result.dataUrl) {
12482
+ handleMultimodalVideoFromData(result.dataUrl, result.name, result.size);
12483
+ }
12484
+ } else {
12485
+ videoFileInput.click();
12486
+ }
12487
+ } finally {
12488
+ setTimeout(() => { videoDialogOpen = false; }, 300);
12489
+ }
12490
+ });
12491
+ videoFileInput.addEventListener('change', (e) => {
12492
+ videoDialogOpen = false;
12493
+ if (e.target.files && e.target.files[0]) handleMultimodalVideo(e.target.files[0]);
12494
+ videoFileInput.value = '';
12495
+ });
12496
+
12497
+ ['dragenter', 'dragover'].forEach(evt => {
12498
+ videoDropZone.addEventListener(evt, (e) => {
12499
+ e.preventDefault();
12500
+ e.stopPropagation();
12501
+ videoDropZone.classList.add('drag-active');
12502
+ });
12503
+ });
12504
+ ['dragleave', 'drop'].forEach(evt => {
12505
+ videoDropZone.addEventListener(evt, (e) => {
12506
+ e.preventDefault();
12507
+ e.stopPropagation();
12508
+ videoDropZone.classList.remove('drag-active');
12509
+ });
12510
+ });
12511
+ videoDropZone.addEventListener('drop', (e) => {
12512
+ if (e.dataTransfer.files && e.dataTransfer.files[0]) {
12513
+ handleMultimodalVideo(e.dataTransfer.files[0]);
12514
+ }
12515
+ });
12516
+
12414
12517
  // Gallery
12415
12518
  renderGalleryGrid();
12416
12519
 
@@ -12481,22 +12584,155 @@ window.clearMultimodalImage = function() {
12481
12584
  document.getElementById('mmFileInput').value = '';
12482
12585
  };
12483
12586
 
/**
 * Load an already-encoded video (data URL) into the multimodal preview and
 * show its file details plus an estimated token count.
 *
 * @param {string} dataUrl - Data URL of the video; stored in mmVideoData and used as the preview source.
 * @param {string} name - File name shown in the info line.
 * @param {number} [size] - File size in bytes; left out of the info line when it is not a finite number.
 * @returns {Promise<void>} Settles once the info line (and any token warning) has been written.
 */
async function handleMultimodalVideoFromData(dataUrl, name, size) {
  mmVideoData = dataUrl;
  const video = document.getElementById('mmPreviewVideo');
  video.src = mmVideoData;

  const info = document.getElementById('mmVideoFileInfo');
  // Build the label from parts so a missing size is skipped instead of rendering "NaN KB".
  const labelParts = [name];
  if (Number.isFinite(size)) {
    labelParts.push(
      size > 1024 * 1024
        ? (size / (1024 * 1024)).toFixed(1) + ' MB'
        : (size / 1024).toFixed(0) + ' KB'
    );
  }

  document.getElementById('mmVideoDropZone').style.display = 'none';
  document.getElementById('mmVideoPreview').classList.add('visible');
  hideError('mmError');

  // Estimate tokens from video metadata
  const meta = await estimateVideoTokens(video);

  // The estimate is asynchronous: if the video was cleared or replaced while we
  // were waiting, this result is stale and must not overwrite the current UI.
  if (mmVideoData !== dataUrl) return;

  if (!meta) {
    info.textContent = labelParts.join(' · ');
    return;
  }

  const durStr = meta.duration.toFixed(1) + 's';
  const resStr = meta.width + '×' + meta.height;
  const tokenStr = meta.tokens.toLocaleString();
  info.textContent = [...labelParts, resStr, durStr, `~${tokenStr} tokens`].join(' · ');

  if (meta.tokens > 32000) {
    showError('mmError',
      'This video is estimated at ~' + tokenStr + ' tokens, which exceeds the 32,000 token context window. ' +
      'Try a shorter clip, lower resolution, or smaller dimensions. ' +
      '(' + resStr + ', ' + durStr + ')'
    );
  }
}
12618
+
// Estimate video tokens: total pixels across all frames / 1120 pixels per token
// The playground server downsamples video to 1fps before sending to Voyage AI,
// so we estimate based on 1fps which matches what will actually be sent.
/**
 * @param {HTMLVideoElement} videoEl - Video element whose source has already been assigned.
 * @returns {Promise<?{width: number, height: number, duration: number, frames: number, totalPixels: number, tokens: number}>}
 *   The estimate, or null when the metadata is unusable, the media fails to load,
 *   or nothing arrives within 3 seconds. The promise never rejects.
 */
function estimateVideoTokens(videoEl) {
  return new Promise((resolve) => {
    const readMeta = () => {
      const w = videoEl.videoWidth;
      const h = videoEl.videoHeight;
      const dur = videoEl.duration;
      if (!(w && h && dur && isFinite(dur))) return null;
      // Server downsamples to 1fps before API call
      const assumedFps = 1;
      const frames = Math.ceil(dur * assumedFps);
      const totalPixels = w * h * frames;
      const tokens = Math.ceil(totalPixels / 1120);
      return { width: w, height: h, duration: dur, frames, totalPixels, tokens };
    };

    // readyState >= 1 means the metadata is already available.
    if (videoEl.readyState >= 1) {
      resolve(readMeta());
      return;
    }

    let timer = null;
    // Whichever outcome arrives first wins; drop the timer and both listeners
    // so nothing lingers on the element afterwards.
    const cleanup = () => {
      clearTimeout(timer);
      videoEl.removeEventListener('loadedmetadata', onLoaded);
      videoEl.removeEventListener('error', onFailed);
    };
    const onLoaded = () => {
      cleanup();
      resolve(readMeta());
    };
    const onFailed = () => {
      cleanup();
      resolve(null);
    };

    videoEl.addEventListener('loadedmetadata', onLoaded);
    // An undecodable file fires "error" instead of "loadedmetadata"; answer immediately.
    videoEl.addEventListener('error', onFailed);
    // Timeout fallback
    timer = setTimeout(onFailed, 3000);
  });
}
12648
+
/**
 * Validate a user-selected video file, read it as a data URL, and hand it to
 * handleMultimodalVideoFromData() for preview and token estimation.
 *
 * A file is accepted when either its MIME type or its extension is a known
 * video format, and its size is at most 20 MB. Validation and read failures
 * are reported through showError('mmError', ...).
 *
 * @param {File} file - File picked via the file input or dropped on the drop zone.
 */
function handleMultimodalVideo(file) {
  const VALID_TYPES = ['video/mp4', 'video/webm', 'video/quicktime', 'video/x-msvideo', 'video/x-matroska'];
  const hasKnownType = VALID_TYPES.includes(file.type);
  // Fallback for files dropped without a usable MIME type.
  const hasKnownExtension = /\.(mp4|webm|mov|avi|mkv)$/i.test(file.name);
  if (!hasKnownType && !hasKnownExtension) {
    showError('mmError', 'Unsupported video type. Use MP4, WebM, MOV, AVI, or MKV.');
    return;
  }
  if (file.size > 20 * 1024 * 1024) {
    showError('mmError', 'Video too large. Maximum size is 20 MB.');
    return;
  }
  hideError('mmError');

  const reader = new FileReader();
  reader.onload = async (e) => {
    try {
      // Single code path for preview + token estimate, shared with the native-dialog flow.
      await handleMultimodalVideoFromData(e.target.result, file.name, file.size);
    } catch (err) {
      showError('mmError', err.message);
    }
  };
  // Without this a failed read would leave the drop zone unchanged with no feedback.
  reader.onerror = () => {
    showError('mmError', 'Could not read the video file. Please try again.');
  };
  reader.readAsDataURL(file);
}
12694
+
/**
 * Remove the loaded video from the multimodal tab: forget the stored data URL,
 * reset the preview element, and show the drop zone again.
 */
window.clearMultimodalVideo = function() {
  mmVideoData = null;
  const video = document.getElementById('mmPreviewVideo');
  video.pause();
  // Assigning '' would make the element try to load an empty URL. Removing the
  // attribute and calling load() resets it to the "no source" state instead.
  video.removeAttribute('src');
  video.load();
  document.getElementById('mmVideoFileInfo').textContent = '';
  document.getElementById('mmVideoPreview').classList.remove('visible');
  document.getElementById('mmVideoDropZone').style.display = '';
  // Reset the input so picking the same file again still fires "change".
  document.getElementById('mmVideoFileInput').value = '';
};
12705
+
12484
12706
  window.doMultimodalCompare = async function() {
12485
12707
  hideError('mmError');
12486
12708
  sendTelemetry('api_call', { endpoint: 'multimodal-compare', model: document.getElementById('mmModel').value });
12487
12709
  const text = document.getElementById('mmText').value.trim();
12488
- if (!mmImageData) { showError('mmError', 'Upload an image first'); return; }
12489
- if (!text) { showError('mmError', 'Enter text to compare against the image'); return; }
12710
+ const hasMedia = mmImageData || mmVideoData;
12711
+ if (!hasMedia && !text) { showError('mmError', 'Upload an image or video, and enter text to compare'); return; }
12712
+
12713
+ // Need at least 2 inputs to compare
12714
+ const mediaInputs = [];
12715
+ if (mmImageData) {
12716
+ mediaInputs.push({ content: [{ type: 'image_base64', image_base64: mmImageData }], label: 'Image' });
12717
+ }
12718
+ if (mmVideoData) {
12719
+ mediaInputs.push({ content: [{ type: 'video_base64', video_base64: mmVideoData }], label: 'Video' });
12720
+ }
12721
+ if (text) {
12722
+ mediaInputs.push({ content: [{ type: 'text', text: text }], label: 'Text' });
12723
+ }
12724
+
12725
+ if (mediaInputs.length < 2) {
12726
+ showError('mmError', 'Provide at least 2 inputs to compare (e.g., image + text, video + text, or image + video)');
12727
+ return;
12728
+ }
12490
12729
 
12491
12730
  setLoading('mmCompareBtn', true);
12492
12731
  try {
12493
12732
  const model = document.getElementById('mmModel').value;
12494
12733
  const dimsVal = document.getElementById('mmDimensions').value;
12495
12734
  const body = {
12496
- inputs: [
12497
- { content: [{ type: 'image_base64', image_base64: mmImageData }] },
12498
- { content: [{ type: 'text', text: text }] }
12499
- ],
12735
+ inputs: mediaInputs.map(m => m.content ? { content: m.content } : m),
12500
12736
  model: model,
12501
12737
  input_type: 'document'
12502
12738
  };
@@ -12504,6 +12740,7 @@ window.doMultimodalCompare = async function() {
12504
12740
 
12505
12741
  const data = await apiPost('/api/multimodal-embed', body);
12506
12742
 
12743
+ // Compare first two inputs (primary comparison)
12507
12744
  const vecA = data.data[0].embedding;
12508
12745
  const vecB = data.data[1].embedding;
12509
12746
  const cosine = cosineSim(vecA, vecB);
@@ -12525,29 +12762,46 @@ window.doMultimodalCompare = async function() {
12525
12762
 
12526
12763
  // Stats
12527
12764
  const usage = data.usage || {};
12528
- const statsEl = document.getElementById('mmStats');
12529
- statsEl.innerHTML = `
12765
+ const pairLabel = mediaInputs[0].label + ' vs ' + mediaInputs[1].label;
12766
+ let statsHtml = `
12530
12767
  <span class="stat"><span class="stat-label">Model</span><span class="stat-value">${data.model || model}</span></span>
12768
+ <span class="stat"><span class="stat-label">Comparing</span><span class="stat-value">${pairLabel}</span></span>
12531
12769
  <span class="stat"><span class="stat-label">Dimensions</span><span class="stat-value">${vecA.length}</span></span>
12532
- <span class="stat"><span class="stat-label">Text Tokens</span><span class="stat-value">${usage.text_tokens || '—'}</span></span>
12533
- <span class="stat"><span class="stat-label">Image Pixels</span><span class="stat-value">${usage.image_pixels ? usage.image_pixels.toLocaleString() : '—'}</span></span>
12534
- <span class="stat"><span class="stat-label">Total Tokens</span><span class="stat-value">${usage.total_tokens || '—'}</span></span>
12535
12770
  `;
12771
+ if (usage.text_tokens) statsHtml += `<span class="stat"><span class="stat-label">Text Tokens</span><span class="stat-value">${usage.text_tokens}</span></span>`;
12772
+ if (usage.image_pixels) statsHtml += `<span class="stat"><span class="stat-label">Image Pixels</span><span class="stat-value">${usage.image_pixels.toLocaleString()}</span></span>`;
12773
+ statsHtml += `<span class="stat"><span class="stat-label">Total Tokens</span><span class="stat-value">${usage.total_tokens || '—'}</span></span>`;
12774
+
12775
+ // If 3 inputs, show all pairwise similarities
12776
+ if (mediaInputs.length === 3) {
12777
+ const vecC = data.data[2].embedding;
12778
+ const simAC = cosineSim(vecA, vecC);
12779
+ const simBC = cosineSim(vecB, vecC);
12780
+ statsHtml += `<br><span class="stat"><span class="stat-label">${mediaInputs[0].label} vs ${mediaInputs[2].label}</span><span class="stat-value">${simAC.toFixed(4)}</span></span>`;
12781
+ statsHtml += `<span class="stat"><span class="stat-label">${mediaInputs[1].label} vs ${mediaInputs[2].label}</span><span class="stat-value">${simBC.toFixed(4)}</span></span>`;
12782
+ }
12783
+
12784
+ document.getElementById('mmStats').innerHTML = statsHtml;
12536
12785
 
12537
12786
  // Insight note
12538
12787
  const noteEl = document.getElementById('mmNote');
12539
12788
  if (cosine > 0.7) {
12540
- noteEl.innerHTML = '💡 <strong>High similarity!</strong> The image and text are closely related in Voyage AI\'s multimodal embedding space. This means the text is a good semantic description of the image.';
12789
+ noteEl.innerHTML = '💡 <strong>High similarity!</strong> The inputs are closely related in Voyage AI\'s multimodal embedding space.';
12541
12790
  } else if (cosine > 0.4) {
12542
- noteEl.innerHTML = '💡 <strong>Moderate similarity.</strong> The image and text share some semantic overlap. They may be related but not a direct match.';
12791
+ noteEl.innerHTML = '💡 <strong>Moderate similarity.</strong> The inputs share some semantic overlap but are not a direct match.';
12543
12792
  } else {
12544
- noteEl.innerHTML = '💡 <strong>Low similarity.</strong> The image and text are semantically distant. Try a description that matches the image content more closely.';
12793
+ noteEl.innerHTML = '💡 <strong>Low similarity.</strong> The inputs are semantically distant. Try content that is more closely related.';
12545
12794
  }
12546
12795
 
12547
12796
  document.getElementById('mmResult').classList.add('visible');
12548
12797
  CostTracker.addOperation('multimodal-compare', data.model || model, usage.total_tokens || 0);
12549
12798
  } catch (err) {
12550
- showError('mmError', err.message);
12799
+ let msg = err.message;
12800
+ // Provide actionable guidance for context window errors
12801
+ if (msg.includes('context window') || msg.includes('exceed')) {
12802
+ msg += ' Try reducing video resolution, trimming to a shorter clip, or using a smaller image.';
12803
+ }
12804
+ showError('mmError', msg);
12551
12805
  } finally {
12552
12806
  setLoading('mmCompareBtn', false);
12553
12807
  }
@@ -12751,7 +13005,11 @@ window.doMultimodalSearch = async function() {
12751
13005
  document.getElementById('mmSearchResult').classList.add('visible');
12752
13006
  CostTracker.addOperation('multimodal-search', model, data.usage?.total_tokens || 0);
12753
13007
  } catch (err) {
12754
- showError('mmSearchError', err.message);
13008
+ let msg = err.message;
13009
+ if (msg.includes('context window') || msg.includes('exceed')) {
13010
+ msg += ' Try using smaller images or fewer corpus items per batch.';
13011
+ }
13012
+ showError('mmSearchError', msg);
12755
13013
  } finally {
12756
13014
  setLoading(btnId, false);
12757
13015
  }
@@ -14745,6 +15003,158 @@ function wfHandleFileLoad(event) {
14745
15003
  }
14746
15004
 
14747
15005
  // ── DAG Layout + SVG Rendering ──
/**
 * Sugiyama-style DAG auto-layout: barycenter crossing minimization + neighbor-aware placement.
 * Takes layers (from Kahn's topological sort) and graph (dependency map),
 * reorders nodes within each layer to reduce edge crossings,
 * then assigns Y coordinates so nodes sit near their connected neighbors.
 *
 * @param {string[][]} layers - layers[i] = [stepIds that can run in parallel]
 * @param {Object} graph - { stepId: [depStepIds...] } (incoming edges). A leading "!"
 *   on a dependency id is stripped before lookup; non-string entries and ids that
 *   are not present in `layers` are ignored.
 * @returns {Object} positions - { stepId: { x, y } }; empty when there are no layers.
 */
function wfAutoLayout(layers, graph) {
  if (!layers || layers.length === 0) return {};

  // Build adjacency: forward (outgoing) and backward (incoming) neighbor lists
  const incoming = {}; // stepId -> [ids that it depends on]
  const outgoing = {}; // stepId -> [ids that depend on it]
  const allIds = new Set(layers.flat());
  for (const id of allIds) {
    incoming[id] = [];
    outgoing[id] = [];
  }

  for (const [stepId, deps] of Object.entries(graph || {})) {
    if (!Array.isArray(deps)) continue;
    for (const rawDep of deps) {
      // A malformed (non-string) entry must not abort the whole layout.
      if (typeof rawDep !== 'string') continue;
      const dep = rawDep.replace(/^!/, '');
      if (allIds.has(dep) && allIds.has(stepId)) {
        incoming[stepId].push(dep);
        outgoing[dep].push(stepId);
      }
    }
  }

  // Work on mutable copies of each layer's ordering
  const ordered = layers.map((l) => [...l]);

  // Barycenter crossing minimization (multi-pass, alternating direction)
  // Barycenter = average position of a node's neighbors in the adjacent layer.
  // Sorting by barycenter within each layer reduces crossings.
  const NUM_PASSES = 4;

  for (let pass = 0; pass < NUM_PASSES; pass++) {
    if (pass % 2 === 0) {
      // Forward pass: for each layer (left to right), sort by barycenter of incoming neighbors
      for (let li = 1; li < ordered.length; li++) {
        const prevOrder = {};
        ordered[li - 1].forEach((id, idx) => { prevOrder[id] = idx; });

        ordered[li].sort((a, b) => {
          const baryA = wfBarycenter(a, incoming, prevOrder);
          const baryB = wfBarycenter(b, incoming, prevOrder);
          return baryA - baryB;
        });
      }
    } else {
      // Backward pass: for each layer (right to left), sort by barycenter of outgoing neighbors
      for (let li = ordered.length - 2; li >= 0; li--) {
        const nextOrder = {};
        ordered[li + 1].forEach((id, idx) => { nextOrder[id] = idx; });

        ordered[li].sort((a, b) => {
          const baryA = wfBarycenter(a, outgoing, nextOrder);
          const baryB = wfBarycenter(b, outgoing, nextOrder);
          return baryA - baryB;
        });
      }
    }
  }

  // X is fixed per layer. Y is assigned in two steps:
  //   1. evenly spaced and vertically centered within the tallest layer's height;
  //   2. iteratively nudged toward the average Y of each node's neighbors.
  const positions = {};
  const maxLayerSize = Math.max(...ordered.map((l) => l.length));
  const totalH = maxLayerSize * (WF_NODE_H + WF_NODE_GAP);

  // Initial even spacing (centered vertically)
  ordered.forEach((layer, li) => {
    const x = WF_PAD + li * WF_LAYER_GAP;
    const layerH = layer.length * WF_NODE_H + (layer.length - 1) * WF_NODE_GAP;
    const startY = WF_PAD + (totalH - layerH) / 2;
    layer.forEach((stepId, ni) => {
      positions[stepId] = { x, y: startY + ni * (WF_NODE_H + WF_NODE_GAP) };
    });
  });

  // Neighbor-aware Y nudging (iterative relaxation)
  // Pulls nodes toward the average Y of their connected neighbors, then restores the minimum gap.
  const RELAX_PASSES = 3;
  const RELAX_STRENGTH = 0.4; // How much to move toward neighbor average (0=none, 1=full)

  for (let rp = 0; rp < RELAX_PASSES; rp++) {
    for (let li = 0; li < ordered.length; li++) {
      const layer = ordered[li];
      // A single-node layer keeps its centered position.
      if (layer.length <= 1) continue;

      // Compute ideal Y for each node (average Y of all neighbors, in any layer)
      const idealY = {};
      for (const id of layer) {
        const neighbors = [...(incoming[id] || []), ...(outgoing[id] || [])];
        if (neighbors.length === 0) { idealY[id] = positions[id].y; continue; }
        const avgY = neighbors.reduce((sum, nid) => sum + (positions[nid]?.y || 0), 0) / neighbors.length;
        idealY[id] = avgY;
      }

      // Nudge every node part of the way toward its ideal Y
      for (const id of layer) {
        const current = positions[id].y;
        const target = idealY[id];
        positions[id].y = current + (target - current) * RELAX_STRENGTH;
      }

      // Order the layer by its nudged Y values. Note this can differ from the
      // barycenter order when a node was pulled past one of its siblings.
      const sortedLayer = [...layer].sort((a, b) => positions[a].y - positions[b].y);
      const minGap = WF_NODE_H + WF_NODE_GAP;

      // Push apart any overlaps (top-down)
      for (let i = 1; i < sortedLayer.length; i++) {
        const prev = positions[sortedLayer[i - 1]].y;
        const curr = positions[sortedLayer[i]].y;
        if (curr - prev < minGap) {
          positions[sortedLayer[i]].y = prev + minGap;
        }
      }

      // Re-center the layer vertically to keep it balanced
      const layerTop = positions[sortedLayer[0]].y;
      const layerBottom = positions[sortedLayer[sortedLayer.length - 1]].y + WF_NODE_H;
      const layerMid = (layerTop + layerBottom) / 2;
      const canvasMid = WF_PAD + totalH / 2;
      const shift = canvasMid - layerMid;
      for (const id of sortedLayer) {
        positions[id].y += shift;
      }
    }
  }

  return positions;
}
15147
+
/**
 * Compute the barycenter (average position) of a node's neighbors in an adjacent layer.
 *
 * @param {string} nodeId - Node whose neighbors are averaged.
 * @param {Object} neighborMap - { nodeId: [neighborIds...] } adjacency lists.
 * @param {Object} orderMap - { nodeId: index } positions of the nodes in the adjacent layer.
 * @returns {number} Mean index of the neighbors found in orderMap, or Infinity when
 *   there are none, which sorts the node after all nodes with a finite barycenter.
 */
function wfBarycenter(nodeId, neighborMap, orderMap) {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  // Own-property lookups only: an id such as "constructor" or "toString" must not
  // resolve to a member inherited from Object.prototype.
  const candidates = hasOwn(neighborMap, nodeId) ? (neighborMap[nodeId] || []) : [];
  const neighbors = candidates.filter((n) => hasOwn(orderMap, n) && orderMap[n] !== undefined);
  if (neighbors.length === 0) return Infinity;
  return neighbors.reduce((sum, n) => sum + orderMap[n], 0) / neighbors.length;
}
15157
+
14748
15158
  async function wfRenderWorkflow(definition) {
14749
15159
  const svg = document.getElementById('wf-canvas');
14750
15160
  // Clear previous nodes and edges (keep defs)
@@ -14773,23 +15183,8 @@ async function wfRenderWorkflow(definition) {
14773
15183
  const stepMap = {};
14774
15184
  definition.steps.forEach(s => { stepMap[s.id] = s; });
14775
15185
 
14776
- // Calculate positions
14777
- const positions = {};
14778
- const maxLayerSize = Math.max(...layers.map(l => l.length));
14779
- const totalW = layers.length * WF_LAYER_GAP;
14780
- const totalH = maxLayerSize * (WF_NODE_H + WF_NODE_GAP);
14781
-
14782
- layers.forEach((layer, li) => {
14783
- const x = WF_PAD + li * WF_LAYER_GAP;
14784
- const layerH = layer.length * WF_NODE_H + (layer.length - 1) * WF_NODE_GAP;
14785
- const startY = WF_PAD + (totalH - layerH) / 2;
14786
- layer.forEach((stepId, ni) => {
14787
- positions[stepId] = {
14788
- x,
14789
- y: startY + ni * (WF_NODE_H + WF_NODE_GAP),
14790
- };
14791
- });
14792
- });
15186
+ // Auto-layout: crossing minimization + neighbor-aware placement
15187
+ const positions = wfAutoLayout(layers, graph);
14793
15188
  wfState.nodePositions = positions;
14794
15189
 
14795
15190
  // Build port-visibility maps: which nodes have input deps, which have dependents
@@ -16231,10 +16626,131 @@ async function wfExecute() {
16231
16626
  return;
16232
16627
  }
16233
16628
 
16629
+ // Check for empty required step inputs and prompt the user
16630
+ const missing = wfFindMissingStepInputs(def);
16631
+ if (missing.length > 0) {
16632
+ wfShowStepInputModal(missing);
16633
+ return;
16634
+ }
16635
+
16234
16636
  // No inputs needed, execute directly
16235
16637
  wfExecuteWithInputs({});
16236
16638
  }
16237
16639
 
16640
/**
 * Scan every step of a workflow definition for required inputs that have no
 * usable value yet.
 *
 * An input is considered missing when it is `undefined`, `null`, or a string
 * that is empty after trimming. Whitespace-only strings are treated as missing
 * so that this check agrees with the submit handler, which trims values before
 * accepting them. Any other value (non-blank string, number, object, ...)
 * counts as filled.
 *
 * @param {{steps?: Array<object>}} def - Workflow definition; only `steps` is read.
 * @returns {Array<{stepId: string, stepName: string, tool: string, key: string,
 *   placeholder: string, type: string}>} One entry per missing required input,
 *   in step order and then in input-definition order.
 */
function wfFindMissingStepInputs(def) {
  const missing = [];
  for (const step of (def?.steps || [])) {
    // Tools without a registered input schema have nothing to validate.
    const inputDefs = WF_INPUT_DEFS[step.tool] || [];
    for (const d of inputDefs) {
      if (!d.required) continue;
      const val = step.inputs?.[d.key];
      const isBlank = val === undefined
        || val === null
        || (typeof val === 'string' && val.trim() === '');
      if (!isBlank) continue;
      missing.push({
        stepId: step.id,
        stepName: step.name || step.id,
        tool: step.tool,
        key: d.key,
        placeholder: d.placeholder || '',
        type: d.type || 'text',
      });
    }
  }
  return missing;
}
16665
+
16666
/**
 * Show a modal prompting the user for missing required step inputs before
 * execution. Reuses the existing input modal elements (`wfInputModalTitle`,
 * `wfInputModalBody`, `wfInputModalBackdrop`, `.wf-input-modal-run`).
 *
 * Fields are grouped under a header per step; consecutive entries with the
 * same `stepId` share one header. Each field gets the DOM id
 * `wf-stepin-<stepId>-<key>`, which the submit handler uses to read it back.
 *
 * All values taken from the workflow definition are HTML-escaped before being
 * interpolated, including the `id` and `data-type` attributes, because a
 * definition may contain arbitrary step ids and input keys.
 *
 * NOTE(review): the run button's click handler is replaced here; confirm that
 * dismissing the modal without submitting also restores the default handler.
 *
 * @param {Array<{stepId: string, stepName: string, tool: string, key: string,
 *   placeholder: string, type: string}>} missing - Entries to prompt for.
 *   Nothing is shown when the list is empty.
 */
function wfShowStepInputModal(missing) {
  if (!Array.isArray(missing) || missing.length === 0) return;

  document.getElementById('wfInputModalTitle').textContent = 'Required Inputs';
  let html = '';
  let lastStepId = '';

  for (const m of missing) {
    // Group header per step
    if (m.stepId !== lastStepId) {
      const meta = WF_NODE_META[m.tool] || {};
      html += `<div style="margin-top:${lastStepId ? '16' : '0'}px;margin-bottom:8px;font-size:11px;text-transform:uppercase;letter-spacing:0.5px;color:var(--text-muted);display:flex;align-items:center;gap:6px;">
        <span style="color:${meta.color || 'var(--text-muted)'}">
          <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="${meta.icon || WF_FALLBACK_ICON}"/></svg>
        </span>
        ${escapeHtml(m.stepName)}
      </div>`;
      lastStepId = m.stepId;
    }

    // The parser decodes the escaped attribute back to the raw id, so
    // getElementById with the unescaped id still finds the element.
    const inputIdAttr = escapeHtml('wf-stepin-' + m.stepId + '-' + m.key);
    html += `<div class="wf-input-modal-field">
      <div class="wf-input-modal-label">${escapeHtml(m.key)} <span style="color:#e74c3c">*</span></div>
      <input class="wf-input-modal-input" id="${inputIdAttr}" placeholder="${escapeHtml(m.placeholder)}" data-step-id="${escapeHtml(m.stepId)}" data-key="${escapeHtml(m.key)}" data-type="${escapeHtml(String(m.type))}">
      <div class="wf-input-modal-error" id="${inputIdAttr}-err">This field is required</div>
    </div>`;
  }

  document.getElementById('wfInputModalBody').innerHTML = html;

  // Swap the footer button handler temporarily
  const runBtn = document.querySelector('.wf-input-modal-run');
  runBtn.textContent = 'Run Workflow';
  runBtn.onclick = () => wfStepInputModalSubmit(missing);

  // Clearing the inline display value makes the backdrop visible again.
  document.getElementById('wfInputModalBackdrop').style.display = '';

  // Focus first input (deferred so the element is rendered before focusing)
  const firstId = 'wf-stepin-' + missing[0].stepId + '-' + missing[0].key;
  const firstEl = document.getElementById(firstId);
  if (firstEl) setTimeout(() => firstEl.focus(), 50);
}
16710
+
16711
/**
 * Handle submission of the "Required Inputs" modal.
 *
 * Validates every prompted field first (trimmed value must be non-empty) and
 * marks invalid ones with the `error` class and a visible message. Only when
 * all fields are valid are the values written into the active workflow's step
 * definitions, so a failed submission never leaves the definition half-updated.
 * On success the default run-button handler is restored, the modal is closed,
 * the inspector is refreshed if a node is selected, and execution starts.
 *
 * Values are stored as trimmed strings.
 * NOTE(review): `type` from the prompt entry is not used for coercion here;
 * confirm whether non-text inputs need conversion.
 *
 * @param {Array<{stepId: string, key: string}>} missing - The entries that
 *   were rendered into the modal; entries whose input element is absent are
 *   skipped.
 */
function wfStepInputModalSubmit(missing) {
  const def = wfState.activeWorkflow;
  const accepted = [];
  let hasError = false;

  for (const m of missing) {
    const inputId = 'wf-stepin-' + m.stepId + '-' + m.key;
    const el = document.getElementById(inputId);
    const errEl = document.getElementById(inputId + '-err');
    if (!el) continue;

    // Reset any error state left over from a previous attempt.
    el.classList.remove('error');
    if (errEl) errEl.style.display = 'none';

    const val = el.value.trim();
    if (!val) {
      el.classList.add('error');
      if (errEl) { errEl.textContent = 'This field is required'; errEl.style.display = ''; }
      hasError = true;
      continue;
    }

    accepted.push({ stepId: m.stepId, key: m.key, value: val });
  }

  if (hasError) return;

  // Commit the values into the step definitions only after full validation.
  for (const { stepId, key, value } of accepted) {
    const step = def?.steps?.find(s => s.id === stepId);
    if (!step) continue;
    if (!step.inputs) step.inputs = {};
    step.inputs[key] = value;
  }

  // Restore the default modal handler
  const runBtn = document.querySelector('.wf-input-modal-run');
  if (runBtn) runBtn.onclick = () => wfInputModalSubmit();

  wfCloseInputModal();

  // Refresh inspector if a step is selected
  if (wfState.selectedNodeId) wfUpdateInspector();

  wfExecuteWithInputs({});
}
16753
+
16238
16754
  async function wfExecuteWithInputs(inputs) {
16239
16755
  const def = wfState.activeWorkflow;
16240
16756
  if (!def || wfState.executing) return;
@@ -16306,6 +16822,16 @@ async function wfExecuteWithInputs(inputs) {
16306
16822
  timeMs: data.timeMs,
16307
16823
  summary: data.summary || '',
16308
16824
  };
16825
+ // Update cost tracker with usage data from this step
16826
+ if (data._usage && Array.isArray(data._usage)) {
16827
+ data._usage.forEach(u => {
16828
+ if (u.op === 'llm') {
16829
+ CostTracker.addLLMOperation('wf-' + u.op, u.model, u.inputTokens || 0, u.outputTokens || 0);
16830
+ } else {
16831
+ CostTracker.addOperation('wf-' + u.op, u.model, u.tokens || 0);
16832
+ }
16833
+ });
16834
+ }
16309
16835
  wfRefreshNodes();
16310
16836
  if (wfState.selectedNodeId === data.stepId) wfUpdateInspector();
16311
16837
  } else if (currentEvent === 'step_skip') {
@@ -16409,6 +16935,45 @@ function wfResetExecution() {
16409
16935
  wfUpdateInspector();
16410
16936
  }
16411
16937
 
16938
/**
 * Recompute node positions for the active workflow with the auto-layout
 * algorithm, then redraw the nodes and fit the view.
 *
 * When `wfState.layers` / `wfState.graph` are not populated (e.g. builder
 * mode), they are fetched from `POST /api/workflows/plan` first. A failed
 * request, including a non-2xx HTTP status, is logged with `console.warn` and
 * leaves the current layout untouched. Steps that the layout does not place
 * keep their previous position.
 *
 * @returns {Promise<void>} Resolves once the layout has been applied or skipped.
 */
async function wfRelayout() {
  const def = wfState.activeWorkflow;
  if (!def) return;

  // If layers/graph aren't populated yet (e.g. builder mode), fetch them
  if (!wfState.layers || !wfState.graph || wfState.layers.length === 0) {
    try {
      const res = await fetch('/api/workflows/plan', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ definition: def }),
      });
      // Surface HTTP failures instead of silently parsing an error payload.
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      const data = await res.json();
      if (data.layers) wfState.layers = data.layers;
      if (data.graph) wfState.graph = data.graph;
    } catch (err) {
      console.warn('Relayout: failed to fetch plan:', err.message);
      return;
    }
  }

  if (!wfState.layers || wfState.layers.length === 0) return;

  const positions = wfAutoLayout(wfState.layers, wfState.graph || {});

  // Preserve positions for orphan nodes not in any layer (builder mode)
  const previous = wfState.nodePositions || {};
  for (const step of (def.steps || [])) {
    if (!positions[step.id] && previous[step.id]) {
      positions[step.id] = previous[step.id];
    }
  }

  wfState.nodePositions = positions;
  wfRefreshNodes();
  wfFitToView();
}
16976
+
16412
16977
  // ── Output Modal ──
16413
16978
  let wfOutputModalData = '';
16414
16979
 
@@ -17452,10 +18017,9 @@ function wfEdgeDropOnInput(toId) {
17452
18017
  if (connected) {
17453
18018
  wfState.dirtyFlag = true;
17454
18019
  wfBuildGraph();
17455
- wfRelayout();
17456
18020
  wfRefreshNodes();
17457
18021
  if (wfState.selectedNodeId === toId) wfUpdateInspector();
17458
-
18022
+
17459
18023
  // Trigger draft validation immediately on edge connection
17460
18024
  wfTriggerDraftValidation(0);
17461
18025
  }
@@ -17466,42 +18030,9 @@ function wfEdgeDropOnInput(toId) {
17466
18030
  if (el) el.remove();
17467
18031
  }
17468
18032
 
17469
- // ── Builder: Relayout via topological sort ──
17470
- async function wfRelayout() {
17471
- const def = wfState.activeWorkflow;
17472
- if (!def || def.steps.length === 0) return;
17473
-
17474
- try {
17475
- const res = await fetch('/api/workflows/plan', {
17476
- method: 'POST',
17477
- headers: { 'Content-Type': 'application/json' },
17478
- body: JSON.stringify(def),
17479
- });
17480
- const data = await res.json();
17481
- if (data.layers && data.layers.length > 0) {
17482
- wfState.layers = data.layers;
17483
- // Reposition nodes based on layers
17484
- const positions = {};
17485
- data.layers.forEach((layer, li) => {
17486
- layer.forEach((stepId, ni) => {
17487
- positions[stepId] = {
17488
- x: WF_PAD + li * WF_LAYER_GAP,
17489
- y: WF_PAD + ni * (WF_NODE_H + WF_NODE_GAP),
17490
- };
17491
- });
17492
- });
17493
- // Keep orphan nodes (not in any layer) at their current position
17494
- for (const step of def.steps) {
17495
- if (!positions[step.id] && wfState.nodePositions[step.id]) {
17496
- positions[step.id] = wfState.nodePositions[step.id];
17497
- }
17498
- }
17499
- wfState.nodePositions = positions;
17500
- }
17501
- } catch (err) {
17502
- console.warn('Relayout failed:', err.message);
17503
- }
17504
- }
18033
+ // ── Builder: Relayout via auto-layout algorithm ──
18034
+ // (delegates to wfRelayout defined earlier, which uses wfAutoLayout for
18035
+ // barycenter crossing minimization + neighbor-aware placement)
17505
18036
 
17506
18037
  // ── Docs shortcut (F1) ──
17507
18038
  const DOCS_URLS = {
@@ -17553,6 +18084,7 @@ document.addEventListener('keydown', (e) => {
17553
18084
  if (e.key === '+' || e.key === '=') { wfZoom(1); e.preventDefault(); }
17554
18085
  else if (e.key === '-') { wfZoom(-1); e.preventDefault(); }
17555
18086
  else if (e.key === '0') { wfFitToView(); e.preventDefault(); }
18087
+ else if (e.key === 'l' || e.key === 'L') { wfRelayout(); e.preventDefault(); }
17556
18088
  else if (e.key === 'Escape') { wfDeselectNode(); e.preventDefault(); }
17557
18089
  else if (e.key === 'ArrowLeft') { wfState.panX -= PAN_STEP / wfState.zoom; wfApplyViewBox(); e.preventDefault(); }
17558
18090
  else if (e.key === 'ArrowRight') { wfState.panX += PAN_STEP / wfState.zoom; wfApplyViewBox(); e.preventDefault(); }