voyageai-cli 1.20.3 → 1.20.5

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.20.3",
3
+ "version": "1.20.5",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "bin": {
6
6
  "vai": "./src/cli.js"
@@ -871,6 +871,147 @@ const concepts = {
871
871
  ],
872
872
  },
873
873
 
874
+ 'provider-comparison': {
875
+ title: 'Voyage AI vs OpenAI vs Anthropic',
876
+ summary: 'Understanding the differences between AI providers for embeddings and search',
877
+ content: [
878
+ `Developers often ask: "Should I use Voyage AI, OpenAI, or Anthropic for my vector`,
879
+ `search application?" The answer depends on what you're building — these companies`,
880
+ `serve ${pc.cyan('fundamentally different purposes')}.`,
881
+ ``,
882
+ `${pc.bold('TL;DR:')}`,
883
+ ` ${pc.cyan('Voyage AI')} → ${pc.bold('Embeddings & Reranking')} (specialized, best-in-class)`,
884
+ ` ${pc.cyan('OpenAI')} → ${pc.bold('Embeddings + Generation')} (general-purpose, convenient)`,
885
+ ` ${pc.cyan('Anthropic')} → ${pc.bold('Generation only')} (Claude LLM, no embedding API)`,
886
+ ``,
887
+ `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
888
+ ``,
889
+ `${pc.bold(pc.cyan('VOYAGE AI'))} — The Embedding Specialist`,
890
+ ``,
891
+ `Voyage AI is ${pc.cyan('100% focused on embeddings and reranking')}. That's it. No chatbots,`,
892
+ `no image generation — just the best vector representations for retrieval.`,
893
+ ``,
894
+ `${pc.dim('Models:')}`,
895
+ ` • ${pc.cyan('voyage-4-large')} 71.41 RTEB score (SOTA), MoE architecture`,
896
+ ` • ${pc.cyan('voyage-4')} 70.07 RTEB, balanced quality/cost`,
897
+ ` • ${pc.cyan('voyage-4-lite')} 68.10 RTEB, lowest cost`,
898
+ ` • ${pc.cyan('voyage-code-3')} Optimized for code retrieval`,
899
+ ` • ${pc.cyan('voyage-finance-2')} Financial documents`,
900
+ ` • ${pc.cyan('voyage-law-2')} Legal text`,
901
+ ` • ${pc.cyan('voyage-multimodal-3.5')} Images + text in same space`,
902
+ ` • ${pc.cyan('rerank-2.5')} Best-in-class reranking`,
903
+ ``,
904
+ `${pc.dim('Unique strengths:')}`,
905
+ ` • ${pc.cyan('Shared embedding space')} — mix models freely (embed docs with -large, query with -lite)`,
906
+ ` • ${pc.cyan('Matryoshka dimensions')} — truncate 1024→256 dims without re-embedding`,
907
+ ` • ${pc.cyan('Domain-specific models')} — code, finance, law tuned for their domains`,
908
+ ` • ${pc.cyan('No modality gap')} — multimodal model uses unified backbone, not CLIP`,
909
+ ` • ${pc.cyan('Quantization support')} — int8, binary output for 4-32× storage savings`,
910
+ ``,
911
+ `${pc.dim('Pricing:')} $0.02-$0.12 per million tokens (embedding), $2/M (reranking)`,
912
+ ``,
913
+ `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
914
+ ``,
915
+ `${pc.bold(pc.cyan('OPENAI'))} — The General-Purpose Platform`,
916
+ ``,
917
+ `OpenAI offers embeddings as ${pc.cyan('one product among many')}. They're convenient if you're`,
918
+ `already using GPT, but embeddings aren't their core focus.`,
919
+ ``,
920
+ `${pc.dim('Embedding models:')}`,
921
+ ` • ${pc.cyan('text-embedding-3-large')} 62.57 RTEB, 3072 dims, $0.13/M tokens`,
922
+ ` • ${pc.cyan('text-embedding-3-small')} 62.26 RTEB, 1536 dims, $0.02/M tokens`,
923
+ ` • ${pc.cyan('text-embedding-ada-002')} Legacy, 1536 dims, $0.10/M tokens`,
924
+ ``,
925
+ `${pc.dim('Strengths:')}`,
926
+ ` • ${pc.cyan('One platform')} — embeddings + GPT generation in same account`,
927
+ ` • ${pc.cyan('Familiar API')} — if you use ChatGPT API, embeddings are similar`,
928
+ ` • ${pc.cyan('Broad ecosystem')} — many tutorials and integrations assume OpenAI`,
929
+ ``,
930
+ `${pc.dim('Limitations:')}`,
931
+ ` • ${pc.cyan('Lower quality')} — voyage-4-large beats text-embedding-3-large by 14% on RTEB`,
932
+ ` • ${pc.cyan('No reranking')} — you need a separate provider for two-stage retrieval`,
933
+ ` • ${pc.cyan('No domain models')} — same model for code, legal, finance (suboptimal)`,
934
+ ` • ${pc.cyan('No shared space')} — can't mix models or truncate dimensions freely`,
935
+ ` • ${pc.cyan('No multimodal')} — text-only (CLIP is separate and limited)`,
936
+ ``,
937
+ `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
938
+ ``,
939
+ `${pc.bold(pc.cyan('ANTHROPIC'))} — The LLM Company (No Embeddings!)`,
940
+ ``,
941
+ `${pc.yellow('Important:')} Anthropic ${pc.bold('does not offer embedding models')}. They make Claude,`,
942
+ `an LLM for text generation, reasoning, and analysis — not vectorization.`,
943
+ ``,
944
+ `${pc.dim('What Anthropic offers:')}`,
945
+ ` • ${pc.cyan('Claude 3.5 Sonnet/Haiku')} — Text generation, coding, analysis`,
946
+ ` • ${pc.cyan('Claude 3 Opus')} — Most capable reasoning model`,
947
+ ` • ${pc.cyan('Tool use & function calling')} — Structured outputs`,
948
+ ``,
949
+ `${pc.dim('What Anthropic does NOT offer:')}`,
950
+ ` • ${pc.red('❌ Embedding models')}`,
951
+ ` • ${pc.red('❌ Reranking models')}`,
952
+ ` • ${pc.red('❌ Vector search APIs')}`,
953
+ ``,
954
+ `${pc.dim('How to use Claude with embeddings:')}`,
955
+ `Claude is the ${pc.cyan('"G" in RAG')} — the generation model. You still need an embedding`,
956
+ `provider (Voyage AI, OpenAI, Cohere) for the retrieval step:`,
957
+ ` 1. Embed your documents with Voyage AI → store in vector DB`,
958
+ ` 2. Embed user query → search vector DB → retrieve context`,
959
+ ` 3. Pass context + query to Claude → generate response`,
960
+ ``,
961
+ `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
962
+ ``,
963
+ `${pc.bold('HEAD-TO-HEAD: Voyage AI vs OpenAI Embeddings')}`,
964
+ ``,
965
+ `${pc.dim(' Voyage AI OpenAI')}`,
966
+ `${pc.dim('─────────────────────────────────────────────────────────────────')}`,
967
+ `Best RTEB score ${pc.cyan('71.41')} (v4-large) ${pc.dim('62.57')} (v3-large)`,
968
+ `Reranking ${pc.cyan('Yes')} (rerank-2.5) ${pc.dim('No')}`,
969
+ `Domain models ${pc.cyan('Yes')} (code/law/fin) ${pc.dim('No')}`,
970
+ `Multimodal ${pc.cyan('Yes')} (unified) ${pc.dim('Limited')} (CLIP)`,
971
+ `Shared embedding space ${pc.cyan('Yes')} ${pc.dim('No')}`,
972
+ `Flexible dimensions ${pc.cyan('Yes')} (Matryoshka) ${pc.dim('No')}`,
973
+ `Quantized output ${pc.cyan('Yes')} (int8/binary) ${pc.dim('No')}`,
974
+ `MongoDB integration ${pc.cyan('Native')} (Atlas) ${pc.dim('Manual')}`,
975
+ ``,
976
+ `${pc.bold('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━')}`,
977
+ ``,
978
+ `${pc.bold('WHEN TO USE EACH:')}`,
979
+ ``,
980
+ `${pc.cyan('Choose Voyage AI when:')}`,
981
+ ` • Retrieval quality is critical (RAG, search, recommendations)`,
982
+ ` • You need domain-specific embeddings (code, legal, finance)`,
983
+ ` • You want two-stage retrieval (embed → search → rerank)`,
984
+ ` • You're using MongoDB Atlas Vector Search`,
985
+ ` • You need multimodal search (images + text)`,
986
+ ` • Storage cost matters (quantization, flexible dims)`,
987
+ ``,
988
+ `${pc.cyan('Choose OpenAI embeddings when:')}`,
989
+ ` • You're already deep in the OpenAI ecosystem`,
990
+ ` • Convenience matters more than retrieval quality`,
991
+ ` • You don't need reranking or domain models`,
992
+ ` • Your use case is simple (not mission-critical search)`,
993
+ ``,
994
+ `${pc.cyan('Choose Anthropic (Claude) when:')}`,
995
+ ` • You need ${pc.bold('generation')}, not embeddings`,
996
+ ` • You want the best reasoning/coding LLM`,
997
+ ` • You're building the "G" in RAG (pair with Voyage for "R")`,
998
+ ``,
999
+ `${pc.bold('The winning combination:')} Use ${pc.cyan('Voyage AI for retrieval')} (embeddings +`,
1000
+ `reranking) and ${pc.cyan('Claude for generation')}. Best of both worlds.`,
1001
+ ].join('\n'),
1002
+ links: [
1003
+ 'https://www.mongodb.com/docs/voyageai/models/',
1004
+ 'https://platform.openai.com/docs/guides/embeddings',
1005
+ 'https://docs.anthropic.com/',
1006
+ 'https://blog.voyageai.com/2026/01/15/voyage-4-model-family/',
1007
+ ],
1008
+ tryIt: [
1009
+ 'vai models --wide',
1010
+ 'vai benchmark embed --models voyage-4-large,voyage-4,voyage-4-lite',
1011
+ 'vai explain rteb-benchmarks',
1012
+ ],
1013
+ },
1014
+
874
1015
  'rerank-eval': {
875
1016
  title: 'Reranking Evaluation — nDCG, Recall, MRR for Rerankers',
876
1017
  summary: 'Measure how well a reranker surfaces relevant documents',
@@ -1017,6 +1158,24 @@ const aliases = {
1017
1158
  recall: 'rerank-eval',
1018
1159
  mrr: 'rerank-eval',
1019
1160
  'eval-rerank': 'rerank-eval',
1161
+ // Provider comparison aliases
1162
+ 'provider-comparison': 'provider-comparison',
1163
+ providers: 'provider-comparison',
1164
+ 'voyage-vs-openai': 'provider-comparison',
1165
+ 'openai-vs-voyage': 'provider-comparison',
1166
+ 'voyage-vs-anthropic': 'provider-comparison',
1167
+ 'anthropic-vs-voyage': 'provider-comparison',
1168
+ 'openai-vs-anthropic': 'provider-comparison',
1169
+ 'anthropic-vs-openai': 'provider-comparison',
1170
+ 'voyage-openai-anthropic': 'provider-comparison',
1171
+ differences: 'provider-comparison',
1172
+ 'which-provider': 'provider-comparison',
1173
+ 'compare-providers': 'provider-comparison',
1174
+ 'embedding-providers': 'provider-comparison',
1175
+ 'vs-openai': 'provider-comparison',
1176
+ 'vs-anthropic': 'provider-comparison',
1177
+ competitors: 'provider-comparison',
1178
+ alternatives: 'provider-comparison',
1020
1179
  };
1021
1180
 
1022
1181
  /**