ruvector 0.2.21 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. package/README.md +2 -2
  2. package/bin/cli.js +160 -0
  3. package/package.json +9 -5
  4. package/src/decompiler/api-prober.js +302 -0
  5. package/src/decompiler/model-decompiler.js +423 -0
  6. package/dist/analysis/complexity.d.ts +0 -52
  7. package/dist/analysis/complexity.d.ts.map +0 -1
  8. package/dist/analysis/complexity.js +0 -146
  9. package/dist/analysis/index.d.ts +0 -15
  10. package/dist/analysis/index.d.ts.map +0 -1
  11. package/dist/analysis/index.js +0 -38
  12. package/dist/analysis/patterns.d.ts +0 -71
  13. package/dist/analysis/patterns.d.ts.map +0 -1
  14. package/dist/analysis/patterns.js +0 -243
  15. package/dist/analysis/security.d.ts +0 -51
  16. package/dist/analysis/security.d.ts.map +0 -1
  17. package/dist/analysis/security.js +0 -139
  18. package/dist/core/adaptive-embedder.d.ts +0 -156
  19. package/dist/core/adaptive-embedder.d.ts.map +0 -1
  20. package/dist/core/adaptive-embedder.js +0 -838
  21. package/dist/core/agentdb-fast.d.ts +0 -149
  22. package/dist/core/agentdb-fast.d.ts.map +0 -1
  23. package/dist/core/agentdb-fast.js +0 -301
  24. package/dist/core/ast-parser.d.ts +0 -108
  25. package/dist/core/ast-parser.d.ts.map +0 -1
  26. package/dist/core/ast-parser.js +0 -602
  27. package/dist/core/attention-fallbacks.d.ts +0 -321
  28. package/dist/core/attention-fallbacks.d.ts.map +0 -1
  29. package/dist/core/attention-fallbacks.js +0 -552
  30. package/dist/core/cluster-wrapper.d.ts +0 -148
  31. package/dist/core/cluster-wrapper.d.ts.map +0 -1
  32. package/dist/core/cluster-wrapper.js +0 -271
  33. package/dist/core/coverage-router.d.ts +0 -88
  34. package/dist/core/coverage-router.d.ts.map +0 -1
  35. package/dist/core/coverage-router.js +0 -315
  36. package/dist/core/diff-embeddings.d.ts +0 -93
  37. package/dist/core/diff-embeddings.d.ts.map +0 -1
  38. package/dist/core/diff-embeddings.js +0 -334
  39. package/dist/core/gnn-wrapper.d.ts +0 -143
  40. package/dist/core/gnn-wrapper.d.ts.map +0 -1
  41. package/dist/core/gnn-wrapper.js +0 -213
  42. package/dist/core/graph-algorithms.d.ts +0 -83
  43. package/dist/core/graph-algorithms.d.ts.map +0 -1
  44. package/dist/core/graph-algorithms.js +0 -514
  45. package/dist/core/graph-wrapper.d.ts +0 -147
  46. package/dist/core/graph-wrapper.d.ts.map +0 -1
  47. package/dist/core/graph-wrapper.js +0 -299
  48. package/dist/core/index.d.ts +0 -48
  49. package/dist/core/index.d.ts.map +0 -1
  50. package/dist/core/index.js +0 -89
  51. package/dist/core/intelligence-engine.d.ts +0 -258
  52. package/dist/core/intelligence-engine.d.ts.map +0 -1
  53. package/dist/core/intelligence-engine.js +0 -1030
  54. package/dist/core/learning-engine.d.ts +0 -160
  55. package/dist/core/learning-engine.d.ts.map +0 -1
  56. package/dist/core/learning-engine.js +0 -589
  57. package/dist/core/neural-embeddings.d.ts +0 -393
  58. package/dist/core/neural-embeddings.d.ts.map +0 -1
  59. package/dist/core/neural-embeddings.js +0 -1091
  60. package/dist/core/neural-perf.d.ts +0 -331
  61. package/dist/core/neural-perf.d.ts.map +0 -1
  62. package/dist/core/neural-perf.js +0 -704
  63. package/dist/core/onnx/loader.js +0 -348
  64. package/dist/core/onnx/pkg/LICENSE +0 -21
  65. package/dist/core/onnx/pkg/loader.js +0 -348
  66. package/dist/core/onnx/pkg/package.json +0 -3
  67. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.d.ts +0 -112
  68. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js +0 -5
  69. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js +0 -638
  70. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm +0 -0
  71. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts +0 -29
  72. package/dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_cjs.js +0 -127
  73. package/dist/core/onnx-embedder.d.ts +0 -105
  74. package/dist/core/onnx-embedder.d.ts.map +0 -1
  75. package/dist/core/onnx-embedder.js +0 -410
  76. package/dist/core/onnx-llm.d.ts +0 -206
  77. package/dist/core/onnx-llm.d.ts.map +0 -1
  78. package/dist/core/onnx-llm.js +0 -430
  79. package/dist/core/onnx-optimized.d.ts +0 -109
  80. package/dist/core/onnx-optimized.d.ts.map +0 -1
  81. package/dist/core/onnx-optimized.js +0 -419
  82. package/dist/core/parallel-intelligence.d.ts +0 -109
  83. package/dist/core/parallel-intelligence.d.ts.map +0 -1
  84. package/dist/core/parallel-intelligence.js +0 -340
  85. package/dist/core/parallel-workers.d.ts +0 -177
  86. package/dist/core/parallel-workers.d.ts.map +0 -1
  87. package/dist/core/parallel-workers.js +0 -671
  88. package/dist/core/router-wrapper.d.ts +0 -62
  89. package/dist/core/router-wrapper.d.ts.map +0 -1
  90. package/dist/core/router-wrapper.js +0 -209
  91. package/dist/core/rvf-wrapper.d.ts +0 -86
  92. package/dist/core/rvf-wrapper.d.ts.map +0 -1
  93. package/dist/core/rvf-wrapper.js +0 -102
  94. package/dist/core/sona-wrapper.d.ts +0 -226
  95. package/dist/core/sona-wrapper.d.ts.map +0 -1
  96. package/dist/core/sona-wrapper.js +0 -282
  97. package/dist/core/tensor-compress.d.ts +0 -134
  98. package/dist/core/tensor-compress.d.ts.map +0 -1
  99. package/dist/core/tensor-compress.js +0 -432
  100. package/dist/index.d.ts +0 -105
  101. package/dist/index.d.ts.map +0 -1
  102. package/dist/index.js +0 -221
  103. package/dist/services/embedding-service.d.ts +0 -136
  104. package/dist/services/embedding-service.d.ts.map +0 -1
  105. package/dist/services/embedding-service.js +0 -294
  106. package/dist/services/index.d.ts +0 -6
  107. package/dist/services/index.d.ts.map +0 -1
  108. package/dist/services/index.js +0 -26
  109. package/dist/types.d.ts +0 -145
  110. package/dist/types.d.ts.map +0 -1
  111. package/dist/types.js +0 -2
  112. package/dist/workers/benchmark.d.ts +0 -44
  113. package/dist/workers/benchmark.d.ts.map +0 -1
  114. package/dist/workers/benchmark.js +0 -230
  115. package/dist/workers/index.d.ts +0 -10
  116. package/dist/workers/index.d.ts.map +0 -1
  117. package/dist/workers/index.js +0 -25
  118. package/dist/workers/native-worker.d.ts +0 -76
  119. package/dist/workers/native-worker.d.ts.map +0 -1
  120. package/dist/workers/native-worker.js +0 -490
  121. package/dist/workers/types.d.ts +0 -69
  122. package/dist/workers/types.d.ts.map +0 -1
  123. package/dist/workers/types.js +0 -7
package/README.md CHANGED
@@ -10,7 +10,7 @@
10
10
 
11
11
  **The fastest vector database for Node.js—built in Rust, runs everywhere**
12
12
 
13
- Ruvector is a self-learning vector database with **enterprise-grade semantic search**, hybrid retrieval (sparse + dense), Graph RAG, FlashAttention-3, and billion-scale DiskANN — all in a single npm package. Unlike cloud-only solutions or Python-first databases, Ruvector is designed for JavaScript/TypeScript developers who need **blazing-fast vector search** without external services.
13
+ Ruvector is a self-learning vector database with **enterprise-grade semantic search**, hybrid retrieval (sparse + dense), Graph RAG, FlashAttention-3, and DiskANN — all in a single npm package. Unlike cloud-only solutions or Python-first databases, Ruvector is designed for JavaScript/TypeScript developers who need **blazing-fast vector search** without external services.
14
14
 
15
15
  > 🚀 **Sub-millisecond queries** • 🎯 **52,000+ inserts/sec** • 💾 **~50 bytes per vector** • 🌍 **Runs anywhere** • 🧠 **859 tests passing**
16
16
 
@@ -40,7 +40,7 @@ npx ruvector hooks init --pretrain --build-agents quality
40
40
  - **FlashAttention-3** — IO-aware tiled attention, O(N) memory instead of O(N^2)
41
41
  - **Graph RAG** — Knowledge graph + community detection for multi-hop queries (30-60% improvement)
42
42
  - **Hybrid Search** — Sparse + dense vectors with RRF fusion (20-49% better retrieval)
43
- - **DiskANN / Vamana** — Billion-scale SSD-backed ANN with <10ms latency
43
+ - **DiskANN / Vamana** — SSD-friendly ANN graph with PQ compression for large-scale search
44
44
  - **ColBERT Multi-Vector** — Per-token late interaction retrieval (MaxSim)
45
45
  - **Matryoshka Embeddings** — Adaptive-dimension search with funnel/cascade modes
46
46
  - **MLA** — Multi-Head Latent Attention with ~93% KV-cache compression (DeepSeek-V2/V3)
package/bin/cli.js CHANGED
@@ -8935,13 +8935,52 @@ const decompileCmd = program
8935
8935
  .option('-q, --quiet', 'Suppress progress output')
8936
8936
  .option('--version-pkg <ver>', 'Package version (alternative to @version syntax)')
8937
8937
  .option('--diff <version>', 'Compare against another version')
8938
+ .option('--model <file>', 'Decompile LLM model weight file (.gguf, .safetensors)')
8939
+ .option('--api <model-id>', 'Probe remote LLM API to discover architecture')
8940
+ .option('--api-key <key>', 'API key for --api mode (or use env vars)')
8938
8941
  .action(async (target, opts) => {
8942
+ // Model weight decompilation mode (ADR-138)
8943
+ if (opts.model) {
8944
+ try {
8945
+ const modelDecompiler = require('../src/decompiler/model-decompiler.js');
8946
+ const result = await modelDecompiler.decompileModelFile(opts.model);
8947
+ if (opts.json) {
8948
+ console.log(JSON.stringify(result, null, 2));
8949
+ } else {
8950
+ modelDecompiler.printModelResult(result);
8951
+ }
8952
+ } catch (err) {
8953
+ console.error(chalk.red(`Model decompilation failed: ${err.message}`));
8954
+ process.exit(1);
8955
+ }
8956
+ return;
8957
+ }
8958
+
8959
+ // API probing mode (ADR-138)
8960
+ if (opts.api) {
8961
+ try {
8962
+ const apiProber = require('../src/decompiler/api-prober.js');
8963
+ const result = await apiProber.probeModel(opts.api, { apiKey: opts.apiKey });
8964
+ if (opts.json) {
8965
+ console.log(JSON.stringify(result, null, 2));
8966
+ } else {
8967
+ apiProber.printProbeResult(result);
8968
+ }
8969
+ } catch (err) {
8970
+ console.error(chalk.red(`API probe failed: ${err.message}`));
8971
+ process.exit(1);
8972
+ }
8973
+ return;
8974
+ }
8975
+
8939
8976
  if (!target) {
8940
8977
  console.log(chalk.cyan('\nUsage:'));
8941
8978
  console.log(chalk.white(' ruvector decompile <package> Decompile npm package'));
8942
8979
  console.log(chalk.white(' ruvector decompile <pkg>@<ver> Specific version'));
8943
8980
  console.log(chalk.white(' ruvector decompile ./bundle.js Local file'));
8944
8981
  console.log(chalk.white(' ruvector decompile https://unpkg.com/x URL'));
8982
+ console.log(chalk.white(' ruvector decompile --model <file.gguf> LLM weight file'));
8983
+ console.log(chalk.white(' ruvector decompile --api <model-id> Probe remote API'));
8945
8984
  console.log(chalk.dim('\nOptions:'));
8946
8985
  console.log(chalk.dim(' -o, --output <dir> Output directory'));
8947
8986
  console.log(chalk.dim(' -f, --format <type> modules | single | json'));
@@ -8949,6 +8988,9 @@ const decompileCmd = program
8949
8988
  console.log(chalk.dim(' --no-witness Skip witness chain'));
8950
8989
  console.log(chalk.dim(' --json JSON to stdout'));
8951
8990
  console.log(chalk.dim(' --diff <version> Diff against another version'));
8991
+ console.log(chalk.dim(' --model <file> Decompile .gguf/.safetensors'));
8992
+ console.log(chalk.dim(' --api <model-id> Probe LLM API'));
8993
+ console.log(chalk.dim(' --api-key <key> API key (or set env var)'));
8952
8994
  console.log('');
8953
8995
  return;
8954
8996
  }
@@ -9061,6 +9103,124 @@ const decompileCmd = program
9061
9103
  }
9062
9104
  });
9063
9105
 
9106
+ // =============================================================================
9107
+ // Optimize Commands — Claude Code profile optimization (ADR-139)
9108
+ // =============================================================================
9109
+
9110
+ const optimizeCmd = program.command('optimize')
9111
+ .description('Optimize Claude Code configuration per task type (ADR-139)')
9112
+ .option('-p, --profile <type>', 'Task profile: coding|research|quickfix|planning|background|swarm|review|ci')
9113
+ .option('-s, --show', 'Show current optimization status')
9114
+ .option('-l, --list', 'List all available profiles')
9115
+ .option('--generate-settings', 'Output optimal .claude/settings.json')
9116
+ .option('--detect <prompt>', 'Auto-detect task type from a prompt')
9117
+ .option('--apply', 'Apply profile env vars to current process (for hooks)')
9118
+ .option('--json', 'JSON output')
9119
+ .action(async (opts) => {
9120
+ let optimizerMod;
9121
+ try {
9122
+ optimizerMod = require('../src/optimizer/index.js');
9123
+ } catch (e) {
9124
+ console.error(chalk.red('Error: Failed to load optimizer module.'));
9125
+ console.error(chalk.dim(` ${e.message}`));
9126
+ process.exit(1);
9127
+ }
9128
+
9129
+ // --list: show all profiles
9130
+ if (opts.list) {
9131
+ const profiles = optimizerMod.listProfiles();
9132
+ if (opts.json) {
9133
+ const data = {};
9134
+ for (const name of profiles) {
9135
+ data[name] = optimizerMod.getProfile(name);
9136
+ }
9137
+ console.log(JSON.stringify(data, null, 2));
9138
+ return;
9139
+ }
9140
+ console.log(chalk.bold.cyan('\n RVAgent Optimizer Profiles (ADR-139)\n'));
9141
+ console.log(chalk.dim(' Based on decompiled Claude Code v2.1.91 intelligence\n'));
9142
+ for (const name of profiles) {
9143
+ const p = optimizerMod.getProfile(name);
9144
+ const envCount = Object.keys(p.env).length;
9145
+ console.log(` ${chalk.bold.white(name.padEnd(12))} ${chalk.dim(p.description)}`);
9146
+ console.log(chalk.dim(`${''.padEnd(14)}Permission: ${p.permissionMode}, Env vars: ${envCount}`));
9147
+ }
9148
+ console.log('');
9149
+ console.log(chalk.dim(' Usage: ruvector optimize --profile <type>'));
9150
+ console.log(chalk.dim(' ruvector optimize --generate-settings --profile coding'));
9151
+ console.log('');
9152
+ return;
9153
+ }
9154
+
9155
+ // --detect: infer task type from prompt
9156
+ if (opts.detect) {
9157
+ const detected = optimizerMod.detectTaskType(opts.detect);
9158
+ if (opts.json) {
9159
+ console.log(JSON.stringify({ prompt: opts.detect, taskType: detected }));
9160
+ return;
9161
+ }
9162
+ console.log(chalk.cyan(` Detected task type: ${chalk.bold(detected)}`));
9163
+ return;
9164
+ }
9165
+
9166
+ // Determine profile to use
9167
+ const profileName = opts.profile || 'coding';
9168
+ const profile = optimizerMod.getProfile(profileName);
9169
+
9170
+ if (!profile) {
9171
+ console.error(chalk.red(` Unknown profile: ${profileName}`));
9172
+ console.error(chalk.yellow(` Available: ${optimizerMod.listProfiles().join(', ')}`));
9173
+ process.exit(1);
9174
+ }
9175
+
9176
+ // --generate-settings: output settings.json
9177
+ if (opts.generateSettings) {
9178
+ const { generateSettings, formatSettings } = require('../src/optimizer/settings-generator.js');
9179
+ const settings = generateSettings({ ...profile, taskType: profileName });
9180
+ if (opts.json) {
9181
+ console.log(formatSettings(settings));
9182
+ } else {
9183
+ console.log(chalk.bold.cyan(`\n Generated settings.json for profile: ${profileName}\n`));
9184
+ console.log(formatSettings(settings));
9185
+ console.log('');
9186
+ console.log(chalk.dim(' Save to .claude/settings.json to activate.'));
9187
+ console.log('');
9188
+ }
9189
+ return;
9190
+ }
9191
+
9192
+ // --show: display profile details
9193
+ if (opts.show) {
9194
+ if (opts.json) {
9195
+ console.log(JSON.stringify({ profile: profileName, ...profile }, null, 2));
9196
+ return;
9197
+ }
9198
+ console.log(chalk.bold.cyan(`\n Profile: ${profileName}\n`));
9199
+ console.log(` ${chalk.dim('Description:')} ${profile.description}`);
9200
+ console.log(` ${chalk.dim('Permission:')} ${profile.permissionMode}`);
9201
+ console.log(` ${chalk.dim('Env vars:')}`);
9202
+ for (const [key, val] of Object.entries(profile.env)) {
9203
+ console.log(` ${chalk.white(key)}=${chalk.green(val)}`);
9204
+ }
9205
+ console.log('');
9206
+ return;
9207
+ }
9208
+
9209
+ // --apply or default: apply env vars
9210
+ const result = optimizerMod.applyProfile(profileName);
9211
+ if (opts.json) {
9212
+ console.log(JSON.stringify(result, null, 2));
9213
+ return;
9214
+ }
9215
+ console.log(chalk.bold.cyan(`\n Applied profile: ${profileName}`));
9216
+ console.log(chalk.dim(` ${profile.description}\n`));
9217
+ for (const [key, val] of Object.entries(result.applied)) {
9218
+ console.log(` ${chalk.green('+')} ${key}=${val}`);
9219
+ }
9220
+ console.log(`\n ${chalk.dim('Permission mode:')} ${result.permissionMode}`);
9221
+ console.log('');
9222
+ });
9223
+
9064
9224
  program.parse();
9065
9225
 
9066
9226
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.2.21",
4
- "description": "Self-learning vector database for Node.js hybrid search, Graph RAG, FlashAttention-3, DiskANN, 50+ attention mechanisms",
3
+ "version": "0.2.23",
4
+ "description": "Self-learning vector database for Node.js \u2014 hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
7
  "bin": {
@@ -47,7 +47,7 @@
47
47
  "mcp",
48
48
  "edge-computing",
49
49
  "graph-rag",
50
- "diskann",
50
+ "hnsw",
51
51
  "hybrid-search",
52
52
  "colbert",
53
53
  "turboquant",
@@ -97,7 +97,8 @@
97
97
  "peerDependencies": {
98
98
  "@ruvector/pi-brain": ">=0.1.0",
99
99
  "@ruvector/router": ">=0.1.0",
100
- "@ruvector/ruvllm": ">=2.0.0"
100
+ "@ruvector/ruvllm": ">=2.0.0",
101
+ "@ruvector/diskann": ">=0.1.0"
101
102
  },
102
103
  "peerDependenciesMeta": {
103
104
  "@ruvector/pi-brain": {
@@ -108,9 +109,12 @@
108
109
  },
109
110
  "@ruvector/router": {
110
111
  "optional": true
112
+ },
113
+ "@ruvector/diskann": {
114
+ "optional": true
111
115
  }
112
116
  },
113
117
  "engines": {
114
118
  "node": ">=18.0.0"
115
119
  }
116
- }
120
+ }
@@ -0,0 +1,302 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * LLM API prober -- discovers model architecture by probing remote APIs.
5
+ * Detects capabilities, token limits, tokenizer behavior, and model fingerprints.
6
+ * See ADR-138.
7
+ */
8
+
9
+ // ── Provider detection ───────────────────────────────────────────────────
10
+
11
+ const PROVIDERS = {
12
+ anthropic: {
13
+ endpoint: 'https://api.anthropic.com/v1/messages',
14
+ envKey: 'ANTHROPIC_API_KEY',
15
+ models: ['claude-sonnet-4-6', 'claude-sonnet-4-20250514', 'claude-haiku-4-20250414', 'claude-opus-4-20250514'],
16
+ },
17
+ openai: {
18
+ endpoint: 'https://api.openai.com/v1/chat/completions',
19
+ envKey: 'OPENAI_API_KEY',
20
+ models: ['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'o1', 'o1-mini'],
21
+ },
22
+ google: {
23
+ endpoint: 'https://generativelanguage.googleapis.com/v1beta/models',
24
+ envKey: 'GOOGLE_AI_API_KEY',
25
+ models: ['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.0-flash'],
26
+ },
27
+ };
28
+
29
+ function detectProvider(modelId) {
30
+ modelId = modelId.toLowerCase();
31
+ if (modelId.startsWith('claude')) return 'anthropic';
32
+ if (modelId.startsWith('gpt') || modelId.startsWith('o1') || modelId.startsWith('o3')) return 'openai';
33
+ if (modelId.startsWith('gemini')) return 'google';
34
+ return 'unknown';
35
+ }
36
+
37
+ // ── Main probe ───────────────────────────────────────────────────────────
38
+
39
+ async function probeModel(modelId, opts = {}) {
40
+ const provider = detectProvider(modelId);
41
+ const providerConfig = PROVIDERS[provider];
42
+ if (!providerConfig && provider === 'unknown') {
43
+ throw new Error(`Unknown provider for model: ${modelId}. Supported: claude-*, gpt-*, gemini-*`);
44
+ }
45
+
46
+ const apiKey = opts.apiKey || process.env[providerConfig?.envKey || ''];
47
+ if (!apiKey) {
48
+ throw new Error(
49
+ `No API key found. Set ${providerConfig?.envKey || 'API_KEY'} env var or pass --api-key`
50
+ );
51
+ }
52
+
53
+ const result = {
54
+ model: modelId,
55
+ provider,
56
+ capabilities: {},
57
+ tokenizer: {},
58
+ limits: {},
59
+ fingerprint: {},
60
+ latency: {},
61
+ };
62
+
63
+ const send = buildSender(provider, modelId, apiKey);
64
+
65
+ // 1. Basic probe -- verify model is reachable and measure latency
66
+ const start = Date.now();
67
+ const basicResp = await send('Say exactly: PROBE_OK');
68
+ result.latency.first_token_ms = Date.now() - start;
69
+ result.capabilities.reachable = !!basicResp;
70
+
71
+ if (!basicResp) {
72
+ result.capabilities.error = 'Model unreachable or invalid API key';
73
+ return result;
74
+ }
75
+
76
+ // 2. Capability probes (run in parallel for speed)
77
+ const [streamResp, toolResp, sysResp] = await Promise.allSettled([
78
+ testStreaming(send),
79
+ testToolUse(provider, modelId, apiKey),
80
+ send('What is 2+2? Reply with just the number.', { systemPrompt: 'You are a calculator.' }),
81
+ ]);
82
+
83
+ result.capabilities.streaming = streamResp.status === 'fulfilled' && streamResp.value;
84
+ result.capabilities.tools = toolResp.status === 'fulfilled' && toolResp.value;
85
+ result.capabilities.system_prompt = sysResp.status === 'fulfilled' && !!sysResp.value;
86
+
87
+ // 3. Tokenizer probe -- send known strings, analyze responses
88
+ const tokenizerResult = await probeTokenizer(send);
89
+ result.tokenizer = tokenizerResult;
90
+
91
+ // 4. Model fingerprint -- specific prompts that distinguish families
92
+ const fingerprint = await fingerprintModel(send, provider);
93
+ result.fingerprint = fingerprint;
94
+
95
+ // 5. Measure response speed
96
+ const speedStart = Date.now();
97
+ const longResp = await send('Count from 1 to 20, one per line.');
98
+ const speedMs = Date.now() - speedStart;
99
+ const outputTokens = longResp ? longResp.split(/\s+/).length : 0;
100
+ result.latency.generation_ms = speedMs;
101
+ result.latency.est_tokens_per_sec = speedMs > 0 ? Math.round((outputTokens / speedMs) * 1000) : 0;
102
+
103
+ return result;
104
+ }
105
+
106
+ // ── Provider-specific request builders ───────────────────────────────────
107
+
108
+ function buildSender(provider, modelId, apiKey) {
109
+ return async (prompt, opts = {}) => {
110
+ try {
111
+ if (provider === 'anthropic') return await sendAnthropic(modelId, apiKey, prompt, opts);
112
+ if (provider === 'openai') return await sendOpenAI(modelId, apiKey, prompt, opts);
113
+ if (provider === 'google') return await sendGoogle(modelId, apiKey, prompt, opts);
114
+ throw new Error(`Unsupported provider: ${provider}`);
115
+ } catch (err) {
116
+ // Return null on API errors (model may not support the feature)
117
+ if (err.message?.includes('API error')) return null;
118
+ throw err;
119
+ }
120
+ };
121
+ }
122
+
123
+ async function sendAnthropic(model, apiKey, prompt, opts = {}) {
124
+ const body = {
125
+ model,
126
+ max_tokens: opts.maxTokens || 100,
127
+ messages: [{ role: 'user', content: prompt }],
128
+ };
129
+ if (opts.systemPrompt) body.system = opts.systemPrompt;
130
+
131
+ const resp = await fetch('https://api.anthropic.com/v1/messages', {
132
+ method: 'POST',
133
+ headers: {
134
+ 'Content-Type': 'application/json',
135
+ 'x-api-key': apiKey,
136
+ 'anthropic-version': '2023-06-01',
137
+ },
138
+ body: JSON.stringify(body),
139
+ });
140
+ if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`);
141
+ const data = await resp.json();
142
+ return data.content?.[0]?.text || '';
143
+ }
144
+
145
+ async function sendOpenAI(model, apiKey, prompt, opts = {}) {
146
+ const messages = [];
147
+ if (opts.systemPrompt) messages.push({ role: 'system', content: opts.systemPrompt });
148
+ messages.push({ role: 'user', content: prompt });
149
+
150
+ const resp = await fetch('https://api.openai.com/v1/chat/completions', {
151
+ method: 'POST',
152
+ headers: {
153
+ 'Content-Type': 'application/json',
154
+ 'Authorization': `Bearer ${apiKey}`,
155
+ },
156
+ body: JSON.stringify({ model, messages, max_tokens: opts.maxTokens || 100 }),
157
+ });
158
+ if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`);
159
+ const data = await resp.json();
160
+ return data.choices?.[0]?.message?.content || '';
161
+ }
162
+
163
+ async function sendGoogle(model, apiKey, prompt, opts = {}) {
164
+ const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${apiKey}`;
165
+ const body = {
166
+ contents: [{ parts: [{ text: prompt }] }],
167
+ generationConfig: { maxOutputTokens: opts.maxTokens || 100 },
168
+ };
169
+ if (opts.systemPrompt) {
170
+ body.systemInstruction = { parts: [{ text: opts.systemPrompt }] };
171
+ }
172
+
173
+ const resp = await fetch(url, {
174
+ method: 'POST',
175
+ headers: { 'Content-Type': 'application/json' },
176
+ body: JSON.stringify(body),
177
+ });
178
+ if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`);
179
+ const data = await resp.json();
180
+ return data.candidates?.[0]?.content?.parts?.[0]?.text || '';
181
+ }
182
+
183
+ // ── Feature probes ───────────────────────────────────────────────────────
184
+
185
+ async function testStreaming(send) {
186
+ // Streaming support is provider-dependent; we just check if the model responds
187
+ const resp = await send('Say "stream test"');
188
+ return !!resp;
189
+ }
190
+
191
+ async function testToolUse(provider, modelId, apiKey) {
192
+ try {
193
+ if (provider === 'anthropic') {
194
+ const resp = await fetch('https://api.anthropic.com/v1/messages', {
195
+ method: 'POST',
196
+ headers: {
197
+ 'Content-Type': 'application/json',
198
+ 'x-api-key': apiKey,
199
+ 'anthropic-version': '2023-06-01',
200
+ },
201
+ body: JSON.stringify({
202
+ model: modelId,
203
+ max_tokens: 100,
204
+ messages: [{ role: 'user', content: 'What is the weather in SF?' }],
205
+ tools: [{
206
+ name: 'get_weather',
207
+ description: 'Get weather for a location',
208
+ input_schema: { type: 'object', properties: { location: { type: 'string' } } },
209
+ }],
210
+ }),
211
+ });
212
+ return resp.ok;
213
+ }
214
+ return true; // Assume supported for other providers
215
+ } catch {
216
+ return false;
217
+ }
218
+ }
219
+
220
+ // ── Tokenizer probing ────────────────────────────────────────────────────
221
+
222
+ async function probeTokenizer(send) {
223
+ // Send known strings and analyze how the model interprets them
224
+ const testStr = 'antidisestablishmentarianism';
225
+ const resp = await send(
226
+ `How many tokens does the word "${testStr}" require? Just give the number.`
227
+ );
228
+ const tokenCount = resp ? parseInt(resp.match(/\d+/)?.[0] || '0', 10) : 0;
229
+
230
+ // Detect BPE vs SentencePiece by checking token boundary behavior
231
+ const bpeResp = await send(
232
+ 'Split "unhappiness" into its BPE tokens. List each token on a line.'
233
+ );
234
+
235
+ let type = 'unknown';
236
+ if (bpeResp) {
237
+ if (bpeResp.includes('un') && bpeResp.includes('happiness')) type = 'BPE';
238
+ if (bpeResp.includes('_un') || bpeResp.includes('\u2581un')) type = 'SentencePiece';
239
+ }
240
+
241
+ return {
242
+ type,
243
+ estimated_tokens_for_test_word: tokenCount,
244
+ test_word: testStr,
245
+ };
246
+ }
247
+
248
+ // ── Model fingerprinting ─────────────────────────────────────────────────
249
+
250
+ async function fingerprintModel(send, provider) {
251
+ // Ask the model to identify itself
252
+ const identResp = await send(
253
+ 'What LLM are you? Reply in format: "I am [model name] by [company]"'
254
+ );
255
+
256
+ // Test for specific behaviors
257
+ const mathResp = await send('What is 7 * 8? Reply with just the number.');
258
+
259
+ return {
260
+ self_identification: identResp || 'unknown',
261
+ provider_detected: provider,
262
+ math_correct: mathResp?.trim() === '56',
263
+ timestamp: new Date().toISOString(),
264
+ };
265
+ }
266
+
267
+ // ── Pretty printer ───────────────────────────────────────────────────────
268
+
269
+ function printProbeResult(result) {
270
+ const _chalk = require('chalk');
271
+ const chalk = _chalk.default || _chalk;
272
+
273
+ console.log(chalk.bold.cyan('\n LLM API Probe Results'));
274
+ console.log(chalk.white(` Model: ${result.model}`));
275
+ console.log(chalk.white(` Provider: ${result.provider}`));
276
+ console.log('');
277
+
278
+ console.log(chalk.bold(' Capabilities:'));
279
+ console.log(chalk.white(` Reachable: ${result.capabilities.reachable ? 'Yes' : 'No'}`));
280
+ console.log(chalk.white(` Streaming: ${result.capabilities.streaming ? 'Yes' : 'No'}`));
281
+ console.log(chalk.white(` Tool use: ${result.capabilities.tools ? 'Yes' : 'No'}`));
282
+ console.log(chalk.white(` System prompt: ${result.capabilities.system_prompt ? 'Yes' : 'No'}`));
283
+ console.log('');
284
+
285
+ console.log(chalk.bold(' Latency:'));
286
+ console.log(chalk.white(` First token: ${result.latency.first_token_ms} ms`));
287
+ console.log(chalk.white(` Generation: ${result.latency.generation_ms} ms`));
288
+ console.log(chalk.white(` Est. tok/sec: ${result.latency.est_tokens_per_sec}`));
289
+ console.log('');
290
+
291
+ console.log(chalk.bold(' Tokenizer:'));
292
+ console.log(chalk.white(` Type: ${result.tokenizer.type}`));
293
+ console.log(chalk.white(` Test word: "${result.tokenizer.test_word}" -> ${result.tokenizer.estimated_tokens_for_test_word} tokens`));
294
+ console.log('');
295
+
296
+ console.log(chalk.bold(' Fingerprint:'));
297
+ console.log(chalk.white(` Self-ID: ${result.fingerprint.self_identification?.slice(0, 80)}`));
298
+ console.log(chalk.white(` Math correct: ${result.fingerprint.math_correct ? 'Yes' : 'No'}`));
299
+ console.log('');
300
+ }
301
+
302
+ module.exports = { probeModel, printProbeResult, detectProvider };