tea-rags 1.19.0 → 1.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Embedding Diagnostic Benchmark
4
+ *
5
+ * Automatically calibrates EMBEDDING_BATCH_SIZE and EMBEDDING_CONCURRENCY
6
+ * using a three-phase plateau-detection algorithm.
7
+ *
8
+ * Phase 1: Find batch size plateau (CONCURRENCY=1)
9
+ * Phase 2: Test concurrency on plateau batches
10
+ * Phase 3: Select robust configuration (within 2% of max, prefer lower concurrency/batch)
11
+ *
12
+ * Run: npm run benchmark-embeddings
13
+ */
14
+ import { c, printBox } from "./lib/colors.mjs";
15
+ import { AVG_LOC_PER_CHUNK, config, MEDIAN_CODE_CHUNK_SIZE } from "./lib/config.mjs";
16
+ import { calibrateEmbeddings } from "./lib/embedding-calibration.mjs";
17
+ import { checkProviderConnectivity, createEmbeddingProvider } from "./lib/provider.mjs";
18
+
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * Output forms:
 *   - below one second:  `<ms>ms` (the value is printed as given)
 *   - below one minute:  `<s.s>s` with one decimal place
 *   - otherwise:         `<m>m <s>s`
 *
 * The duration is rounded as a whole before being split into units, so the
 * seconds field never reaches 60: 119600 ms renders as "2m 0s" (not
 * "1m 60s") and 59990 ms renders as "1m 0s" (not "60.0s").
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The formatted duration.
 */
function formatTime(ms) {
  if (ms < 1000) return `${ms}ms`;

  if (ms < 60000) {
    const secondsText = (ms / 1000).toFixed(1);
    // Values just under a minute round up to "60.0"; let those fall through
    // to the minutes form instead of printing "60.0s".
    if (secondsText !== "60.0") return `${secondsText}s`;
  }

  // Round once on the whole duration and split afterwards, so a remainder
  // that rounds up carries into the minutes field.
  const totalSeconds = Math.round(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}m ${seconds}s`;
}
29
+
/**
 * Entry point of the embedding calibration benchmark.
 *
 * Prints the active configuration, checks that the embedding provider is
 * reachable (exiting with status 1 when it is not), runs
 * `calibrateEmbeddings` in verbose mode and then reports: the selected
 * EMBEDDING_BATCH_SIZE / EMBEDDING_CONCURRENCY, the measured throughput, a
 * short rationale that depends on the detected setup (ONNX, remote, local),
 * shell `export` lines, indexing time estimates and summary statistics.
 * Finally calls the provider's `terminate()` when it has one.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Output helpers shared by the report sections below.
  const blank = () => console.log();
  const bullet = (text) => console.log(` ${c.dim}•${c.reset} ${text}`);
  const tunables = ["EMBEDDING_BATCH_SIZE", "EMBEDDING_CONCURRENCY"];

  console.clear();
  printBox("EMBEDDING CALIBRATION BENCHMARK", "Three-phase plateau detection");

  // Active configuration
  console.log(`${c.bold}Configuration:${c.reset}`);
  console.log(` ${c.dim}Ollama:${c.reset} ${config.EMBEDDING_BASE_URL}`);
  console.log(` ${c.dim}Model:${c.reset} ${config.EMBEDDING_MODEL}`);
  console.log(` ${c.dim}Chunk size:${c.reset} ${MEDIAN_CODE_CHUNK_SIZE} chars (median from production)`);
  blank();

  // Connectivity check: stop right away when the provider cannot be reached.
  process.stdout.write(`${c.dim}Checking embedding provider...${c.reset} `);
  const connectivity = await checkProviderConnectivity();
  if (!connectivity.ok) {
    console.log(`${c.red}FAILED${c.reset}`);
    console.log(`\n${c.red}Error:${c.reset} ${connectivity.error}`);
    process.exit(1);
  }
  console.log(`${c.green}OK${c.reset}`);

  // Provider setup
  const created = await createEmbeddingProvider();
  const embeddings = created.provider;
  const providerName = created.name;
  console.log(` ${c.green}✓${c.reset} Embedding provider: ${providerName}`);
  console.log(` ${c.green}✓${c.reset} Vector dimension: ${embeddings.getDimensions()}`);
  blank();

  // Calibration run
  const result = await calibrateEmbeddings(embeddings, { verbose: true });

  // ========== OUTPUT ==========
  blank();

  // Classify the setup; the base URL is only inspected for non-ONNX providers.
  const isOnnx = providerName === "onnx";
  const pointsAtLocalhost = () =>
    config.EMBEDDING_BASE_URL.includes("localhost") || config.EMBEDDING_BASE_URL.includes("127.0.0.1");
  const isRemote = !(isOnnx || pointsAtLocalhost());

  let setupIcon;
  let setupName;
  if (isOnnx) {
    setupIcon = "⚡";
    setupName = "Local ONNX";
  } else if (isRemote) {
    setupIcon = "🌐";
    setupName = "Remote GPU";
  } else {
    setupIcon = "🏠";
    setupName = "Local GPU";
  }

  printBox(`${setupIcon} ${setupName.toUpperCase()} - OPTIMAL CONFIGURATION`, "");

  // Selected values
  for (const key of tunables) {
    console.log(` ${c.bold}${key}${c.reset} = ${c.green}${c.bold}${result[key]}${c.reset}`);
  }
  blank();
  console.log(` ${c.bold}Throughput:${c.reset} ${c.cyan}${result.throughput_chunks_per_sec} chunks/s${c.reset}`);
  blank();

  // Rationale for the selected values
  console.log(`${c.bold}Why this configuration?${c.reset}`);
  if (isOnnx) {
    bullet(`Local ONNX runtime (${providerName})`);
    bullet("In-process inference, no network overhead");
    if (result.EMBEDDING_BATCH_SIZE <= 16) {
      bullet("Small batches optimal for ONNX memory management");
    }
  } else if (isRemote) {
    bullet(`Remote GPU detected (${config.EMBEDDING_BASE_URL})`);
    bullet("Lower batch + higher concurrency hides network latency");
    bullet("While one batch transfers, GPU processes another");
    if (result.EMBEDDING_CONCURRENCY > 1) {
      bullet(`CONCURRENCY=${result.EMBEDDING_CONCURRENCY} overlaps network I/O with GPU compute`);
    }
  } else {
    bullet("Local GPU detected (minimal network latency)");
    bullet("Higher batch + lower concurrency minimizes overhead");
    if (result.EMBEDDING_CONCURRENCY === 1) {
      bullet("CONCURRENCY=1 indicates GPU-bound workload");
    }
  }
  blank();

  // Shell exports
  console.log(`${c.bold}Add to your environment:${c.reset}`);
  blank();
  for (const key of tunables) {
    console.log(` ${c.cyan}export ${key}=${result[key]}${c.reset}`);
  }
  blank();

  // Indexing time estimates for a few reference project sizes
  console.log(`${c.bold}Estimated indexing times:${c.reset}`);
  const sampleProjects = [
    ["10K LoC", 10_000],
    ["100K LoC", 100_000],
    ["1M LoC", 1_000_000],
    ["VS Code (3.5M)", 3_500_000],
  ];
  for (const [label, loc] of sampleProjects) {
    const chunkCount = Math.ceil(loc / AVG_LOC_PER_CHUNK);
    const etaSeconds = Math.ceil(chunkCount / result.throughput_chunks_per_sec);
    console.log(` ${c.dim}${label.padEnd(20)}${c.reset} ${c.bold}${formatTime(etaSeconds * 1000)}${c.reset}`);
  }
  blank();

  // Summary statistics
  console.log(`${c.dim}────────────────────────────────────────${c.reset}`);
  console.log(
    `${c.dim}Configs tested: ${result.stable_configs_count} stable, ${result.discarded_configs_count} discarded${c.reset}`,
  );
  console.log(`${c.bold}Total benchmark time: ${formatTime(result.calibration_time_ms)}${c.reset}`);

  // Terminate provider (ONNX keeps socket alive)
  const canTerminate = "terminate" in embeddings && typeof embeddings.terminate === "function";
  if (canTerminate) {
    await embeddings.terminate();
  }
}
143
+
// Top-level runner: report any rejection from main() with its message and
// stack trace, then exit with a non-zero status.
main().catch(function onFatal(failure) {
  console.error(`${c.red}Fatal error:${c.reset}`, failure.message);
  console.error(failure.stack);
  process.exit(1);
});