sweet-search 0.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +23 -0
- package/core/cli.js +51 -0
- package/core/config.js +27 -0
- package/core/embedding/embedding-cache.js +467 -0
- package/core/embedding/embedding-local-model.js +845 -0
- package/core/embedding/embedding-remote.js +492 -0
- package/core/embedding/embedding-service.js +712 -0
- package/core/embedding/embedding-telemetry.js +219 -0
- package/core/embedding/index.js +40 -0
- package/core/graph/community-detector.js +294 -0
- package/core/graph/graph-expansion.js +839 -0
- package/core/graph/graph-extractor.js +2304 -0
- package/core/graph/graph-search.js +2148 -0
- package/core/graph/hcgs-generator.js +666 -0
- package/core/graph/index.js +16 -0
- package/core/graph/leiden-algorithm.js +547 -0
- package/core/graph/relationship-resolver.js +366 -0
- package/core/graph/repo-map.js +408 -0
- package/core/graph/summary-manager.js +549 -0
- package/core/indexing/artifact-builder.js +1054 -0
- package/core/indexing/ast-chunker.js +709 -0
- package/core/indexing/chunking/chunk-builder.js +170 -0
- package/core/indexing/chunking/markdown-chunker.js +503 -0
- package/core/indexing/chunking/plaintext-chunker.js +104 -0
- package/core/indexing/dedup/dedup-phase.js +159 -0
- package/core/indexing/dedup/exemplar-selector.js +65 -0
- package/core/indexing/document-chunker.js +56 -0
- package/core/indexing/incremental-parser.js +390 -0
- package/core/indexing/incremental-tracker.js +761 -0
- package/core/indexing/index-codebase-v21.js +472 -0
- package/core/indexing/index-maintainer.mjs +1674 -0
- package/core/indexing/index.js +90 -0
- package/core/indexing/indexer-ann.js +1077 -0
- package/core/indexing/indexer-build.js +742 -0
- package/core/indexing/indexer-phases.js +800 -0
- package/core/indexing/indexer-pool.js +764 -0
- package/core/indexing/indexer-sparse-gram.js +98 -0
- package/core/indexing/indexer-utils.js +536 -0
- package/core/indexing/indexer-worker.js +148 -0
- package/core/indexing/li-skip-policy.js +225 -0
- package/core/indexing/merkle-tracker.js +244 -0
- package/core/indexing/model-pool.js +166 -0
- package/core/infrastructure/code-graph-repository.js +120 -0
- package/core/infrastructure/codebase-repository.js +131 -0
- package/core/infrastructure/config/dedup.js +54 -0
- package/core/infrastructure/config/embedding.js +298 -0
- package/core/infrastructure/config/graph.js +80 -0
- package/core/infrastructure/config/index.js +82 -0
- package/core/infrastructure/config/indexing.js +8 -0
- package/core/infrastructure/config/platform.js +254 -0
- package/core/infrastructure/config/ranking.js +221 -0
- package/core/infrastructure/config/search.js +396 -0
- package/core/infrastructure/config/translation.js +89 -0
- package/core/infrastructure/config/vector-store.js +114 -0
- package/core/infrastructure/constants.js +86 -0
- package/core/infrastructure/coreml-cascade.js +909 -0
- package/core/infrastructure/coreml-cascade.json +46 -0
- package/core/infrastructure/coreml-provider.js +81 -0
- package/core/infrastructure/db-utils.js +69 -0
- package/core/infrastructure/dedup-hashing.js +83 -0
- package/core/infrastructure/hardware-capability.js +332 -0
- package/core/infrastructure/index.js +104 -0
- package/core/infrastructure/language-patterns/maps.js +121 -0
- package/core/infrastructure/language-patterns/registry-core.js +323 -0
- package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
- package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
- package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
- package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
- package/core/infrastructure/language-patterns/registry.js +19 -0
- package/core/infrastructure/language-patterns.js +141 -0
- package/core/infrastructure/llm-provider.js +733 -0
- package/core/infrastructure/manifest.json +46 -0
- package/core/infrastructure/maxsim.wasm +0 -0
- package/core/infrastructure/model-fetcher.js +423 -0
- package/core/infrastructure/model-registry.js +214 -0
- package/core/infrastructure/native-inference.js +587 -0
- package/core/infrastructure/native-resolver.js +187 -0
- package/core/infrastructure/native-sparse-gram.js +257 -0
- package/core/infrastructure/native-tokenizer.js +160 -0
- package/core/infrastructure/onnx-mutex.js +45 -0
- package/core/infrastructure/onnx-session-utils.js +261 -0
- package/core/infrastructure/ort-pipeline.js +111 -0
- package/core/infrastructure/project-detector.js +102 -0
- package/core/infrastructure/quantization.js +410 -0
- package/core/infrastructure/simd-distance.js +502 -0
- package/core/infrastructure/simd-distance.wasm +0 -0
- package/core/infrastructure/tree-sitter-provider.js +665 -0
- package/core/infrastructure/webgpu-maxsim.js +222 -0
- package/core/query/index.js +35 -0
- package/core/query/intent-detector.js +201 -0
- package/core/query/intent-router.js +156 -0
- package/core/query/query-router-catboost.js +222 -0
- package/core/query/query-router-ml.js +266 -0
- package/core/query/query-router.js +213 -0
- package/core/ranking/cascaded-scorer.js +379 -0
- package/core/ranking/flashrank.js +810 -0
- package/core/ranking/index.js +49 -0
- package/core/ranking/late-interaction-index.js +2383 -0
- package/core/ranking/late-interaction-model.js +812 -0
- package/core/ranking/local-reranker.js +374 -0
- package/core/ranking/mmr.js +379 -0
- package/core/ranking/quality-scorer.js +363 -0
- package/core/search/context-expander.js +1167 -0
- package/core/search/dedup/sibling-expander.js +327 -0
- package/core/search/index.js +16 -0
- package/core/search/search-boost.js +259 -0
- package/core/search/search-cli.js +544 -0
- package/core/search/search-format.js +282 -0
- package/core/search/search-fusion.js +327 -0
- package/core/search/search-hybrid.js +204 -0
- package/core/search/search-pattern-chunks.js +337 -0
- package/core/search/search-pattern-planner.js +439 -0
- package/core/search/search-pattern-prefilter.js +412 -0
- package/core/search/search-pattern-ripgrep.js +663 -0
- package/core/search/search-pattern.js +463 -0
- package/core/search/search-postprocess.js +452 -0
- package/core/search/search-semantic.js +706 -0
- package/core/search/search-server.js +554 -0
- package/core/search/session-daemon-prewarm.mjs +164 -0
- package/core/search/session-warmup.js +595 -0
- package/core/search/sweet-search.js +632 -0
- package/core/search/warmup-metrics.js +532 -0
- package/core/start-server.js +6 -0
- package/core/training/query-router/features/extractor.js +762 -0
- package/core/training/query-router/features/multilingual-patterns.js +431 -0
- package/core/training/query-router/features/text-segmenter.js +303 -0
- package/core/training/query-router/features/unicode-utils.js +383 -0
- package/core/training/query-router/output/v45_router_d4.js +11521 -0
- package/core/training/query-router/output/v46_router_d4.js +11498 -0
- package/core/vector-store/binary-heap.js +227 -0
- package/core/vector-store/binary-hnsw-index.js +1004 -0
- package/core/vector-store/float-vector-store.js +234 -0
- package/core/vector-store/hnsw-index.js +580 -0
- package/core/vector-store/index.js +39 -0
- package/core/vector-store/seismic-index.js +498 -0
- package/core/vocabulary/index.js +84 -0
- package/core/vocabulary/vocab-constants.js +20 -0
- package/core/vocabulary/vocab-miner-extractors.js +375 -0
- package/core/vocabulary/vocab-miner-nl.js +404 -0
- package/core/vocabulary/vocab-miner-utils.js +146 -0
- package/core/vocabulary/vocab-miner.js +574 -0
- package/core/vocabulary/vocab-prewarm-cli.js +110 -0
- package/core/vocabulary/vocab-ranker.js +492 -0
- package/core/vocabulary/vocab-warmer.js +523 -0
- package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
- package/core/vocabulary/vocabulary-utils.js +704 -0
- package/crates/wasm-router/pkg/package.json +13 -0
- package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
- package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
- package/mcp/config-gen.js +121 -0
- package/mcp/server.js +335 -0
- package/mcp/tool-handlers.js +476 -0
- package/package.json +131 -9
- package/scripts/benchmark-harness.js +794 -0
- package/scripts/init.js +1058 -0
- package/scripts/smoke-test.js +435 -0
- package/scripts/uninstall.js +478 -0
- package/scripts/verify-runtime.js +176 -0
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Provider Fallback Chain for HCGS Summary Generation
|
|
3
|
+
*
|
|
4
|
+
* Tiered approach for generating code summaries:
|
|
5
|
+
* 1. Groq (primary) - llama-3.2-3b-preview, ~2800 tok/s, $0.06/M (cheapest)
|
|
6
|
+
* └─ internal fallback: llama-3.1-8b-instant (~560 tok/s) if 3B model fails
|
|
7
|
+
* 2. Cerebras (fallback) - llama3.1-8b, ~2200 tok/s, $0.10/M
|
|
8
|
+
* 3. Ollama (local GPU) - qwen2.5-coder:7b-instruct at localhost:11434
|
|
9
|
+
* 4. Transformers.js (local CPU) - phi-3-mini-4k-instruct via @xenova/transformers
|
|
10
|
+
* 5. Static fallback (no LLM) - uses doc_comment, signature, or "{type} {name}"
|
|
11
|
+
*
|
|
12
|
+
* Override model for any provider: HCGS_MODEL=model-name
|
|
13
|
+
*
|
|
14
|
+
* Features:
|
|
15
|
+
* - Auto-detection of best available provider
|
|
16
|
+
* - Exponential backoff retry with jitter
|
|
17
|
+
* - Intelligent error classification (retryable vs permanent)
|
|
18
|
+
* - Consistent interface across all providers
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { CEREBRAS_CONFIG, HCGS_CONFIG, getCerebrasModel, isCerebrasAvailable } from './config/index.js';
|
|
22
|
+
|
|
23
|
+
// =============================================================================
|
|
24
|
+
// PROVIDER CONFIGURATION
|
|
25
|
+
// =============================================================================
|
|
26
|
+
|
|
27
|
+
// Groq credentials come from the environment; an empty string means "not configured".
const GROQ_API_KEY = process.env.GROQ_API_KEY || '';

// Groq (remote): endpoint, primary model, the model tried when the primary fails,
// and the per-request timeout in milliseconds.
const GROQ_HCGS_CONFIG = {
  apiKey: GROQ_API_KEY,
  baseUrl: 'https://api.groq.com/openai/v1',
  model: 'llama-3.2-3b-preview', // ~2800 tok/s, $0.06/M — fastest & cheapest
  fallbackModel: 'llama-3.1-8b-instant', // ~560 tok/s, $0.08/M — better quality fallback
  timeout: 15_000,
};

// Ollama (local server): base URL and model are overridable through the environment.
const OLLAMA_CONFIG = {
  baseUrl: process.env.OLLAMA_BASE_URL || 'http://localhost:11434',
  model: process.env.OLLAMA_MODEL || 'qwen2.5-coder:7b-instruct',
  timeout: 30_000, // 30s timeout for local inference
};

// Transformers.js (in-process): model id and sampling defaults.
const TRANSFORMERS_CONFIG = {
  model: 'Xenova/Phi-3-mini-4k-instruct',
  maxNewTokens: 150,
  temperature: 0.3,
};

// Retry policy shared by all providers (delays are in milliseconds).
const RETRY_CONFIG = {
  maxRetries: 3,
  baseDelay: 1_000, // 1 second
  maxDelay: 8_000, // 8 seconds max
  backoffMultiplier: 2,
  jitterFactor: 0.2, // 20% jitter
};
|
|
56
|
+
|
|
57
|
+
// =============================================================================
|
|
58
|
+
// ERROR CLASSIFICATION
|
|
59
|
+
// =============================================================================
|
|
60
|
+
|
|
61
|
+
/**
 * Determines if an error is retryable (transient) or permanent.
 *
 * Classification order:
 *   1. Known transient network error codes -> retryable.
 *   2. HTTP status (from `status`, `statusCode` or `response.status`):
 *      429 and 5xx -> retryable; any other 4xx -> permanent.
 *   3. Abort / timeout errors -> retryable.
 *   4. Anything else, including non-Error throwables -> retryable.
 *
 * @param {unknown} error - The thrown value to classify
 * @returns {boolean} - true if the error is retryable
 */
export function isRetryable(error) {
  // `throw null` / `throw undefined` are legal JavaScript; with nothing to
  // inspect, fall back to the same "unknown => transient" default used below.
  if (error === null || error === undefined) {
    return true;
  }

  // Network errors are typically retryable
  const transientCodes = ['ECONNREFUSED', 'ETIMEDOUT', 'ECONNRESET', 'ENOTFOUND', 'EAI_AGAIN'];
  if (transientCodes.includes(error.code)) {
    return true;
  }

  // HTTP status codes
  const status = error.status || error.statusCode || error.response?.status;
  if (status) {
    // Rate limits (429) and server errors (5xx) are retryable
    if (status === 429) return true;
    if (status >= 500 && status < 600) return true;
    // Client errors (4xx except 429) are permanent
    if (status >= 400 && status < 500) return false;
  }

  // Timeout errors are retryable. AbortSignal.timeout() rejects with a
  // DOMException named "TimeoutError"; a manual abort yields "AbortError".
  const mentionsTimeout = typeof error.message === 'string' && error.message.includes('timeout');
  if (error.name === 'AbortError' || error.name === 'TimeoutError' || mentionsTimeout) {
    return true;
  }

  // Default: assume transient for unknown errors
  return true;
}
|
|
94
|
+
|
|
95
|
+
// =============================================================================
|
|
96
|
+
// RETRY MECHANISM
|
|
97
|
+
// =============================================================================
|
|
98
|
+
|
|
99
|
+
/**
 * Computes the wait before the next retry: RETRY_CONFIG.baseDelay grown by
 * RETRY_CONFIG.backoffMultiplier per attempt, capped at RETRY_CONFIG.maxDelay,
 * then shifted by a random amount of up to ±jitterFactor of the capped value.
 * @param {number} attempt - Current attempt number (0-indexed)
 * @returns {number} - Delay in milliseconds, rounded to an integer
 */
function calculateDelay(attempt) {
  const { baseDelay, maxDelay, backoffMultiplier, jitterFactor } = RETRY_CONFIG;
  const backoff = Math.min(baseDelay * backoffMultiplier ** attempt, maxDelay);
  const spread = Math.random() * 2 - 1; // uniform in [-1, 1)
  return Math.round(backoff + backoff * jitterFactor * spread);
}
|
|
110
|
+
|
|
111
|
+
/**
 * Executes a function with exponential backoff retry.
 *
 * `fn` is always invoked at least once. After a failure it is invoked again
 * only while retries remain and `isRetryable(error)` is true; the pause
 * between attempts comes from `calculateDelay(attempt)`.
 *
 * @param {Function} fn - Async function to execute
 * @param {object} options - Retry options
 * @param {number} [options.maxRetries] - Retries after the first attempt
 *   (defaults to RETRY_CONFIG.maxRetries; negative or NaN values mean 0)
 * @returns {Promise<any>} - Result from the function
 * @throws {Error} - Last error if all retries fail, or the first permanent error
 */
export async function generateWithRetry(fn, options = {}) {
  // A negative or NaN budget previously skipped the loop entirely, so `fn`
  // was never called and the promise rejected with `undefined`.
  const requested = Number(options?.maxRetries ?? RETRY_CONFIG.maxRetries);
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, requested);
  let lastError;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;

      // Don't retry if this is the last attempt or error is not retryable
      if (attempt >= maxRetries || !isRetryable(error)) {
        throw error;
      }

      // Calculate delay and wait
      const delay = calculateDelay(attempt);
      if (process.env.SEARCH_DEBUG) {
        console.log(`[LLM] Retry ${attempt + 1}/${maxRetries} after ${delay}ms: ${error.message}`);
      }
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }

  // Only reachable with a fractional retry budget, where the loop condition
  // ends the loop before the in-loop "last attempt" check fires.
  throw lastError;
}
|
|
144
|
+
|
|
145
|
+
// =============================================================================
|
|
146
|
+
// PROVIDER: GROQ (Primary — fastest, cheapest)
|
|
147
|
+
// =============================================================================
|
|
148
|
+
|
|
149
|
+
/**
 * Sends one chat-completions request to Groq for the given model.
 *
 * The request carries a fixed system message plus the caller's prompt and is
 * aborted after GROQ_HCGS_CONFIG.timeout milliseconds.
 *
 * @param {string} model - Groq model ID
 * @param {string} prompt - The prompt to send
 * @param {number} maxTokens - Maximum tokens to generate
 * @returns {Promise<string>} - Trimmed text of the first choice, or '' when the
 *   response carries none
 * @throws {Error} - With a `status` property when the HTTP response is not ok
 */
async function callGroqCompletion(model, prompt, maxTokens) {
  const systemMessage = {
    role: 'system',
    content: 'You are a code documentation assistant. Generate concise, accurate summaries of code entities. Focus on what the code does, not how. Be brief and direct.',
  };
  const userMessage = { role: 'user', content: prompt };

  const payload = JSON.stringify({
    model,
    messages: [systemMessage, userMessage],
    max_tokens: maxTokens,
    temperature: 0.3,
  });

  const endpoint = `${GROQ_HCGS_CONFIG.baseUrl}/chat/completions`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${GROQ_API_KEY}`,
    },
    body: payload,
    signal: AbortSignal.timeout(GROQ_HCGS_CONFIG.timeout),
  });

  if (response.ok) {
    const data = await response.json();
    const text = data.choices?.[0]?.message?.content;
    return text?.trim() || '';
  }

  // Attach the HTTP status so callers can tell transient from permanent failures.
  const failure = new Error(`Groq API error: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
|
|
192
|
+
|
|
193
|
+
/**
 * Generates a summary using Groq, with an internal model fallback.
 *
 * The primary model is HCGS_MODEL when that environment variable is set,
 * otherwise GROQ_HCGS_CONFIG.model. When the primary request fails,
 * GROQ_HCGS_CONFIG.fallbackModel is tried once, except when:
 *   - the model was chosen via HCGS_MODEL (the user's choice is respected),
 *   - no fallback model is configured, or
 *   - the failure was HTTP 401: both models use the same API key, so the
 *     fallback request would be rejected the same way.
 *
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens=150] - Maximum tokens to generate
 * @returns {Promise<string>} - Generated summary
 * @throws {Error} - When the key is missing, or the error of the last attempted model
 */
async function generateWithGroq(prompt, options = {}) {
  if (!GROQ_API_KEY) {
    throw new Error('Groq API key not configured');
  }

  const primaryModel = process.env.HCGS_MODEL || GROQ_HCGS_CONFIG.model;
  const maxTokens = options.maxTokens ?? 150;

  try {
    return await callGroqCompletion(primaryModel, prompt, maxTokens);
  } catch (primaryError) {
    // If user overrode model via HCGS_MODEL, respect their choice — don't fallback
    if (process.env.HCGS_MODEL || !GROQ_HCGS_CONFIG.fallbackModel) {
      throw primaryError;
    }

    // An authentication failure is tied to the key, not to the model.
    if (primaryError?.status === 401) {
      throw primaryError;
    }

    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Groq ${primaryModel} failed, trying fallback ${GROQ_HCGS_CONFIG.fallbackModel}: ${primaryError.message}`);
    }

    return await callGroqCompletion(GROQ_HCGS_CONFIG.fallbackModel, prompt, maxTokens);
  }
}
|
|
216
|
+
|
|
217
|
+
// =============================================================================
|
|
218
|
+
// PROVIDER: CEREBRAS (Fallback — fast, good quality)
|
|
219
|
+
// =============================================================================
|
|
220
|
+
|
|
221
|
+
/**
 * Produces a summary through the Cerebras chat-completions endpoint.
 *
 * The model is HCGS_MODEL when set, otherwise `getCerebrasModel('hcgs')`.
 * `disable_reasoning` is added to the request only when
 * CEREBRAS_CONFIG.reasoning.fastMode is truthy. The request is aborted
 * after 15 seconds.
 *
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens=150] - Maximum tokens to generate
 * @returns {Promise<string>} - Trimmed text of the first choice, or '' when absent
 * @throws {Error} - When Cerebras is not configured, or (with `status`) on a non-ok response
 */
async function generateWithCerebras(prompt, options = {}) {
  if (!isCerebrasAvailable()) {
    throw new Error('Cerebras API key not configured');
  }

  const payload = {
    model: process.env.HCGS_MODEL || getCerebrasModel('hcgs'),
    messages: [
      {
        role: 'system',
        content: 'You are a code documentation assistant. Generate concise, accurate summaries of code entities. Focus on what the code does, not how. Be brief and direct.',
      },
      { role: 'user', content: prompt },
    ],
    max_tokens: options.maxTokens ?? 150,
    temperature: 0.3,
  };

  // Disable chain-of-thought for faster responses
  if (CEREBRAS_CONFIG.reasoning?.fastMode) {
    payload.disable_reasoning = true;
  }

  const endpoint = `${CEREBRAS_CONFIG.baseUrl}/chat/completions`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${CEREBRAS_CONFIG.apiKey}`,
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(15000), // 15s timeout
  });

  if (response.ok) {
    const data = await response.json();
    const text = data.choices?.[0]?.message?.content;
    return text?.trim() || '';
  }

  // Attach the HTTP status so callers can classify the failure.
  const failure = new Error(`Cerebras API error: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
|
|
272
|
+
|
|
273
|
+
// =============================================================================
|
|
274
|
+
// PROVIDER: OLLAMA (Local GPU)
|
|
275
|
+
// =============================================================================
|
|
276
|
+
|
|
277
|
+
/**
 * Checks if Ollama is reachable and has the configured model installed.
 *
 * Queries `${OLLAMA_CONFIG.baseUrl}/api/tags` (2s timeout) and looks for
 * OLLAMA_CONFIG.model among the listed model names. The match is exact:
 * matching only on the model family would report "available" when a
 * different tag is installed, and generation would then fail because the
 * requested model does not exist. A configured name without a tag also
 * matches `<name>:latest`.
 *
 * Best-effort: any network or parsing failure is reported as "not available".
 *
 * @returns {Promise<boolean>}
 */
async function isOllamaAvailable() {
  try {
    const response = await fetch(`${OLLAMA_CONFIG.baseUrl}/api/tags`, {
      signal: AbortSignal.timeout(2000),
    });
    if (!response.ok) return false;

    const data = await response.json();
    if (!Array.isArray(data?.models)) return false;

    // Check if our desired model is available
    const wanted = OLLAMA_CONFIG.model;
    const acceptedNames = wanted.includes(':') ? [wanted] : [wanted, `${wanted}:latest`];
    return data.models.some((m) => acceptedNames.includes(m?.name));
  } catch (error) {
    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Ollama availability check failed: ${error?.message ?? error}`);
    }
    return false;
  }
}
|
|
298
|
+
|
|
299
|
+
/**
 * Produces a summary through Ollama's `/api/generate` endpoint.
 *
 * The assistant instructions are prepended to the prompt, streaming is
 * disabled, and the request is aborted after OLLAMA_CONFIG.timeout
 * milliseconds.
 *
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens=150] - Maximum tokens to generate
 * @returns {Promise<string>} - Trimmed `response` field, or '' when absent
 * @throws {Error} - With a `status` property when the HTTP response is not ok
 */
async function generateWithOllama(prompt, options = {}) {
  const tokenLimit = options.maxTokens ?? 150;

  const payload = JSON.stringify({
    model: OLLAMA_CONFIG.model,
    prompt: `You are a code documentation assistant. Generate concise, accurate summaries of code entities. Focus on what the code does, not how. Be brief and direct.\n\n${prompt}`,
    stream: false,
    options: { num_predict: tokenLimit, temperature: 0.3 },
  });

  const endpoint = `${OLLAMA_CONFIG.baseUrl}/api/generate`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
    signal: AbortSignal.timeout(OLLAMA_CONFIG.timeout),
  });

  if (response.ok) {
    const data = await response.json();
    return data.response?.trim() || '';
  }

  // Attach the HTTP status so callers can classify the failure.
  const failure = new Error(`Ollama API error: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
|
|
336
|
+
|
|
337
|
+
// =============================================================================
|
|
338
|
+
// PROVIDER: TRANSFORMERS.JS (Local CPU)
|
|
339
|
+
// =============================================================================
|
|
340
|
+
|
|
341
|
+
// Text-generation pipeline, created on first successful probe.
let transformersPipeline = null;
// Memoised probe result: null = not probed yet, otherwise true/false.
let transformersAvailable = null;

/**
 * Reports whether Transformers.js can be used, initialising it on first call.
 *
 * The first call dynamically imports `@xenova/transformers`, configures its
 * `env`, and builds a quantized text-generation pipeline for
 * TRANSFORMERS_CONFIG.model. The outcome (success or failure) is stored in
 * `transformersAvailable` and returned directly by later calls.
 *
 * @returns {Promise<boolean>}
 */
async function isTransformersAvailable() {
  const alreadyProbed = transformersAvailable !== null;
  if (alreadyProbed) {
    return transformersAvailable;
  }

  try {
    // Dynamic import to avoid errors if not installed
    const transformers = await import('@xenova/transformers');
    const createPipeline = transformers.pipeline;
    const runtimeEnv = transformers.env;

    // Configure for optimal performance
    runtimeEnv.allowLocalModels = true;
    runtimeEnv.useBrowserCache = false;

    // Initialize the pipeline (this downloads the model on first run)
    if (process.env.SEARCH_DEBUG) {
      console.log('[LLM] Initializing Transformers.js pipeline...');
    }

    const pipelineOptions = {
      quantized: true, // Use quantized model for speed
    };
    transformersPipeline = await createPipeline('text-generation', TRANSFORMERS_CONFIG.model, pipelineOptions);
    transformersAvailable = true;
  } catch (error) {
    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Transformers.js not available: ${error.message}`);
    }
    transformersAvailable = false;
  }

  return transformersAvailable;
}
|
|
381
|
+
|
|
382
|
+
/**
 * Produces a summary with the in-process Transformers.js pipeline.
 *
 * If the pipeline has not been created yet, `isTransformersAvailable()` is
 * asked to initialise it. The prompt is wrapped in a
 * `<|system|>` / `<|user|>` / `<|assistant|>` template before generation.
 *
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens] - Maximum new tokens (defaults to
 *   TRANSFORMERS_CONFIG.maxNewTokens)
 * @returns {Promise<string>} - Trimmed generated text, or '' when absent
 * @throws {Error} - When Transformers.js cannot be initialised
 */
async function generateWithTransformers(prompt, options = {}) {
  const ready = Boolean(transformersPipeline) || (await isTransformersAvailable());
  if (!ready) {
    throw new Error('Transformers.js not available');
  }

  const systemPrompt = 'You are a code documentation assistant. Generate concise, accurate summaries.';
  const fullPrompt = `<|system|>\n${systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;

  const generationOptions = {
    max_new_tokens: options.maxTokens ?? TRANSFORMERS_CONFIG.maxNewTokens,
    temperature: TRANSFORMERS_CONFIG.temperature,
    do_sample: true,
    return_full_text: false,
  };

  const [firstCandidate] = await transformersPipeline(fullPrompt, generationOptions);
  return firstCandidate?.generated_text?.trim() || '';
}
|
|
410
|
+
|
|
411
|
+
// =============================================================================
|
|
412
|
+
// PROVIDER: STATIC FALLBACK (No LLM)
|
|
413
|
+
// =============================================================================
|
|
414
|
+
|
|
415
|
+
/**
 * Generates a static summary without LLM.
 * Uses available metadata: doc_comment > signature > type + name.
 *
 *   1. doc_comment: comment delimiters are stripped and the first non-empty
 *      sentence or line is used. A sentence ends at a period followed by
 *      whitespace or end of text, so dotted identifiers such as
 *      `fs.readFile` are kept intact.
 *   2. signature: whitespace is collapsed to single spaces.
 *   3. Otherwise "<Type>: <name as words>" is built from type and name.
 *
 * Candidates from steps 1 and 2 are used only when longer than 10 characters
 * and are clipped to 200 characters. Non-string metadata is ignored, and a
 * missing entity yields "Entity: unknown".
 *
 * @param {object} entity - Code entity with metadata
 * @returns {string} - Generated summary
 */
function generateStaticSummary(entity) {
  const { doc_comment: docComment, signature, type, name } = entity ?? {};
  const clip = (text) => (text.length > 200 ? text.slice(0, 197) + '...' : text);

  // Priority 1: Use existing doc comment
  if (typeof docComment === 'string' && docComment) {
    const firstSentence = docComment
      .replace(/^\/\*\*\s*|\s*\*\/$/g, '') // Remove /** */
      .replace(/^\s*\*\s*/gm, '') // Remove leading *
      .split(/\.(?=\s|$)|\n/) // Sentence end (period + whitespace/end) or newline
      .map((part) => part.trim())
      .find((part) => part.length > 0); // Skip leading blank lines

    if (firstSentence && firstSentence.length > 10) {
      return clip(firstSentence);
    }
  }

  // Priority 2: Use signature if available
  if (typeof signature === 'string' && signature) {
    const cleanSig = signature.replace(/\s+/g, ' ').trim();

    if (cleanSig.length > 10) {
      return clip(cleanSig);
    }
  }

  // Priority 3: Construct from type and name
  const kind = String(type || 'entity');
  const rawName = String(name || 'unknown');

  // Convert CamelCase/snake_case to words
  const humanName = rawName
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/_/g, ' ')
    .toLowerCase();

  return `${kind.charAt(0).toUpperCase() + kind.slice(1)}: ${humanName}`;
}
|
|
459
|
+
|
|
460
|
+
// =============================================================================
|
|
461
|
+
// PROVIDER DETECTION AND SELECTION
|
|
462
|
+
// =============================================================================
|
|
463
|
+
|
|
464
|
+
/**
 * Provider interface
 * @typedef {object} SummaryProvider
 * @property {string} name - Provider name
 * @property {boolean} isLocal - Whether provider runs locally
 * @property {number} priority - Selection rank; lower values are tried first
 * @property {Function} checkAvailable - Returns a promise for whether the provider is usable
 * @property {Function} generate - Generation function
 */

// Provider registry (priority order: Groq → Cerebras → Ollama → Transformers.js → Static)
const providers = {
  // Remote: usable whenever an API key is configured.
  groq: {
    name: 'groq',
    isLocal: false,
    priority: 1,
    checkAvailable: () => Promise.resolve(GROQ_API_KEY.length > 0),
    generate: generateWithGroq,
  },

  // Remote: availability is decided by the shared config module.
  cerebras: {
    name: 'cerebras',
    isLocal: false,
    priority: 2,
    checkAvailable: () => Promise.resolve(isCerebrasAvailable()),
    generate: generateWithCerebras,
  },

  // Local server: availability is probed over HTTP.
  ollama: {
    name: 'ollama',
    isLocal: true,
    priority: 3,
    checkAvailable: isOllamaAvailable,
    generate: generateWithOllama,
  },

  // Local in-process model: the probe also initialises the pipeline.
  transformers: {
    name: 'transformers',
    isLocal: true,
    priority: 4,
    checkAvailable: isTransformersAvailable,
    generate: generateWithTransformers,
  },

  // Last resort: always available, no model involved.
  static: {
    name: 'static',
    isLocal: true,
    priority: 99,
    checkAvailable: () => Promise.resolve(true),
    // Static provider expects entity metadata, not raw prompt
    generate: async (prompt, options) =>
      (options?.entity ? generateStaticSummary(options.entity) : 'Code entity'),
  },
};
|
|
516
|
+
|
|
517
|
+
// Cached provider selection
let selectedProvider = null;
let providerCheckPromise = null;
// The `preferLocal` value each cached entry above was computed for, so a
// selection made under one preference is never handed back for the other.
let selectedProviderPreferLocal = null;
let providerCheckPreferLocal = null;

/**
 * Auto-detects and returns the best available summary provider.
 *
 * Providers are probed in ascending `priority`; with `preferLocal`, local
 * providers are probed before remote ones. The first provider whose
 * `checkAvailable()` resolves truthy wins; a probe that throws is skipped.
 * If none is available the static provider is returned.
 *
 * The result is cached together with the preference it was computed for.
 * A later call reuses the cached provider (or joins an in-flight check)
 * only when it asks for the same preference and `forceCheck` is not set.
 *
 * @param {object} options - Options
 * @param {boolean} options.preferLocal - Prefer local providers over remote
 * @param {boolean} options.forceCheck - Force re-check of provider availability
 * @returns {Promise<SummaryProvider>} - Best available provider
 */
export async function getSummaryProvider(options = {}) {
  const { preferLocal = false, forceCheck = false } = options;
  const wantLocal = Boolean(preferLocal);

  if (!forceCheck) {
    // Return cached provider if it was selected under the same preference
    if (selectedProvider && selectedProviderPreferLocal === wantLocal) {
      return selectedProvider;
    }

    // Prevent concurrent provider checks for the same preference
    if (providerCheckPromise && providerCheckPreferLocal === wantLocal) {
      return providerCheckPromise;
    }
  }

  // Store the selection and its preference together so they never disagree.
  const remember = (provider) => {
    selectedProvider = provider;
    selectedProviderPreferLocal = wantLocal;
    return provider;
  };

  providerCheckPreferLocal = wantLocal;
  providerCheckPromise = (async () => {
    // Get providers sorted by priority (optionally preferring local)
    const providerList = Object.values(providers).sort((a, b) => {
      if (wantLocal && a.isLocal !== b.isLocal) {
        // Local providers first
        return a.isLocal ? -1 : 1;
      }
      return a.priority - b.priority;
    });

    // Find first available provider
    for (const provider of providerList) {
      try {
        const available = await provider.checkAvailable();
        if (available) {
          if (process.env.SEARCH_DEBUG) {
            console.log(`[LLM] Selected provider: ${provider.name}`);
          }
          return remember(provider);
        }
      } catch (error) {
        if (process.env.SEARCH_DEBUG) {
          console.log(`[LLM] Provider ${provider.name} check failed: ${error.message}`);
        }
      }
    }

    // Fallback to static (always available)
    return remember(providers.static);
  })();

  return providerCheckPromise;
}
|
|
578
|
+
|
|
579
|
+
// =============================================================================
|
|
580
|
+
// UNIFIED GENERATION INTERFACE
|
|
581
|
+
// =============================================================================
|
|
582
|
+
|
|
583
|
+
/**
 * Produces a summary for a code entity.
 *
 * The provider chosen by `getSummaryProvider` is tried first (through
 * `generateWithRetry`; local providers get a single retry, others get
 * `RETRY_CONFIG.maxRetries`). If that fails, the remaining providers are
 * tried once each in a fixed order, skipping any that report themselves
 * unavailable or that throw. When every provider fails, a static summary is
 * built from `options.entity`.
 *
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options (forwarded to the provider)
 * @param {object} options.entity - Code entity metadata (for static fallback)
 * @param {number} options.maxTokens - Maximum tokens to generate
 * @param {boolean} options.preferLocal - Prefer local providers
 * @returns {Promise<{summary: string, provider: string}>}
 */
export async function generateSummary(prompt, options = {}) {
  const primary = await getSummaryProvider({ preferLocal: options.preferLocal });

  // Primary attempt, with retries.
  try {
    const retryBudget = primary.isLocal ? 1 : RETRY_CONFIG.maxRetries;
    const text = await generateWithRetry(
      () => primary.generate(prompt, options),
      { maxRetries: retryBudget }
    );
    return { summary: text, provider: primary.name };
  } catch (primaryError) {
    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Primary provider ${primary.name} failed: ${primaryError.message}`);
    }
  }

  // Walk the fallback chain, leaving out the provider that just failed.
  const candidateNames = ['groq', 'cerebras', 'ollama', 'transformers', 'static']
    .filter((candidateName) => candidateName !== primary.name);

  for (const candidateName of candidateNames) {
    const candidate = providers[candidateName];
    try {
      if (await candidate.checkAvailable()) {
        const text = await candidate.generate(prompt, options);
        return { summary: text, provider: candidate.name };
      }
    } catch (fallbackError) {
      if (process.env.SEARCH_DEBUG) {
        console.log(`[LLM] Fallback ${candidateName} failed: ${fallbackError.message}`);
      }
    }
  }

  // Last resort: static summary built directly from the entity metadata.
  return {
    summary: generateStaticSummary(options.entity || {}),
    provider: 'static',
  };
}
|
|
641
|
+
|
|
642
|
+
/**
 * Summarizes a list of entities, preserving input order in the result.
 *
 * Items are processed in consecutive chunks; all items of a chunk run in
 * parallel and the next chunk starts once the whole chunk has settled. The
 * chunk size is 1 when the selected provider is local and 5 otherwise.
 *
 * @param {Array<{prompt: string, entity: object}>} items - Items to summarize
 * @param {object} options - Generation options, forwarded to `generateSummary`
 *   (each item's `entity` overrides `options.entity`)
 * @returns {Promise<Array<{summary: string, provider: string}>>}
 */
export async function generateSummariesBatch(items, options = {}) {
  const { isLocal } = await getSummaryProvider({ preferLocal: options.preferLocal });
  // Local providers are run one request at a time.
  const chunkSize = isLocal ? 1 : 5;

  const summarize = ({ prompt, entity }) => generateSummary(prompt, { ...options, entity });

  const collected = [];
  let offset = 0;
  while (offset < items.length) {
    const chunk = items.slice(offset, offset + chunkSize);
    const chunkResults = await Promise.all(chunk.map((item) => summarize(item)));
    collected.push(...chunkResults);
    offset += chunkSize;
  }

  return collected;
}
|
|
663
|
+
|
|
664
|
+
// =============================================================================
|
|
665
|
+
// PROMPT TEMPLATES
|
|
666
|
+
// =============================================================================
|
|
667
|
+
|
|
668
|
+
/**
 * Creates a summary prompt for a code entity.
 *
 * The prompt consists of, in order: an instruction line carrying the token
 * limit for the entity type, the entity name, the file (if set), the code in
 * a fenced block (truncated to 2000 UTF-16 code units), up to five child
 * summaries, and a closing instruction.
 *
 * The token limit is read from `HCGS_CONFIG.summaryTokenLimits[entity.type]`,
 * falling back to `HCGS_CONFIG.defaultTokenLimit` when the type has no own
 * entry (or a falsy one).
 *
 * @param {object} entity - Code entity
 * @param {string} entity.type - Entity type, used to look up the token limit
 * @param {string} entity.name - Entity name
 * @param {string} [entity.file] - Source file of the entity
 * @param {string} [entity.code] - Source code of the entity
 * @param {Array<{name: string, summary: string}>} [entity.childSummaries] -
 *   Summaries of contained entities; only the first five are used
 * @returns {string} - Formatted prompt
 */
export function createSummaryPrompt(entity) {
  const MAX_CODE_LENGTH = 2000;

  // Own-property lookup only: a type such as "constructor" or "toString"
  // would otherwise resolve to an inherited Object.prototype member and end
  // up in the prompt as function source text.
  const limits = HCGS_CONFIG.summaryTokenLimits;
  const configuredLimit = Object.prototype.hasOwnProperty.call(limits, entity.type)
    ? limits[entity.type]
    : undefined;
  const tokenLimit = configuredLimit || HCGS_CONFIG.defaultTokenLimit;

  let prompt = `Summarize this ${entity.type} in ${tokenLimit} tokens or less:\n\n`;

  // Add entity header
  prompt += `Name: ${entity.name}\n`;
  if (entity.file) {
    prompt += `File: ${entity.file}\n`;
  }

  // Add code content
  if (entity.code) {
    let codeSnippet = entity.code;
    if (entity.code.length > MAX_CODE_LENGTH) {
      let cut = MAX_CODE_LENGTH;
      if (typeof entity.code === 'string') {
        // Do not cut a surrogate pair in half: a lone high surrogate is not
        // valid Unicode text.
        const lastUnit = entity.code.charCodeAt(cut - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          cut -= 1;
        }
      }
      codeSnippet = entity.code.slice(0, cut) + '\n// ... (truncated)';
    }
    prompt += `\nCode:\n\`\`\`\n${codeSnippet}\n\`\`\`\n`;
  }

  // Add context from children if available
  if (entity.childSummaries && entity.childSummaries.length > 0) {
    prompt += '\nContains:\n';
    for (const child of entity.childSummaries.slice(0, 5)) {
      prompt += `- ${child.name}: ${child.summary}\n`;
    }
  }

  prompt += '\nProvide a concise summary focusing on purpose and functionality.';

  return prompt;
}
|
|
704
|
+
|
|
705
|
+
// =============================================================================
|
|
706
|
+
// EXPORTS
|
|
707
|
+
// =============================================================================
|
|
708
|
+
|
|
709
|
+
export {
|
|
710
|
+
generateWithGroq,
|
|
711
|
+
generateWithCerebras,
|
|
712
|
+
generateWithOllama,
|
|
713
|
+
generateWithTransformers,
|
|
714
|
+
generateStaticSummary,
|
|
715
|
+
isOllamaAvailable,
|
|
716
|
+
isTransformersAvailable,
|
|
717
|
+
providers,
|
|
718
|
+
GROQ_HCGS_CONFIG,
|
|
719
|
+
OLLAMA_CONFIG,
|
|
720
|
+
TRANSFORMERS_CONFIG,
|
|
721
|
+
RETRY_CONFIG,
|
|
722
|
+
};
|
|
723
|
+
|
|
724
|
+
export default {
|
|
725
|
+
getSummaryProvider,
|
|
726
|
+
generateSummary,
|
|
727
|
+
generateSummariesBatch,
|
|
728
|
+
generateWithRetry,
|
|
729
|
+
isRetryable,
|
|
730
|
+
createSummaryPrompt,
|
|
731
|
+
generateStaticSummary,
|
|
732
|
+
providers,
|
|
733
|
+
};
|