sweet-search 0.0.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/LICENSE +190 -0
  2. package/NOTICE +23 -0
  3. package/core/cli.js +51 -0
  4. package/core/config.js +27 -0
  5. package/core/embedding/embedding-cache.js +467 -0
  6. package/core/embedding/embedding-local-model.js +845 -0
  7. package/core/embedding/embedding-remote.js +492 -0
  8. package/core/embedding/embedding-service.js +712 -0
  9. package/core/embedding/embedding-telemetry.js +219 -0
  10. package/core/embedding/index.js +40 -0
  11. package/core/graph/community-detector.js +294 -0
  12. package/core/graph/graph-expansion.js +839 -0
  13. package/core/graph/graph-extractor.js +2304 -0
  14. package/core/graph/graph-search.js +2148 -0
  15. package/core/graph/hcgs-generator.js +666 -0
  16. package/core/graph/index.js +16 -0
  17. package/core/graph/leiden-algorithm.js +547 -0
  18. package/core/graph/relationship-resolver.js +366 -0
  19. package/core/graph/repo-map.js +408 -0
  20. package/core/graph/summary-manager.js +549 -0
  21. package/core/indexing/artifact-builder.js +1054 -0
  22. package/core/indexing/ast-chunker.js +709 -0
  23. package/core/indexing/chunking/chunk-builder.js +170 -0
  24. package/core/indexing/chunking/markdown-chunker.js +503 -0
  25. package/core/indexing/chunking/plaintext-chunker.js +104 -0
  26. package/core/indexing/dedup/dedup-phase.js +159 -0
  27. package/core/indexing/dedup/exemplar-selector.js +65 -0
  28. package/core/indexing/document-chunker.js +56 -0
  29. package/core/indexing/incremental-parser.js +390 -0
  30. package/core/indexing/incremental-tracker.js +761 -0
  31. package/core/indexing/index-codebase-v21.js +472 -0
  32. package/core/indexing/index-maintainer.mjs +1674 -0
  33. package/core/indexing/index.js +90 -0
  34. package/core/indexing/indexer-ann.js +1077 -0
  35. package/core/indexing/indexer-build.js +742 -0
  36. package/core/indexing/indexer-phases.js +800 -0
  37. package/core/indexing/indexer-pool.js +764 -0
  38. package/core/indexing/indexer-sparse-gram.js +98 -0
  39. package/core/indexing/indexer-utils.js +536 -0
  40. package/core/indexing/indexer-worker.js +148 -0
  41. package/core/indexing/li-skip-policy.js +225 -0
  42. package/core/indexing/merkle-tracker.js +244 -0
  43. package/core/indexing/model-pool.js +166 -0
  44. package/core/infrastructure/code-graph-repository.js +120 -0
  45. package/core/infrastructure/codebase-repository.js +131 -0
  46. package/core/infrastructure/config/dedup.js +54 -0
  47. package/core/infrastructure/config/embedding.js +298 -0
  48. package/core/infrastructure/config/graph.js +80 -0
  49. package/core/infrastructure/config/index.js +82 -0
  50. package/core/infrastructure/config/indexing.js +8 -0
  51. package/core/infrastructure/config/platform.js +254 -0
  52. package/core/infrastructure/config/ranking.js +221 -0
  53. package/core/infrastructure/config/search.js +396 -0
  54. package/core/infrastructure/config/translation.js +89 -0
  55. package/core/infrastructure/config/vector-store.js +114 -0
  56. package/core/infrastructure/constants.js +86 -0
  57. package/core/infrastructure/coreml-cascade.js +909 -0
  58. package/core/infrastructure/coreml-cascade.json +46 -0
  59. package/core/infrastructure/coreml-provider.js +81 -0
  60. package/core/infrastructure/db-utils.js +69 -0
  61. package/core/infrastructure/dedup-hashing.js +83 -0
  62. package/core/infrastructure/hardware-capability.js +332 -0
  63. package/core/infrastructure/index.js +104 -0
  64. package/core/infrastructure/language-patterns/maps.js +121 -0
  65. package/core/infrastructure/language-patterns/registry-core.js +323 -0
  66. package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
  67. package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
  68. package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
  69. package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
  70. package/core/infrastructure/language-patterns/registry.js +19 -0
  71. package/core/infrastructure/language-patterns.js +141 -0
  72. package/core/infrastructure/llm-provider.js +733 -0
  73. package/core/infrastructure/manifest.json +46 -0
  74. package/core/infrastructure/maxsim.wasm +0 -0
  75. package/core/infrastructure/model-fetcher.js +423 -0
  76. package/core/infrastructure/model-registry.js +214 -0
  77. package/core/infrastructure/native-inference.js +587 -0
  78. package/core/infrastructure/native-resolver.js +187 -0
  79. package/core/infrastructure/native-sparse-gram.js +257 -0
  80. package/core/infrastructure/native-tokenizer.js +160 -0
  81. package/core/infrastructure/onnx-mutex.js +45 -0
  82. package/core/infrastructure/onnx-session-utils.js +261 -0
  83. package/core/infrastructure/ort-pipeline.js +111 -0
  84. package/core/infrastructure/project-detector.js +102 -0
  85. package/core/infrastructure/quantization.js +410 -0
  86. package/core/infrastructure/simd-distance.js +502 -0
  87. package/core/infrastructure/simd-distance.wasm +0 -0
  88. package/core/infrastructure/tree-sitter-provider.js +665 -0
  89. package/core/infrastructure/webgpu-maxsim.js +222 -0
  90. package/core/query/index.js +35 -0
  91. package/core/query/intent-detector.js +201 -0
  92. package/core/query/intent-router.js +156 -0
  93. package/core/query/query-router-catboost.js +222 -0
  94. package/core/query/query-router-ml.js +266 -0
  95. package/core/query/query-router.js +213 -0
  96. package/core/ranking/cascaded-scorer.js +379 -0
  97. package/core/ranking/flashrank.js +810 -0
  98. package/core/ranking/index.js +49 -0
  99. package/core/ranking/late-interaction-index.js +2383 -0
  100. package/core/ranking/late-interaction-model.js +812 -0
  101. package/core/ranking/local-reranker.js +374 -0
  102. package/core/ranking/mmr.js +379 -0
  103. package/core/ranking/quality-scorer.js +363 -0
  104. package/core/search/context-expander.js +1167 -0
  105. package/core/search/dedup/sibling-expander.js +327 -0
  106. package/core/search/index.js +16 -0
  107. package/core/search/search-boost.js +259 -0
  108. package/core/search/search-cli.js +544 -0
  109. package/core/search/search-format.js +282 -0
  110. package/core/search/search-fusion.js +327 -0
  111. package/core/search/search-hybrid.js +204 -0
  112. package/core/search/search-pattern-chunks.js +337 -0
  113. package/core/search/search-pattern-planner.js +439 -0
  114. package/core/search/search-pattern-prefilter.js +412 -0
  115. package/core/search/search-pattern-ripgrep.js +663 -0
  116. package/core/search/search-pattern.js +463 -0
  117. package/core/search/search-postprocess.js +452 -0
  118. package/core/search/search-semantic.js +706 -0
  119. package/core/search/search-server.js +554 -0
  120. package/core/search/session-daemon-prewarm.mjs +164 -0
  121. package/core/search/session-warmup.js +595 -0
  122. package/core/search/sweet-search.js +632 -0
  123. package/core/search/warmup-metrics.js +532 -0
  124. package/core/start-server.js +6 -0
  125. package/core/training/query-router/features/extractor.js +762 -0
  126. package/core/training/query-router/features/multilingual-patterns.js +431 -0
  127. package/core/training/query-router/features/text-segmenter.js +303 -0
  128. package/core/training/query-router/features/unicode-utils.js +383 -0
  129. package/core/training/query-router/output/v45_router_d4.js +11521 -0
  130. package/core/training/query-router/output/v46_router_d4.js +11498 -0
  131. package/core/vector-store/binary-heap.js +227 -0
  132. package/core/vector-store/binary-hnsw-index.js +1004 -0
  133. package/core/vector-store/float-vector-store.js +234 -0
  134. package/core/vector-store/hnsw-index.js +580 -0
  135. package/core/vector-store/index.js +39 -0
  136. package/core/vector-store/seismic-index.js +498 -0
  137. package/core/vocabulary/index.js +84 -0
  138. package/core/vocabulary/vocab-constants.js +20 -0
  139. package/core/vocabulary/vocab-miner-extractors.js +375 -0
  140. package/core/vocabulary/vocab-miner-nl.js +404 -0
  141. package/core/vocabulary/vocab-miner-utils.js +146 -0
  142. package/core/vocabulary/vocab-miner.js +574 -0
  143. package/core/vocabulary/vocab-prewarm-cli.js +110 -0
  144. package/core/vocabulary/vocab-ranker.js +492 -0
  145. package/core/vocabulary/vocab-warmer.js +523 -0
  146. package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
  147. package/core/vocabulary/vocabulary-utils.js +704 -0
  148. package/crates/wasm-router/pkg/package.json +13 -0
  149. package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
  150. package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
  151. package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
  152. package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
  153. package/mcp/config-gen.js +121 -0
  154. package/mcp/server.js +335 -0
  155. package/mcp/tool-handlers.js +476 -0
  156. package/package.json +131 -9
  157. package/scripts/benchmark-harness.js +794 -0
  158. package/scripts/init.js +1058 -0
  159. package/scripts/smoke-test.js +435 -0
  160. package/scripts/uninstall.js +478 -0
  161. package/scripts/verify-runtime.js +176 -0
@@ -0,0 +1,733 @@
1
+ /**
2
+ * LLM Provider Fallback Chain for HCGS Summary Generation
3
+ *
4
+ * Tiered approach for generating code summaries:
5
+ * 1. Groq (primary) - llama-3.2-3b-preview, ~2800 tok/s, $0.06/M (cheapest)
6
+ * └─ internal fallback: llama-3.1-8b-instant (~560 tok/s) if 3B model fails
7
+ * 2. Cerebras (fallback) - llama3.1-8b, ~2200 tok/s, $0.10/M
8
+ * 3. Ollama (local GPU) - qwen2.5-coder:7b-instruct at localhost:11434
9
+ * 4. Transformers.js (local CPU) - phi-3-mini-4k-instruct via @xenova/transformers
10
+ * 5. Static fallback (no LLM) - uses doc_comment, signature, or "{type} {name}"
11
+ *
12
+ * Override the Groq/Cerebras model: HCGS_MODEL=model-name (Ollama reads OLLAMA_MODEL instead)
13
+ *
14
+ * Features:
15
+ * - Auto-detection of best available provider
16
+ * - Exponential backoff retry with jitter
17
+ * - Intelligent error classification (retryable vs permanent)
18
+ * - Consistent interface across all providers
19
+ */
20
+
21
+ import { CEREBRAS_CONFIG, HCGS_CONFIG, getCerebrasModel, isCerebrasAvailable } from './config/index.js';
22
+
23
+ // =============================================================================
24
+ // PROVIDER CONFIGURATION
25
+ // =============================================================================
26
+
27
// Groq API key from the environment; an empty string disables the Groq provider.
const GROQ_API_KEY = process.env.GROQ_API_KEY || '';

// Groq (remote) settings: primary model plus an internal fallback model.
const GROQ_HCGS_CONFIG = {
  apiKey: GROQ_API_KEY,
  baseUrl: 'https://api.groq.com/openai/v1',
  model: 'llama-3.2-3b-preview', // ~2800 tok/s, $0.06/M — fastest & cheapest
  fallbackModel: 'llama-3.1-8b-instant', // ~560 tok/s, $0.08/M — better quality fallback
  timeout: 15000,
};

// Ollama (local) settings, overridable through OLLAMA_BASE_URL / OLLAMA_MODEL.
const OLLAMA_CONFIG = {
  // Trailing slashes are stripped so `${baseUrl}/api/...` never yields a `//api/...` path
  baseUrl: (process.env.OLLAMA_BASE_URL || 'http://localhost:11434').replace(/\/+$/, ''),
  model: process.env.OLLAMA_MODEL || 'qwen2.5-coder:7b-instruct',
  timeout: 30000, // 30s timeout for local inference
};

// Transformers.js (local CPU) generation settings.
const TRANSFORMERS_CONFIG = {
  model: 'Xenova/Phi-3-mini-4k-instruct',
  maxNewTokens: 150,
  temperature: 0.3,
};

// Backoff parameters shared by the retry helpers (delays in milliseconds).
const RETRY_CONFIG = {
  maxRetries: 3,
  baseDelay: 1000, // 1 second
  maxDelay: 8000, // cap applied before jitter is added
  backoffMultiplier: 2,
  jitterFactor: 0.2, // 20% jitter
};
56
+
57
+ // =============================================================================
58
+ // ERROR CLASSIFICATION
59
+ // =============================================================================
60
+
61
/**
 * Determines if an error is retryable (transient) or permanent.
 *
 * Only HTTP client errors (4xx other than 408, 425 and 429) are classified as
 * permanent; network failures, timeouts, server errors and anything
 * unrecognised are treated as transient.
 *
 * @param {Error|object|null|undefined} error - The error to classify
 * @returns {boolean} - true if the error is retryable
 */
export function isRetryable(error) {
  // Nothing to inspect: apply the "unknown errors are transient" default
  if (error == null) {
    return true;
  }

  // Network errors are typically retryable. Node's fetch reports them as
  // `TypeError: fetch failed` with the system error code on `cause`.
  const networkCodes = ['ECONNREFUSED', 'ETIMEDOUT', 'ECONNRESET', 'ENOTFOUND', 'EAI_AGAIN'];
  if (networkCodes.includes(error.code) || networkCodes.includes(error.cause?.code)) {
    return true;
  }

  // HTTP status codes (NaN when absent, which fails every comparison below)
  const status = Number(error.status || error.statusCode || error.response?.status);
  // Request timeout (408), too early (425) and rate limits (429) are retryable
  if (status === 408 || status === 425 || status === 429) return true;
  // Server errors (5xx) are retryable
  if (status >= 500 && status < 600) return true;
  // Remaining client errors (4xx) are permanent
  if (status >= 400 && status < 500) return false;

  // Timeout errors are retryable; AbortSignal.timeout() rejects with a
  // DOMException named "TimeoutError", manual aborts use "AbortError".
  if (error.name === 'AbortError' ||
      error.name === 'TimeoutError' ||
      error.message?.includes('timeout')) {
    return true;
  }

  // Default: assume transient for unknown errors
  return true;
}
94
+
95
+ // =============================================================================
96
+ // RETRY MECHANISM
97
+ // =============================================================================
98
+
99
/**
 * Computes the wait before the next retry: exponential growth from
 * RETRY_CONFIG.baseDelay, capped at RETRY_CONFIG.maxDelay, then shifted by a
 * random jitter of up to ±jitterFactor of the capped value.
 * @param {number} attempt - Current attempt number (0-indexed)
 * @returns {number} - Delay in milliseconds, rounded to an integer
 */
function calculateDelay(attempt) {
  const { baseDelay, backoffMultiplier, maxDelay, jitterFactor } = RETRY_CONFIG;
  const backoff = Math.min(baseDelay * Math.pow(backoffMultiplier, attempt), maxDelay);
  // Uniform value in [-1, 1) so the jitter can shorten or lengthen the wait
  const direction = Math.random() * 2 - 1;
  return Math.round(backoff + backoff * jitterFactor * direction);
}
110
+
111
/**
 * Executes a function with exponential backoff retry.
 *
 * `fn` is always invoked at least once. It is invoked again (up to
 * `maxRetries` additional times) only while the thrown error is classified as
 * retryable by isRetryable().
 *
 * @param {Function} fn - Async function to execute
 * @param {object} options - Retry options
 * @param {number} [options.maxRetries] - Retries after the first attempt
 *   (defaults to RETRY_CONFIG.maxRetries; negative or non-numeric values mean 0)
 * @returns {Promise<any>} - Result from the function
 * @throws {Error} - The last error thrown by `fn` once retries are exhausted
 *   or the error is permanent
 */
export async function generateWithRetry(fn, options = {}) {
  const requested = Number(options.maxRetries ?? RETRY_CONFIG.maxRetries);
  // NaN and negative values previously skipped the loop entirely, so `fn`
  // never ran and `undefined` was thrown; treat them as "no retries".
  const maxRetries = requested >= 0 ? requested : 0;

  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      // Don't retry if this is the last attempt or error is not retryable
      if (attempt >= maxRetries || !isRetryable(error)) {
        throw error;
      }

      // Calculate delay and wait
      const delay = calculateDelay(attempt);
      if (process.env.SEARCH_DEBUG) {
        console.log(`[LLM] Retry ${attempt + 1}/${maxRetries} after ${delay}ms: ${error.message}`);
      }
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}
144
+
145
+ // =============================================================================
146
+ // PROVIDER: GROQ (Primary — fastest, cheapest)
147
+ // =============================================================================
148
+
149
/**
 * Sends one chat-completion request to Groq for the given model.
 * @param {string} model - Groq model ID
 * @param {string} prompt - The prompt to send as the user message
 * @param {number} maxTokens - Maximum tokens to generate
 * @returns {Promise<string>} - Trimmed text of the first choice, or '' when absent
 * @throws {Error} - With a `status` property when the HTTP response is not ok
 */
async function callGroqCompletion(model, prompt, maxTokens) {
  const systemMessage = {
    role: 'system',
    content: 'You are a code documentation assistant. Generate concise, accurate summaries of code entities. Focus on what the code does, not how. Be brief and direct.',
  };
  const userMessage = { role: 'user', content: prompt };
  const payload = {
    model,
    messages: [systemMessage, userMessage],
    max_tokens: maxTokens,
    temperature: 0.3,
  };

  const endpoint = `${GROQ_HCGS_CONFIG.baseUrl}/chat/completions`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${GROQ_API_KEY}`,
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(GROQ_HCGS_CONFIG.timeout),
  });

  if (response.ok) {
    const data = await response.json();
    return data.choices?.[0]?.message?.content?.trim() || '';
  }

  // Attach the HTTP status so callers can classify the failure
  const failure = new Error(`Groq API error: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
192
+
193
/**
 * Generates a summary using Groq, trying the configured fallback model when
 * the primary model fails.
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens=150] - Maximum tokens to generate
 * @returns {Promise<string>} - Generated summary
 * @throws {Error} - When no API key is configured or every attempted model fails
 */
async function generateWithGroq(prompt, options = {}) {
  if (!GROQ_API_KEY) {
    throw new Error('Groq API key not configured');
  }

  const tokenBudget = options.maxTokens ?? 150;
  const primaryModel = process.env.HCGS_MODEL || GROQ_HCGS_CONFIG.model;

  try {
    return await callGroqCompletion(primaryModel, prompt, tokenBudget);
  } catch (primaryError) {
    // An explicit HCGS_MODEL override is respected: no silent switch to another model
    const fallbackAllowed = !process.env.HCGS_MODEL && Boolean(GROQ_HCGS_CONFIG.fallbackModel);
    if (!fallbackAllowed) {
      throw primaryError;
    }

    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Groq ${primaryModel} failed, trying fallback ${GROQ_HCGS_CONFIG.fallbackModel}: ${primaryError.message}`);
    }

    return await callGroqCompletion(GROQ_HCGS_CONFIG.fallbackModel, prompt, tokenBudget);
  }
}
216
+
217
+ // =============================================================================
218
+ // PROVIDER: CEREBRAS (Fallback — fast, good quality)
219
+ // =============================================================================
220
+
221
/**
 * Generates a summary through the Cerebras chat-completions endpoint.
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens=150] - Maximum tokens to generate
 * @returns {Promise<string>} - Trimmed text of the first choice, or '' when absent
 * @throws {Error} - When Cerebras is not configured, or with a `status`
 *   property when the HTTP response is not ok
 */
async function generateWithCerebras(prompt, options = {}) {
  if (!isCerebrasAvailable()) {
    throw new Error('Cerebras API key not configured');
  }

  const payload = {
    model: process.env.HCGS_MODEL || getCerebrasModel('hcgs'),
    messages: [
      {
        role: 'system',
        content: 'You are a code documentation assistant. Generate concise, accurate summaries of code entities. Focus on what the code does, not how. Be brief and direct.',
      },
      { role: 'user', content: prompt },
    ],
    max_tokens: options.maxTokens ?? 150,
    temperature: 0.3,
  };
  // Disable chain-of-thought for faster responses
  if (CEREBRAS_CONFIG.reasoning?.fastMode) {
    payload.disable_reasoning = true;
  }

  const response = await fetch(`${CEREBRAS_CONFIG.baseUrl}/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${CEREBRAS_CONFIG.apiKey}`,
    },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(15000), // 15s timeout
  });

  if (response.ok) {
    const data = await response.json();
    return data.choices?.[0]?.message?.content?.trim() || '';
  }

  // Attach the HTTP status so callers can classify the failure
  const failure = new Error(`Cerebras API error: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
272
+
273
+ // =============================================================================
274
+ // PROVIDER: OLLAMA (Local GPU)
275
+ // =============================================================================
276
+
277
/**
 * Checks whether the Ollama server is reachable and has the configured model
 * installed.
 *
 * The installed name must equal OLLAMA_CONFIG.model, or `${model}:latest`
 * (which covers a model configured without a tag). Matching on the base name
 * alone is not enough: generateWithOllama() requests the exact configured
 * model, so a different tag of the same family would be reported as available
 * and then fail at generation time.
 *
 * @returns {Promise<boolean>} - false on any network/parse failure
 */
async function isOllamaAvailable() {
  try {
    const response = await fetch(`${OLLAMA_CONFIG.baseUrl}/api/tags`, {
      signal: AbortSignal.timeout(2000),
    });
    if (!response.ok) return false;

    const data = await response.json();
    if (!Array.isArray(data?.models)) return false;

    // Check if our desired model is available
    const wanted = OLLAMA_CONFIG.model;
    const wantedLatest = `${wanted}:latest`;
    return data.models.some((m) => m?.name === wanted || m?.name === wantedLatest);
  } catch {
    // Best-effort probe: an unreachable server simply means "not available"
    return false;
  }
}
298
+
299
/**
 * Generates a summary through Ollama's non-streaming generate endpoint.
 * @param {string} prompt - The prompt to send (the system instruction is prepended)
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens=150] - Maximum tokens to generate
 * @returns {Promise<string>} - Trimmed response text, or '' when absent
 * @throws {Error} - With a `status` property when the HTTP response is not ok
 */
async function generateWithOllama(prompt, options = {}) {
  const instruction = 'You are a code documentation assistant. Generate concise, accurate summaries of code entities. Focus on what the code does, not how. Be brief and direct.';
  const payload = {
    model: OLLAMA_CONFIG.model,
    prompt: `${instruction}\n\n${prompt}`,
    stream: false,
    options: {
      num_predict: options.maxTokens ?? 150,
      temperature: 0.3,
    },
  };

  const response = await fetch(`${OLLAMA_CONFIG.baseUrl}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(OLLAMA_CONFIG.timeout),
  });

  if (response.ok) {
    const data = await response.json();
    return data.response?.trim() || '';
  }

  // Attach the HTTP status so callers can classify the failure
  const failure = new Error(`Ollama API error: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
336
+
337
+ // =============================================================================
338
+ // PROVIDER: TRANSFORMERS.JS (Local CPU)
339
+ // =============================================================================
340
+
341
// Lazy-loaded pipeline instance
let transformersPipeline = null;
// Availability cache: null = not probed yet, true/false = outcome of the probe
let transformersAvailable = null;
// In-flight initialization, shared so concurrent callers load the model once
let transformersInitPromise = null;

/**
 * Imports Transformers.js and builds the text-generation pipeline, recording
 * the outcome in `transformersPipeline` / `transformersAvailable`.
 * @returns {Promise<boolean>} - true when the pipeline is ready; never rejects
 */
async function initTransformersPipeline() {
  try {
    // Dynamic import to avoid errors if not installed
    const { pipeline, env } = await import('@xenova/transformers');

    // Configure for optimal performance
    env.allowLocalModels = true;
    env.useBrowserCache = false;

    // Initialize the pipeline (this downloads the model on first run)
    if (process.env.SEARCH_DEBUG) {
      console.log('[LLM] Initializing Transformers.js pipeline...');
    }

    transformersPipeline = await pipeline('text-generation', TRANSFORMERS_CONFIG.model, {
      quantized: true, // Use quantized model for speed
    });

    transformersAvailable = true;
    return true;
  } catch (error) {
    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Transformers.js not available: ${error.message}`);
    }
    transformersAvailable = false;
    return false;
  }
}

/**
 * Checks if Transformers.js is available and initializes it.
 *
 * The result of the first probe is cached. Calls made while that probe is
 * still running receive the same promise instead of starting another
 * pipeline load.
 *
 * @returns {Promise<boolean>}
 */
function isTransformersAvailable() {
  if (transformersAvailable !== null) {
    return Promise.resolve(transformersAvailable);
  }

  if (transformersInitPromise === null) {
    transformersInitPromise = initTransformersPipeline();
  }
  return transformersInitPromise;
}
381
+
382
/**
 * Generates a summary with the local Transformers.js pipeline, initializing
 * the pipeline on first use.
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {number} [options.maxTokens] - Maximum new tokens (defaults to
 *   TRANSFORMERS_CONFIG.maxNewTokens)
 * @returns {Promise<string>} - Trimmed generated text, or '' when absent
 * @throws {Error} - When the pipeline cannot be initialized
 */
async function generateWithTransformers(prompt, options = {}) {
  const ready = Boolean(transformersPipeline) || (await isTransformersAvailable());
  if (!ready) {
    throw new Error('Transformers.js not available');
  }

  const systemPrompt = 'You are a code documentation assistant. Generate concise, accurate summaries.';
  const fullPrompt = `<|system|>\n${systemPrompt}<|end|>\n<|user|>\n${prompt}<|end|>\n<|assistant|>\n`;

  const generationSettings = {
    max_new_tokens: options.maxTokens ?? TRANSFORMERS_CONFIG.maxNewTokens,
    temperature: TRANSFORMERS_CONFIG.temperature,
    do_sample: true,
    return_full_text: false,
  };
  const [firstOutput] = await transformersPipeline(fullPrompt, generationSettings);

  return firstOutput?.generated_text?.trim() || '';
}
410
+
411
+ // =============================================================================
412
+ // PROVIDER: STATIC FALLBACK (No LLM)
413
+ // =============================================================================
414
+
415
/**
 * Generates a static summary without LLM.
 * Uses available metadata: doc_comment > signature > type + name.
 *
 * A candidate from the doc comment or signature is used only when it is longer
 * than 10 characters; results are capped at 200 characters.
 *
 * @param {object|null|undefined} entity - Code entity with metadata
 *   (`doc_comment`, `signature`, `type`, `name` are read; all optional)
 * @returns {string} - Generated summary
 */
function generateStaticSummary(entity) {
  // Tolerate a missing entity so this last-resort fallback itself never throws
  const meta = entity ?? {};
  const clip = (text) => (text.length > 200 ? text.slice(0, 197) + '...' : text);

  // Priority 1: Use existing doc comment
  if (meta.doc_comment) {
    const firstSentence = String(meta.doc_comment)
      .replace(/^\/\*\*\s*|\s*\*\/$/g, '') // Remove /** */
      .replace(/^\s*\*\s*/gm, '') // Remove leading *
      // End the sentence at a newline or at a period followed by whitespace or
      // end of text, so dotted names ("config.json", "foo.bar()") stay intact
      .split(/\.(?=\s|$)|\n/)[0]
      .trim();

    if (firstSentence.length > 10) {
      return clip(firstSentence);
    }
  }

  // Priority 2: Use signature if available
  if (meta.signature) {
    const cleanSig = String(meta.signature).replace(/\s+/g, ' ').trim();

    if (cleanSig.length > 10) {
      return clip(cleanSig);
    }
  }

  // Priority 3: Construct from type and name
  const type = String(meta.type || 'entity');
  const name = String(meta.name || 'unknown');

  // Convert CamelCase/snake_case to words
  const humanName = name
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .replace(/_/g, ' ')
    .toLowerCase();

  return `${type.charAt(0).toUpperCase() + type.slice(1)}: ${humanName}`;
}
459
+
460
+ // =============================================================================
461
+ // PROVIDER DETECTION AND SELECTION
462
+ // =============================================================================
463
+
464
/**
 * Shape of an entry in the provider registry.
 * @typedef {object} SummaryProvider
 * @property {string} name - Provider name
 * @property {boolean} isLocal - Whether provider runs locally
 * @property {number} priority - Selection order; lower values are tried first
 * @property {Function} checkAvailable - Resolves to true when the provider can be used
 * @property {Function} generate - Generation function `(prompt, options)`
 */

// Provider registry (priority order: Groq → Cerebras → Ollama → Transformers.js → Static)
const providers = {
  groq: {
    name: 'groq',
    isLocal: false,
    priority: 1,
    generate: generateWithGroq,
    checkAvailable: () => Promise.resolve(GROQ_API_KEY.length > 0),
  },
  cerebras: {
    name: 'cerebras',
    isLocal: false,
    priority: 2,
    generate: generateWithCerebras,
    checkAvailable: () => Promise.resolve(isCerebrasAvailable()),
  },
  ollama: {
    name: 'ollama',
    isLocal: true,
    priority: 3,
    generate: generateWithOllama,
    checkAvailable: isOllamaAvailable,
  },
  transformers: {
    name: 'transformers',
    isLocal: true,
    priority: 4,
    generate: generateWithTransformers,
    checkAvailable: isTransformersAvailable,
  },
  static: {
    name: 'static',
    isLocal: true,
    priority: 99,
    checkAvailable: () => Promise.resolve(true),
    // The static provider works from entity metadata; the raw prompt is ignored
    generate: async (prompt, options) =>
      (options?.entity ? generateStaticSummary(options.entity) : 'Code entity'),
  },
};
516
+
517
// Cached provider selections, keyed by the normalized `preferLocal` flag.
// Values are promises, so one entry serves both finished and in-flight detection.
const providerSelectionCache = new Map();

/**
 * Probes the registry in order and returns the first available provider.
 * Probe failures are treated as "not available"; the static provider is the
 * final fallback, so the returned promise does not reject on probe errors.
 * @param {boolean} preferLocal - Try local providers before remote ones
 * @returns {Promise<SummaryProvider>}
 */
async function detectSummaryProvider(preferLocal) {
  // Get providers sorted by priority (optionally preferring local)
  const candidates = Object.values(providers).sort((a, b) => {
    if (preferLocal && a.isLocal !== b.isLocal) {
      // Local providers first
      return a.isLocal ? -1 : 1;
    }
    return a.priority - b.priority;
  });

  // Find first available provider
  for (const candidate of candidates) {
    try {
      if (await candidate.checkAvailable()) {
        if (process.env.SEARCH_DEBUG) {
          console.log(`[LLM] Selected provider: ${candidate.name}`);
        }
        return candidate;
      }
    } catch (error) {
      if (process.env.SEARCH_DEBUG) {
        console.log(`[LLM] Provider ${candidate.name} check failed: ${error.message}`);
      }
    }
  }

  // Fallback to static (always available)
  return providers.static;
}

/**
 * Auto-detects and returns the best available summary provider.
 *
 * Selections are cached separately for `preferLocal: true` and
 * `preferLocal: false`, so a cached choice made for one preference is never
 * returned for the other. Concurrent calls with the same preference share a
 * single detection run.
 *
 * @param {object} options - Options
 * @param {boolean} options.preferLocal - Prefer local providers over remote
 * @param {boolean} options.forceCheck - Force re-check of provider availability
 * @returns {Promise<SummaryProvider>} - Best available provider
 */
export async function getSummaryProvider(options = {}) {
  const { preferLocal = false, forceCheck = false } = options;
  const cacheKey = Boolean(preferLocal);

  // Return the cached (or in-flight) selection unless a recheck is forced
  if (!forceCheck && providerSelectionCache.has(cacheKey)) {
    return providerSelectionCache.get(cacheKey);
  }

  const detection = detectSummaryProvider(cacheKey);
  providerSelectionCache.set(cacheKey, detection);
  return detection;
}
578
+
579
+ // =============================================================================
580
+ // UNIFIED GENERATION INTERFACE
581
+ // =============================================================================
582
+
583
/**
 * Generates a summary for a code entity using the best available provider.
 * Falls back through the provider chain on failures.
 *
 * A provider that throws OR returns a blank summary counts as failed, so the
 * caller gets an empty string only if every provider, including the static
 * summary, produces nothing.
 *
 * @param {string} prompt - The prompt to send
 * @param {object} options - Generation options
 * @param {object} options.entity - Code entity metadata (for static fallback)
 * @param {number} options.maxTokens - Maximum tokens to generate
 * @param {boolean} options.preferLocal - Prefer local providers
 * @returns {Promise<{summary: string, provider: string}>}
 */
export async function generateSummary(prompt, options = {}) {
  const hasText = (value) => typeof value === 'string' && value.trim().length > 0;
  const provider = await getSummaryProvider({ preferLocal: options.preferLocal });

  // Try primary provider with retry
  try {
    const summary = await generateWithRetry(
      () => provider.generate(prompt, options),
      { maxRetries: provider.isLocal ? 1 : RETRY_CONFIG.maxRetries }
    );

    if (hasText(summary)) {
      return {
        summary,
        provider: provider.name,
      };
    }

    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Primary provider ${provider.name} returned an empty summary`);
    }
  } catch (primaryError) {
    if (process.env.SEARCH_DEBUG) {
      console.log(`[LLM] Primary provider ${provider.name} failed: ${primaryError.message}`);
    }
  }

  // Try fallback providers (single attempt each, no retry)
  const fallbackOrder = ['groq', 'cerebras', 'ollama', 'transformers', 'static'];
  for (const fallbackName of fallbackOrder) {
    if (fallbackName === provider.name) continue;

    const fallback = providers[fallbackName];
    if (!fallback) continue;

    try {
      const available = await fallback.checkAvailable();
      if (!available) continue;

      const summary = await fallback.generate(prompt, options);
      if (hasText(summary)) {
        return {
          summary,
          provider: fallback.name,
        };
      }

      if (process.env.SEARCH_DEBUG) {
        console.log(`[LLM] Fallback ${fallbackName} returned an empty summary`);
      }
    } catch (fallbackError) {
      if (process.env.SEARCH_DEBUG) {
        console.log(`[LLM] Fallback ${fallbackName} failed: ${fallbackError.message}`);
      }
    }
  }

  // Ultimate fallback: static
  return {
    summary: generateStaticSummary(options.entity || {}),
    provider: 'static',
  };
}
641
+
642
/**
 * Summarizes many entities, processing them in consecutive chunks whose size
 * depends on the selected provider (1 for local providers, 5 for remote).
 * Each chunk is awaited in full before the next one starts; results keep the
 * order of `items`.
 * @param {Array<{prompt: string, entity: object}>} items - Items to summarize
 * @param {object} options - Generation options forwarded to generateSummary
 * @returns {Promise<Array<{summary: string, provider: string}>>}
 */
export async function generateSummariesBatch(items, options = {}) {
  const { isLocal } = await getSummaryProvider({ preferLocal: options.preferLocal });
  // Limit concurrency for local providers
  const chunkSize = isLocal ? 1 : 5;

  const summaries = [];
  let offset = 0;
  while (offset < items.length) {
    const pending = items
      .slice(offset, offset + chunkSize)
      .map((item) => generateSummary(item.prompt, { ...options, entity: item.entity }));
    summaries.push(...(await Promise.all(pending)));
    offset += chunkSize;
  }

  return summaries;
}
663
+
664
+ // =============================================================================
665
+ // PROMPT TEMPLATES
666
+ // =============================================================================
667
+
668
/**
 * Creates a summary prompt for a code entity.
 *
 * The token limit is looked up by entity type among the OWN keys of
 * HCGS_CONFIG.summaryTokenLimits, so a type such as "constructor" or
 * "toString" cannot resolve to an inherited Object.prototype member. A missing
 * type or name is rendered as "entity" / "unknown" rather than "undefined".
 *
 * @param {object} entity - Code entity
 * @param {string} [entity.type] - Entity kind, used for wording and the token limit
 * @param {string} [entity.name] - Entity name
 * @param {string} [entity.file] - Source file, included when present
 * @param {string} [entity.code] - Source text, truncated to 2000 characters
 * @param {Array<{name: string, summary: string}>} [entity.childSummaries] -
 *   Child summaries; only the first five are included
 * @returns {string} - Formatted prompt
 */
export function createSummaryPrompt(entity) {
  const type = entity.type || 'entity';
  const name = entity.name || 'unknown';

  const limits = HCGS_CONFIG.summaryTokenLimits ?? {};
  const tokenLimit = (Object.hasOwn(limits, type) && limits[type]) || HCGS_CONFIG.defaultTokenLimit;

  let prompt = `Summarize this ${type} in ${tokenLimit} tokens or less:\n\n`;

  // Add entity header
  prompt += `Name: ${name}\n`;
  if (entity.file) {
    prompt += `File: ${entity.file}\n`;
  }

  // Add code content
  if (entity.code) {
    const codeSnippet = entity.code.length > 2000
      ? entity.code.slice(0, 2000) + '\n// ... (truncated)'
      : entity.code;
    prompt += `\nCode:\n\`\`\`\n${codeSnippet}\n\`\`\`\n`;
  }

  // Add context from children if available
  if (Array.isArray(entity.childSummaries) && entity.childSummaries.length > 0) {
    prompt += '\nContains:\n';
    for (const child of entity.childSummaries.slice(0, 5)) {
      prompt += `- ${child.name}: ${child.summary}\n`;
    }
  }

  prompt += '\nProvide a concise summary focusing on purpose and functionality.';

  return prompt;
}
704
+
705
+ // =============================================================================
706
+ // EXPORTS
707
+ // =============================================================================
708
+
709
+ export {
710
+ generateWithGroq,
711
+ generateWithCerebras,
712
+ generateWithOllama,
713
+ generateWithTransformers,
714
+ generateStaticSummary,
715
+ isOllamaAvailable,
716
+ isTransformersAvailable,
717
+ providers,
718
+ GROQ_HCGS_CONFIG,
719
+ OLLAMA_CONFIG,
720
+ TRANSFORMERS_CONFIG,
721
+ RETRY_CONFIG,
722
+ };
723
+
724
+ export default {
725
+ getSummaryProvider,
726
+ generateSummary,
727
+ generateSummariesBatch,
728
+ generateWithRetry,
729
+ isRetryable,
730
+ createSummaryPrompt,
731
+ generateStaticSummary,
732
+ providers,
733
+ };