@autodev/codebase 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -2
- package/dist/cli.js +20214 -10531
- package/dist/cli.js.map +1 -1
- package/dist/index.js +1389 -162
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -47,7 +47,21 @@ const DEFAULT_CONFIG = {
|
|
|
47
47
|
qdrantUrl: "http://localhost:6333",
|
|
48
48
|
vectorSearchMinScore: 0.1,
|
|
49
49
|
vectorSearchMaxResults: 20,
|
|
50
|
-
rerankerEnabled: false
|
|
50
|
+
rerankerEnabled: false,
|
|
51
|
+
rerankerConcurrency: 3,
|
|
52
|
+
rerankerMaxRetries: 3,
|
|
53
|
+
rerankerRetryDelayMs: 1000,
|
|
54
|
+
summarizerProvider: 'ollama',
|
|
55
|
+
summarizerOllamaBaseUrl: 'http://localhost:11434',
|
|
56
|
+
summarizerOllamaModelId: 'qwen3-vl:4b-instruct',
|
|
57
|
+
summarizerOpenAiCompatibleBaseUrl: 'http://localhost:8080/v1',
|
|
58
|
+
summarizerOpenAiCompatibleModelId: 'gpt-4',
|
|
59
|
+
summarizerOpenAiCompatibleApiKey: '',
|
|
60
|
+
summarizerLanguage: 'English',
|
|
61
|
+
summarizerBatchSize: 2,
|
|
62
|
+
summarizerConcurrency: 2,
|
|
63
|
+
summarizerMaxRetries: 3,
|
|
64
|
+
summarizerRetryDelayMs: 1000
|
|
51
65
|
};
|
|
52
66
|
/**Parser */
|
|
53
67
|
const MAX_BLOCK_CHARS = 2000;
|
|
@@ -104,6 +118,14 @@ const MAX_ITEM_TOKENS = 8191;
|
|
|
104
118
|
const BATCH_PROCESSING_CONCURRENCY = 10;
|
|
105
119
|
/**Gemini Embedder */
|
|
106
120
|
const GEMINI_MAX_ITEM_TOKENS = 2048;
|
|
121
|
+
/**BatchProcessor Truncation - 截断降级功能用于处理超长文本 */
|
|
122
|
+
const TRUNCATION_INITIAL_THRESHOLD = 800; // 初始截断阈值(chars)
|
|
123
|
+
const TRUNCATION_REDUCTION_FACTOR = 0.7; // 每次降低 30%
|
|
124
|
+
const MIN_TRUNCATION_THRESHOLD = 200; // 最小阈值
|
|
125
|
+
const MAX_TRUNCATION_ATTEMPTS = 3; // 最大重试次数
|
|
126
|
+
const INDIVIDUAL_PROCESSING_TIMEOUT_MS = 60000; // 降级处理超时(1分钟)
|
|
127
|
+
/**Feature Flags - 功能开关 */
|
|
128
|
+
const ENABLE_TRUNCATION_FALLBACK = true; // 是否启用截断降级功能
|
|
107
129
|
|
|
108
130
|
/**
|
|
109
131
|
* Defines profiles for different embedding models, including their dimensions.
|
|
@@ -277,6 +299,8 @@ class ConfigValidator {
|
|
|
277
299
|
ConfigValidator.validateQdrant(config, issues);
|
|
278
300
|
// Validate reranker configuration
|
|
279
301
|
ConfigValidator.validateReranker(config, issues);
|
|
302
|
+
// Validate summarizer configuration (optional - only when --summarize flag is used)
|
|
303
|
+
ConfigValidator.validateSummarizer(config, issues);
|
|
280
304
|
// Validate basic configuration consistency
|
|
281
305
|
ConfigValidator.validateBasicConsistency(config, issues);
|
|
282
306
|
return {
|
|
@@ -446,6 +470,71 @@ class ConfigValidator {
|
|
|
446
470
|
});
|
|
447
471
|
}
|
|
448
472
|
}
|
|
473
|
+
/**
|
|
474
|
+
* Validate summarizer configuration
|
|
475
|
+
* Note: This validation is optional and only performed when --summarize flag is actually used.
|
|
476
|
+
* It doesn't block other operations if summarizer config is incomplete.
|
|
477
|
+
*/
|
|
478
|
+
static validateSummarizer(config, issues) {
|
|
479
|
+
// Only validate if summarizer provider is specified
|
|
480
|
+
if (!config.summarizerProvider) {
|
|
481
|
+
return;
|
|
482
|
+
}
|
|
483
|
+
// Validate provider is supported
|
|
484
|
+
if (config.summarizerProvider !== 'ollama' && config.summarizerProvider !== 'openai-compatible') {
|
|
485
|
+
issues.push({
|
|
486
|
+
path: 'summarizerProvider',
|
|
487
|
+
code: 'invalid_value',
|
|
488
|
+
message: `Unsupported summarizer provider: ${config.summarizerProvider}. Supported: 'ollama', 'openai-compatible'.`
|
|
489
|
+
});
|
|
490
|
+
return;
|
|
491
|
+
}
|
|
492
|
+
// For ollama provider, validate required fields
|
|
493
|
+
if (config.summarizerProvider === 'ollama') {
|
|
494
|
+
if (!config.summarizerOllamaBaseUrl) {
|
|
495
|
+
issues.push({
|
|
496
|
+
path: 'summarizerOllamaBaseUrl',
|
|
497
|
+
code: 'required',
|
|
498
|
+
message: 'Ollama base URL is required for summarizer when provider is ollama'
|
|
499
|
+
});
|
|
500
|
+
}
|
|
501
|
+
if (!config.summarizerOllamaModelId) {
|
|
502
|
+
issues.push({
|
|
503
|
+
path: 'summarizerOllamaModelId',
|
|
504
|
+
code: 'required',
|
|
505
|
+
message: 'Ollama model ID is required for summarizer when provider is ollama'
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
// For openai-compatible provider, validate required fields
|
|
510
|
+
if (config.summarizerProvider === 'openai-compatible') {
|
|
511
|
+
if (!config.summarizerOpenAiCompatibleBaseUrl) {
|
|
512
|
+
issues.push({
|
|
513
|
+
path: 'summarizerOpenAiCompatibleBaseUrl',
|
|
514
|
+
code: 'required',
|
|
515
|
+
message: 'OpenAI-compatible base URL is required for summarizer when provider is openai-compatible'
|
|
516
|
+
});
|
|
517
|
+
}
|
|
518
|
+
if (!config.summarizerOpenAiCompatibleModelId) {
|
|
519
|
+
issues.push({
|
|
520
|
+
path: 'summarizerOpenAiCompatibleModelId',
|
|
521
|
+
code: 'required',
|
|
522
|
+
message: 'OpenAI-compatible model ID is required for summarizer when provider is openai-compatible'
|
|
523
|
+
});
|
|
524
|
+
}
|
|
525
|
+
// Note: API key is optional for local servers (e.g., LM Studio)
|
|
526
|
+
}
|
|
527
|
+
// Validate language if specified
|
|
528
|
+
if (config.summarizerLanguage) {
|
|
529
|
+
if (config.summarizerLanguage !== 'English' && config.summarizerLanguage !== 'Chinese') {
|
|
530
|
+
issues.push({
|
|
531
|
+
path: 'summarizerLanguage',
|
|
532
|
+
code: 'invalid_value',
|
|
533
|
+
message: `Invalid language: ${config.summarizerLanguage}. Must be 'English' or 'Chinese'.`
|
|
534
|
+
});
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
449
538
|
/**
|
|
450
539
|
* Validate basic configuration consistency
|
|
451
540
|
*/
|
|
@@ -473,6 +562,27 @@ class ConfigValidator {
|
|
|
473
562
|
message: 'Reranker batch size must be positive'
|
|
474
563
|
});
|
|
475
564
|
}
|
|
565
|
+
if (config.rerankerConcurrency !== undefined && config.rerankerConcurrency <= 0) {
|
|
566
|
+
issues.push({
|
|
567
|
+
path: 'rerankerConcurrency',
|
|
568
|
+
code: 'invalid_range',
|
|
569
|
+
message: 'Reranker concurrency must be positive'
|
|
570
|
+
});
|
|
571
|
+
}
|
|
572
|
+
if (config.rerankerMaxRetries !== undefined && config.rerankerMaxRetries < 0) {
|
|
573
|
+
issues.push({
|
|
574
|
+
path: 'rerankerMaxRetries',
|
|
575
|
+
code: 'invalid_range',
|
|
576
|
+
message: 'Reranker max retries must be non-negative'
|
|
577
|
+
});
|
|
578
|
+
}
|
|
579
|
+
if (config.rerankerRetryDelayMs !== undefined && config.rerankerRetryDelayMs < 0) {
|
|
580
|
+
issues.push({
|
|
581
|
+
path: 'rerankerRetryDelayMs',
|
|
582
|
+
code: 'invalid_range',
|
|
583
|
+
message: 'Reranker retry delay must be non-negative'
|
|
584
|
+
});
|
|
585
|
+
}
|
|
476
586
|
if (config.vectorSearchMaxResults !== undefined && config.vectorSearchMaxResults <= 0) {
|
|
477
587
|
issues.push({
|
|
478
588
|
path: 'vectorSearchMaxResults',
|
|
@@ -694,6 +804,17 @@ class CodeIndexConfigManager {
|
|
|
694
804
|
rerankerOpenAiCompatibleApiKey: config.rerankerOpenAiCompatibleApiKey,
|
|
695
805
|
rerankerMinScore: config.rerankerMinScore,
|
|
696
806
|
rerankerBatchSize: config.rerankerBatchSize,
|
|
807
|
+
rerankerConcurrency: config.rerankerConcurrency,
|
|
808
|
+
rerankerMaxRetries: config.rerankerMaxRetries,
|
|
809
|
+
rerankerRetryDelayMs: config.rerankerRetryDelayMs,
|
|
810
|
+
summarizerProvider: config.summarizerProvider,
|
|
811
|
+
summarizerOllamaBaseUrl: config.summarizerOllamaBaseUrl,
|
|
812
|
+
summarizerOllamaModelId: config.summarizerOllamaModelId,
|
|
813
|
+
summarizerOpenAiCompatibleBaseUrl: config.summarizerOpenAiCompatibleBaseUrl,
|
|
814
|
+
summarizerOpenAiCompatibleModelId: config.summarizerOpenAiCompatibleModelId,
|
|
815
|
+
summarizerOpenAiCompatibleApiKey: config.summarizerOpenAiCompatibleApiKey,
|
|
816
|
+
summarizerLanguage: config.summarizerLanguage,
|
|
817
|
+
summarizerTemperature: config.summarizerTemperature,
|
|
697
818
|
};
|
|
698
819
|
}
|
|
699
820
|
/**
|
|
@@ -898,7 +1019,32 @@ class CodeIndexConfigManager {
|
|
|
898
1019
|
openAiCompatibleModelId: this.config.rerankerOpenAiCompatibleModelId,
|
|
899
1020
|
openAiCompatibleApiKey: this.config.rerankerOpenAiCompatibleApiKey,
|
|
900
1021
|
minScore: this.config.rerankerMinScore,
|
|
901
|
-
batchSize: this.config.rerankerBatchSize || 10
|
|
1022
|
+
batchSize: this.config.rerankerBatchSize || 10,
|
|
1023
|
+
concurrency: this.config.rerankerConcurrency ?? DEFAULT_CONFIG.rerankerConcurrency,
|
|
1024
|
+
maxRetries: this.config.rerankerMaxRetries ?? DEFAULT_CONFIG.rerankerMaxRetries,
|
|
1025
|
+
retryDelayMs: this.config.rerankerRetryDelayMs ?? DEFAULT_CONFIG.rerankerRetryDelayMs
|
|
1026
|
+
};
|
|
1027
|
+
}
|
|
1028
|
+
/**
|
|
1029
|
+
* Gets the summarizer configuration.
|
|
1030
|
+
* Always returns config (never undefined) since summarizer is only used when --summarize flag is present.
|
|
1031
|
+
* Missing values are filled with defaults.
|
|
1032
|
+
*/
|
|
1033
|
+
get summarizerConfig() {
|
|
1034
|
+
const provider = this.config?.summarizerProvider || 'ollama';
|
|
1035
|
+
return {
|
|
1036
|
+
provider: provider,
|
|
1037
|
+
ollamaBaseUrl: this.config?.summarizerOllamaBaseUrl || 'http://localhost:11434',
|
|
1038
|
+
ollamaModelId: this.config?.summarizerOllamaModelId || 'qwen3-vl:4b-instruct',
|
|
1039
|
+
openAiCompatibleBaseUrl: this.config?.summarizerOpenAiCompatibleBaseUrl || 'http://localhost:8080/v1',
|
|
1040
|
+
openAiCompatibleModelId: this.config?.summarizerOpenAiCompatibleModelId || 'gpt-4',
|
|
1041
|
+
openAiCompatibleApiKey: this.config?.summarizerOpenAiCompatibleApiKey || '',
|
|
1042
|
+
language: this.config?.summarizerLanguage || 'English',
|
|
1043
|
+
temperature: this.config?.summarizerTemperature,
|
|
1044
|
+
batchSize: this.config?.summarizerBatchSize ?? DEFAULT_CONFIG.summarizerBatchSize,
|
|
1045
|
+
concurrency: this.config?.summarizerConcurrency ?? DEFAULT_CONFIG.summarizerConcurrency,
|
|
1046
|
+
maxRetries: this.config?.summarizerMaxRetries ?? DEFAULT_CONFIG.summarizerMaxRetries,
|
|
1047
|
+
retryDelayMs: this.config?.summarizerRetryDelayMs ?? DEFAULT_CONFIG.summarizerRetryDelayMs
|
|
902
1048
|
};
|
|
903
1049
|
}
|
|
904
1050
|
/**
|
|
@@ -33167,7 +33313,7 @@ class OpenAiEmbedder {
|
|
|
33167
33313
|
|
|
33168
33314
|
// Timeout constants for Ollama API requests
|
|
33169
33315
|
const OLLAMA_EMBEDDING_TIMEOUT_MS = 120000; // 120 seconds for embedding requests (increased for large models)
|
|
33170
|
-
const OLLAMA_VALIDATION_TIMEOUT_MS$
|
|
33316
|
+
const OLLAMA_VALIDATION_TIMEOUT_MS$2 = 30000; // 30 seconds for validation requests
|
|
33171
33317
|
const OLLAMA_MAX_RETRIES = 2; // Ollama-specific retry count
|
|
33172
33318
|
const OLLAMA_RETRY_DELAY_MS = 1000; // Initial retry delay for Ollama
|
|
33173
33319
|
/**
|
|
@@ -33339,7 +33485,7 @@ class CodeIndexOllamaEmbedder {
|
|
|
33339
33485
|
const modelsUrl = `${this.baseUrl}/api/tags`;
|
|
33340
33486
|
// Add timeout to prevent indefinite hanging
|
|
33341
33487
|
const controller = new AbortController();
|
|
33342
|
-
const timeoutId = setTimeout(() => controller.abort(), OLLAMA_VALIDATION_TIMEOUT_MS$
|
|
33488
|
+
const timeoutId = setTimeout(() => controller.abort(), OLLAMA_VALIDATION_TIMEOUT_MS$2);
|
|
33343
33489
|
// 检查环境变量中的代理设置
|
|
33344
33490
|
const httpsProxy = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
|
|
33345
33491
|
const httpProxy = process.env['HTTP_PROXY'] || process.env['http_proxy'];
|
|
@@ -33398,7 +33544,7 @@ class CodeIndexOllamaEmbedder {
|
|
|
33398
33544
|
const testUrl = `${this.baseUrl}/api/embed`;
|
|
33399
33545
|
// Add timeout for test request too
|
|
33400
33546
|
const testController = new AbortController();
|
|
33401
|
-
const testTimeoutId = setTimeout(() => testController.abort(), OLLAMA_VALIDATION_TIMEOUT_MS$
|
|
33547
|
+
const testTimeoutId = setTimeout(() => testController.abort(), OLLAMA_VALIDATION_TIMEOUT_MS$2);
|
|
33402
33548
|
const testFetchOptions = {
|
|
33403
33549
|
method: "POST",
|
|
33404
33550
|
headers: {
|
|
@@ -34569,20 +34715,25 @@ OpenRouterEmbedder.globalRateLimitState = {
|
|
|
34569
34715
|
|
|
34570
34716
|
// Timeout constants for Ollama API requests
|
|
34571
34717
|
const OLLAMA_RERANK_TIMEOUT_MS = 60000; // 60 seconds for rerank requests
|
|
34572
|
-
const OLLAMA_VALIDATION_TIMEOUT_MS = 30000; // 30 seconds for validation requests
|
|
34718
|
+
const OLLAMA_VALIDATION_TIMEOUT_MS$1 = 30000; // 30 seconds for validation requests
|
|
34573
34719
|
/**
|
|
34574
34720
|
* Implements the IReranker interface using a local Ollama instance with LLM-based reranking.
|
|
34575
34721
|
*/
|
|
34576
34722
|
class OllamaLLMReranker {
|
|
34577
|
-
constructor(baseUrl = "http://localhost:11434", modelId = "qwen3-vl:4b-instruct", batchSize = 10) {
|
|
34723
|
+
constructor(baseUrl = "http://localhost:11434", modelId = "qwen3-vl:4b-instruct", batchSize = 10, concurrency = 3, maxRetries = 3, retryDelayMs = 1000) {
|
|
34578
34724
|
// Normalize the baseUrl by removing all trailing slashes
|
|
34579
34725
|
const normalizedBaseUrl = baseUrl.replace(/\/+$/, "");
|
|
34580
34726
|
this.baseUrl = normalizedBaseUrl;
|
|
34581
34727
|
this.modelId = modelId;
|
|
34582
34728
|
this.batchSize = batchSize;
|
|
34729
|
+
this.concurrency = concurrency;
|
|
34730
|
+
this.maxRetries = maxRetries;
|
|
34731
|
+
this.retryDelayMs = retryDelayMs;
|
|
34583
34732
|
}
|
|
34584
34733
|
/**
|
|
34585
|
-
* Reranks candidates using LLM-based scoring.
|
|
34734
|
+
* Reranks candidates using LLM-based scoring with batch-grouped concurrency.
|
|
34735
|
+
* Reference: src/cli-tools/outline.ts generateSummariesWithRetry function
|
|
34736
|
+
*
|
|
34586
34737
|
* @param query The search query
|
|
34587
34738
|
* @param candidates Array of candidates to rerank
|
|
34588
34739
|
* @returns Promise resolving to reranked results with LLM scores
|
|
@@ -34591,32 +34742,71 @@ class OllamaLLMReranker {
|
|
|
34591
34742
|
if (candidates.length === 0) {
|
|
34592
34743
|
return [];
|
|
34593
34744
|
}
|
|
34594
|
-
// If candidates count <= batchSize, process directly
|
|
34745
|
+
// If candidates count <= batchSize, process directly
|
|
34595
34746
|
if (candidates.length <= this.batchSize) {
|
|
34596
34747
|
return this.rerankSingleBatch(query, candidates);
|
|
34597
34748
|
}
|
|
34598
|
-
//
|
|
34599
|
-
const
|
|
34600
|
-
let processedCount = 0;
|
|
34749
|
+
// Group candidates into batches
|
|
34750
|
+
const batches = [];
|
|
34601
34751
|
for (let i = 0; i < candidates.length; i += this.batchSize) {
|
|
34602
|
-
|
|
34603
|
-
|
|
34604
|
-
|
|
34605
|
-
|
|
34752
|
+
batches.push(candidates.slice(i, i + this.batchSize));
|
|
34753
|
+
}
|
|
34754
|
+
// Process batches with concurrency control and retry logic
|
|
34755
|
+
let completedBatches = 0;
|
|
34756
|
+
const allResults = [];
|
|
34757
|
+
const processBatchWithRetry = async (batch, batchIndex) => {
|
|
34758
|
+
let attempt = 0;
|
|
34759
|
+
let lastError = null;
|
|
34760
|
+
while (attempt < this.maxRetries) {
|
|
34761
|
+
try {
|
|
34762
|
+
const results = await this.rerankSingleBatch(query, batch);
|
|
34763
|
+
completedBatches++;
|
|
34764
|
+
if (completedBatches % 5 === 0 || completedBatches === batches.length) {
|
|
34765
|
+
console.log(`[OllamaReranker] Progress: ${completedBatches}/${batches.length} batches completed`);
|
|
34766
|
+
}
|
|
34767
|
+
return results;
|
|
34768
|
+
}
|
|
34769
|
+
catch (error) {
|
|
34770
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
34771
|
+
attempt++;
|
|
34772
|
+
if (attempt < this.maxRetries) {
|
|
34773
|
+
// Exponential backoff
|
|
34774
|
+
const delay = this.retryDelayMs * Math.pow(2, attempt - 1);
|
|
34775
|
+
console.warn(`[OllamaReranker] Batch ${batchIndex + 1} failed (attempt ${attempt}/${this.maxRetries}): ` +
|
|
34776
|
+
`${lastError.message}. Retrying in ${delay}ms...`);
|
|
34777
|
+
await new Promise(resolve => setTimeout(resolve, delay));
|
|
34778
|
+
}
|
|
34779
|
+
else {
|
|
34780
|
+
// Max retries reached, use fallback scores
|
|
34781
|
+
console.warn(`[OllamaReranker] Batch ${batchIndex + 1} failed after ${this.maxRetries} attempts. ` +
|
|
34782
|
+
`Using fallback scores...`);
|
|
34783
|
+
const baseScore = 10 - batchIndex * 0.1;
|
|
34784
|
+
return batch.map((candidate, idx) => ({
|
|
34785
|
+
id: candidate.id,
|
|
34786
|
+
score: baseScore - idx * 0.01,
|
|
34787
|
+
originalScore: candidate.score,
|
|
34788
|
+
payload: candidate.payload
|
|
34789
|
+
}));
|
|
34790
|
+
}
|
|
34791
|
+
}
|
|
34606
34792
|
}
|
|
34607
|
-
|
|
34608
|
-
|
|
34609
|
-
|
|
34610
|
-
|
|
34611
|
-
|
|
34612
|
-
|
|
34613
|
-
|
|
34614
|
-
|
|
34615
|
-
|
|
34616
|
-
|
|
34793
|
+
// Should never reach here, but TypeScript needs a return
|
|
34794
|
+
return batch.map((candidate, idx) => ({
|
|
34795
|
+
id: candidate.id,
|
|
34796
|
+
score: 0,
|
|
34797
|
+
originalScore: candidate.score,
|
|
34798
|
+
payload: candidate.payload
|
|
34799
|
+
}));
|
|
34800
|
+
};
|
|
34801
|
+
// Process batches with concurrency control (group-based pattern)
|
|
34802
|
+
const processBatchesWithConcurrency = async () => {
|
|
34803
|
+
for (let i = 0; i < batches.length; i += this.concurrency) {
|
|
34804
|
+
const batchGroup = batches.slice(i, i + this.concurrency);
|
|
34805
|
+
const groupResults = await Promise.all(batchGroup.map((batch, idx) => processBatchWithRetry(batch, i + idx)));
|
|
34806
|
+
allResults.push(...groupResults.flat());
|
|
34617
34807
|
}
|
|
34618
|
-
|
|
34619
|
-
|
|
34808
|
+
};
|
|
34809
|
+
await processBatchesWithConcurrency();
|
|
34620
34810
|
// Merge and re-sort all results
|
|
34621
34811
|
allResults.sort((a, b) => b.score - a.score);
|
|
34622
34812
|
return allResults;
|
|
@@ -34628,32 +34818,21 @@ class OllamaLLMReranker {
|
|
|
34628
34818
|
* @returns Promise resolving to reranked results with LLM scores
|
|
34629
34819
|
*/
|
|
34630
34820
|
async rerankSingleBatch(query, candidates) {
|
|
34631
|
-
|
|
34632
|
-
|
|
34633
|
-
|
|
34634
|
-
|
|
34635
|
-
|
|
34636
|
-
|
|
34637
|
-
|
|
34638
|
-
|
|
34639
|
-
|
|
34640
|
-
|
|
34641
|
-
|
|
34642
|
-
|
|
34643
|
-
|
|
34644
|
-
|
|
34645
|
-
|
|
34646
|
-
}
|
|
34647
|
-
catch (error) {
|
|
34648
|
-
console.error("Ollama LLM batch reranking failed, returning original order:", error);
|
|
34649
|
-
// Fallback to original order with default scores
|
|
34650
|
-
return candidates.map((candidate, index) => ({
|
|
34651
|
-
id: candidate.id,
|
|
34652
|
-
score: 10 - index * 0.1, // Slight decreasing scores to maintain order
|
|
34653
|
-
originalScore: candidate.score,
|
|
34654
|
-
payload: candidate.payload
|
|
34655
|
-
}));
|
|
34656
|
-
}
|
|
34821
|
+
// Build the scoring prompt with all candidates
|
|
34822
|
+
const prompt = this.buildScoringPrompt(query, candidates);
|
|
34823
|
+
// Call Ollama /api/generate endpoint
|
|
34824
|
+
// This will throw an error if generation fails, allowing the retry logic to kick in
|
|
34825
|
+
const scores = await this.generateScores(prompt);
|
|
34826
|
+
// Combine original candidates with LLM scores
|
|
34827
|
+
const results = candidates.map((candidate, index) => ({
|
|
34828
|
+
id: candidate.id,
|
|
34829
|
+
score: scores[index] || 0, // Default to 0 if no score
|
|
34830
|
+
originalScore: candidate.score,
|
|
34831
|
+
payload: candidate.payload
|
|
34832
|
+
}));
|
|
34833
|
+
// Sort by LLM score (descending) - this maintains order within the batch
|
|
34834
|
+
results.sort((a, b) => b.score - a.score);
|
|
34835
|
+
return results;
|
|
34657
34836
|
}
|
|
34658
34837
|
/**
|
|
34659
34838
|
* Builds the scoring prompt for the LLM.
|
|
@@ -34680,7 +34859,7 @@ Snippets:
|
|
|
34680
34859
|
---
|
|
34681
34860
|
`;
|
|
34682
34861
|
});
|
|
34683
|
-
prompt += `Respond with ONLY a JSON object with a relevant "scores" array: {"scores": [${Array.from({ length: candidates.length }, (_, i) => `
|
|
34862
|
+
prompt += `Respond with ONLY a JSON object with a relevant "scores" array: {"scores": [${Array.from({ length: candidates.length }, (_, i) => `snippet${i + 1}_score`).join(', ')}]}`;
|
|
34684
34863
|
return prompt;
|
|
34685
34864
|
}
|
|
34686
34865
|
/**
|
|
@@ -34810,7 +34989,7 @@ Snippets:
|
|
|
34810
34989
|
const modelsUrl = `${this.baseUrl}/api/tags`;
|
|
34811
34990
|
// Add timeout to prevent indefinite hanging
|
|
34812
34991
|
const controller = new AbortController();
|
|
34813
|
-
const timeoutId = setTimeout(() => controller.abort(), OLLAMA_VALIDATION_TIMEOUT_MS);
|
|
34992
|
+
const timeoutId = setTimeout(() => controller.abort(), OLLAMA_VALIDATION_TIMEOUT_MS$1);
|
|
34814
34993
|
// Check for proxy settings in environment variables
|
|
34815
34994
|
const httpsProxy = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
|
|
34816
34995
|
const httpProxy = process.env['HTTP_PROXY'] || process.env['http_proxy'];
|
|
@@ -34869,7 +35048,7 @@ Snippets:
|
|
|
34869
35048
|
const testUrl = `${this.baseUrl}/api/generate`;
|
|
34870
35049
|
// Add timeout for test request too
|
|
34871
35050
|
const testController = new AbortController();
|
|
34872
|
-
const testTimeoutId = setTimeout(() => testController.abort(), OLLAMA_VALIDATION_TIMEOUT_MS);
|
|
35051
|
+
const testTimeoutId = setTimeout(() => testController.abort(), OLLAMA_VALIDATION_TIMEOUT_MS$1);
|
|
34873
35052
|
const testFetchOptions = {
|
|
34874
35053
|
method: "POST",
|
|
34875
35054
|
headers: {
|
|
@@ -34940,16 +35119,21 @@ const OPENAI_VALIDATION_TIMEOUT_MS = 30000; // 30 seconds for validation request
|
|
|
34940
35119
|
* Implements the IReranker interface using OpenAI-compatible API endpoints with LLM-based reranking.
|
|
34941
35120
|
*/
|
|
34942
35121
|
class OpenAICompatibleReranker {
|
|
34943
|
-
constructor(baseUrl = "http://localhost:8080/v1", modelId = "gpt-4", apiKey = "", batchSize = 10) {
|
|
35122
|
+
constructor(baseUrl = "http://localhost:8080/v1", modelId = "gpt-4", apiKey = "", batchSize = 10, concurrency = 3, maxRetries = 3, retryDelayMs = 1000) {
|
|
34944
35123
|
// Normalize the baseUrl by removing all trailing slashes
|
|
34945
35124
|
const normalizedBaseUrl = baseUrl.replace(/\/+$/, "");
|
|
34946
35125
|
this.baseUrl = normalizedBaseUrl;
|
|
34947
35126
|
this.modelId = modelId;
|
|
34948
35127
|
this.apiKey = apiKey;
|
|
34949
35128
|
this.batchSize = batchSize;
|
|
35129
|
+
this.concurrency = concurrency;
|
|
35130
|
+
this.maxRetries = maxRetries;
|
|
35131
|
+
this.retryDelayMs = retryDelayMs;
|
|
34950
35132
|
}
|
|
34951
35133
|
/**
|
|
34952
|
-
* Reranks candidates using LLM-based scoring.
|
|
35134
|
+
* Reranks candidates using LLM-based scoring with batch-grouped concurrency.
|
|
35135
|
+
* Reference: src/cli-tools/outline.ts generateSummariesWithRetry function
|
|
35136
|
+
*
|
|
34953
35137
|
* @param query The search query
|
|
34954
35138
|
* @param candidates Array of candidates to rerank
|
|
34955
35139
|
* @returns Promise resolving to reranked results with LLM scores
|
|
@@ -34958,32 +35142,72 @@ class OpenAICompatibleReranker {
|
|
|
34958
35142
|
if (candidates.length === 0) {
|
|
34959
35143
|
return [];
|
|
34960
35144
|
}
|
|
34961
|
-
// If candidates count <= batchSize, process directly
|
|
35145
|
+
// If candidates count <= batchSize, process directly
|
|
34962
35146
|
if (candidates.length <= this.batchSize) {
|
|
34963
35147
|
return this.rerankSingleBatch(query, candidates);
|
|
34964
35148
|
}
|
|
34965
|
-
//
|
|
34966
|
-
const
|
|
34967
|
-
let processedCount = 0;
|
|
35149
|
+
// Group candidates into batches
|
|
35150
|
+
const batches = [];
|
|
34968
35151
|
for (let i = 0; i < candidates.length; i += this.batchSize) {
|
|
34969
|
-
|
|
34970
|
-
|
|
34971
|
-
|
|
34972
|
-
|
|
35152
|
+
batches.push(candidates.slice(i, i + this.batchSize));
|
|
35153
|
+
}
|
|
35154
|
+
// Process batches with concurrency control and retry logic
|
|
35155
|
+
let completedBatches = 0;
|
|
35156
|
+
const allResults = [];
|
|
35157
|
+
const processBatchWithRetry = async (batch, batchIndex) => {
|
|
35158
|
+
let attempt = 0;
|
|
35159
|
+
let lastError = null;
|
|
35160
|
+
console.log(`[OpenAICompatibleReranker] Starting batch ${batchIndex + 1}/${batches.length} with ${batch.length} candidates`);
|
|
35161
|
+
while (attempt < this.maxRetries) {
|
|
35162
|
+
try {
|
|
35163
|
+
const results = await this.rerankSingleBatch(query, batch);
|
|
35164
|
+
completedBatches++;
|
|
35165
|
+
if (completedBatches % 5 === 0 || completedBatches === batches.length) {
|
|
35166
|
+
console.log(`[OpenAICompatibleReranker] Progress: ${completedBatches}/${batches.length} batches completed`);
|
|
35167
|
+
}
|
|
35168
|
+
return results;
|
|
35169
|
+
}
|
|
35170
|
+
catch (error) {
|
|
35171
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
35172
|
+
attempt++;
|
|
35173
|
+
if (attempt < this.maxRetries) {
|
|
35174
|
+
// Exponential backoff
|
|
35175
|
+
const delay = this.retryDelayMs * Math.pow(2, attempt - 1);
|
|
35176
|
+
console.warn(`[OpenAICompatibleReranker] Batch ${batchIndex + 1} - Attempt ${attempt}/${this.maxRetries} FAILED: ` +
|
|
35177
|
+
`${lastError.message}. Retrying in ${delay}ms...`);
|
|
35178
|
+
await new Promise(resolve => setTimeout(resolve, delay));
|
|
35179
|
+
}
|
|
35180
|
+
else {
|
|
35181
|
+
// Max retries reached, use fallback scores
|
|
35182
|
+
console.warn(`[OpenAICompatibleReranker] Batch ${batchIndex + 1} - All ${this.maxRetries} attempts FAILED. ` +
|
|
35183
|
+
`Using fallback scores. Last error: ${lastError.message}`);
|
|
35184
|
+
const baseScore = 10 - batchIndex * 0.1;
|
|
35185
|
+
return batch.map((candidate, idx) => ({
|
|
35186
|
+
id: candidate.id,
|
|
35187
|
+
score: baseScore - idx * 0.01,
|
|
35188
|
+
originalScore: candidate.score,
|
|
35189
|
+
payload: candidate.payload
|
|
35190
|
+
}));
|
|
35191
|
+
}
|
|
35192
|
+
}
|
|
34973
35193
|
}
|
|
34974
|
-
|
|
34975
|
-
|
|
34976
|
-
|
|
34977
|
-
|
|
34978
|
-
|
|
34979
|
-
|
|
34980
|
-
|
|
34981
|
-
|
|
34982
|
-
|
|
34983
|
-
|
|
35194
|
+
// Should never reach here, but TypeScript needs a return
|
|
35195
|
+
return batch.map((candidate, idx) => ({
|
|
35196
|
+
id: candidate.id,
|
|
35197
|
+
score: 0,
|
|
35198
|
+
originalScore: candidate.score,
|
|
35199
|
+
payload: candidate.payload
|
|
35200
|
+
}));
|
|
35201
|
+
};
|
|
35202
|
+
// Process batches with concurrency control (group-based pattern)
|
|
35203
|
+
const processBatchesWithConcurrency = async () => {
|
|
35204
|
+
for (let i = 0; i < batches.length; i += this.concurrency) {
|
|
35205
|
+
const batchGroup = batches.slice(i, i + this.concurrency);
|
|
35206
|
+
const groupResults = await Promise.all(batchGroup.map((batch, idx) => processBatchWithRetry(batch, i + idx)));
|
|
35207
|
+
allResults.push(...groupResults.flat());
|
|
34984
35208
|
}
|
|
34985
|
-
|
|
34986
|
-
|
|
35209
|
+
};
|
|
35210
|
+
await processBatchesWithConcurrency();
|
|
34987
35211
|
// Merge and re-sort all results
|
|
34988
35212
|
allResults.sort((a, b) => b.score - a.score);
|
|
34989
35213
|
return allResults;
|
|
@@ -34995,32 +35219,21 @@ class OpenAICompatibleReranker {
|
|
|
34995
35219
|
* @returns Promise resolving to reranked results with LLM scores
|
|
34996
35220
|
*/
|
|
34997
35221
|
async rerankSingleBatch(query, candidates) {
|
|
34998
|
-
|
|
34999
|
-
|
|
35000
|
-
|
|
35001
|
-
|
|
35002
|
-
|
|
35003
|
-
|
|
35004
|
-
|
|
35005
|
-
|
|
35006
|
-
|
|
35007
|
-
|
|
35008
|
-
|
|
35009
|
-
|
|
35010
|
-
|
|
35011
|
-
|
|
35012
|
-
|
|
35013
|
-
}
|
|
35014
|
-
catch (error) {
|
|
35015
|
-
console.error("OpenAI-compatible LLM batch reranking failed, returning original order:", error);
|
|
35016
|
-
// Fallback to original order with default scores
|
|
35017
|
-
return candidates.map((candidate, index) => ({
|
|
35018
|
-
id: candidate.id,
|
|
35019
|
-
score: 10 - index * 0.1, // Slight decreasing scores to maintain order
|
|
35020
|
-
originalScore: candidate.score,
|
|
35021
|
-
payload: candidate.payload
|
|
35022
|
-
}));
|
|
35023
|
-
}
|
|
35222
|
+
// Build the scoring prompt with all candidates
|
|
35223
|
+
const prompt = this.buildScoringPrompt(query, candidates);
|
|
35224
|
+
// Call OpenAI-compatible /chat/completions endpoint
|
|
35225
|
+
// This will throw an error if generation fails, allowing the retry logic to kick in
|
|
35226
|
+
const scores = await this.generateScores(prompt);
|
|
35227
|
+
// Combine original candidates with LLM scores
|
|
35228
|
+
const results = candidates.map((candidate, index) => ({
|
|
35229
|
+
id: candidate.id,
|
|
35230
|
+
score: scores[index] || 0, // Default to 0 if no score
|
|
35231
|
+
originalScore: candidate.score,
|
|
35232
|
+
payload: candidate.payload
|
|
35233
|
+
}));
|
|
35234
|
+
// Sort by LLM score (descending) - this maintains order within the batch
|
|
35235
|
+
results.sort((a, b) => b.score - a.score);
|
|
35236
|
+
return results;
|
|
35024
35237
|
}
|
|
35025
35238
|
/**
|
|
35026
35239
|
* Builds the scoring prompt for the LLM.
|
|
@@ -35047,7 +35260,7 @@ Snippets:
|
|
|
35047
35260
|
---
|
|
35048
35261
|
`;
|
|
35049
35262
|
});
|
|
35050
|
-
prompt += `Respond with ONLY a JSON object with a relevant "scores" array: {"scores": [${Array.from({ length: candidates.length }, (_, i) => `
|
|
35263
|
+
prompt += `Respond with ONLY a JSON object with a relevant "scores" array: {"scores": [${Array.from({ length: candidates.length }, (_, i) => `snippet${i + 1}_score`).join(', ')}]}`;
|
|
35051
35264
|
return prompt;
|
|
35052
35265
|
}
|
|
35053
35266
|
/**
|
|
@@ -35365,6 +35578,721 @@ Snippets:
|
|
|
35365
35578
|
}
|
|
35366
35579
|
}
|
|
35367
35580
|
|
|
35581
|
+
// Timeout constants for Ollama API requests
|
|
35582
|
+
const OLLAMA_SUMMARIZE_TIMEOUT_MS = 60000; // 60 seconds for summarization
|
|
35583
|
+
const OLLAMA_VALIDATION_TIMEOUT_MS = 30000; // 30 seconds for validation
|
|
35584
|
+
/**
 * Implements the ISummarizer interface using a local Ollama instance with LLM-based summarization.
 *
 * Talks to the Ollama HTTP API (`/api/generate`, `/api/tags`) and honors the
 * standard HTTPS_PROXY / HTTP_PROXY environment variables when present.
 */
class OllamaSummarizer {
    /**
     * @param {string} baseUrl - Ollama base URL; trailing slashes are stripped.
     * @param {string} modelId - Model name as known to Ollama (with or without ":latest").
     * @param {string} defaultLanguage - Preferred output language for summaries.
     * @param {number} temperature - Sampling temperature forwarded to the model.
     */
    constructor(baseUrl = "http://localhost:11434", modelId = "qwen3-vl:4b-instruct", defaultLanguage = 'English', temperature = 0.3) {
        // Normalize the baseUrl by removing all trailing slashes
        const normalizedBaseUrl = baseUrl.replace(/\/+$/, "");
        this.baseUrl = normalizedBaseUrl;
        this.modelId = modelId;
        this.defaultLanguage = defaultLanguage;
        this.temperature = temperature;
    }
    /**
     * Generate a summary for the given code content.
     * Internally delegates to summarizeBatch() for unified processing.
     */
    async summarize(request) {
        // Wrap single request as a batch of one
        const batchRequest = {
            document: request.document,
            filePath: request.filePath,
            blocks: [{
                    content: request.content,
                    codeType: request.codeType,
                    codeName: request.codeName
                }],
            language: request.language
        };
        const result = await this.summarizeBatch(batchRequest);
        return result.summaries[0];
    }
    /**
     * Builds a unified batch prompt for summarizing code blocks.
     * Works for both single and batch requests.
     */
    buildPrompt(request) {
        const { blocks, language, document, filePath } = request;
        // Unified English prompt template
        let prompt = `Generate semantic descriptions for the following code snippets:\n\n`;
        // Add shared context once at the beginning
        if (filePath) {
            prompt += `[File]: ${filePath}\n\n`;
        }
        if (document) {
            prompt += `[Shared Context]:\n\`\`\`\n${document}\n\`\`\`\n\n`;
        }
        blocks.forEach((block, index) => {
            prompt += `### Snippet ${index + 1}\n\n`;
            prompt += `[Type]: ${block.codeType}${block.codeName ? ` "${block.codeName}"` : ''}\n\n`;
            prompt += `[Target Code]:\n`;
            if (block.content === document) {
                // Avoid duplicating the full document when a block covers it entirely
                prompt += `(See Shared Context)\n\n---\n\n`;
            }
            else {
                prompt += `\`\`\`\n${block.content}\n\`\`\`\n\n---\n\n`;
            }
        });
        prompt += `Requirements:\n`;
        prompt += `- Generate semantic description for each snippet\n`;
        prompt += `- Focus on logic, implementation details, business role\n`;
        prompt += `- **Start directly with verbs**, NO prefixes like "Function X" or "Class Y"\n`;
        prompt += `- For core implementations, include keywords like "implements", "logic"\n\n`;
        // Language-specific output instructions
        if (language === 'Chinese') {
            prompt += `IMPORTANT: Respond in **Chinese (中文)**. Each description must be 30-80 Chinese characters.\n\n`;
        }
        prompt += `IMPORTANT: Respond with ONLY the JSON object, no extra text.\n\n`;
        // Different format for single vs multiple blocks
        if (blocks.length === 1) {
            prompt += `Return format: {"summaries": "description"} (single string)\n`;
        }
        else {
            const descs = Array.from({ length: blocks.length }, (_, i) => `"snippet${i + 1}_desc"`).join(', ');
            prompt += `Return format: {"summaries": [${descs}]} (${blocks.length} descriptions)\n`;
        }
        return prompt;
    }
    /**
     * Extracts a complete JSON object from text using bracket matching.
     * This handles nested JSON objects correctly, unlike regex greedy matching.
     * @returns The extracted JSON string or null if not found
     */
    extractCompleteJsonObject(text) {
        // Find the first opening brace
        const startIndex = text.indexOf('{');
        if (startIndex === -1) {
            return null;
        }
        // Use stack to find matching closing brace
        let depth = 0;
        let inString = false;
        let escapeNext = false;
        for (let i = startIndex; i < text.length; i++) {
            const char = text[i];
            if (escapeNext) {
                escapeNext = false;
                continue;
            }
            if (char === '\\') {
                escapeNext = true;
                continue;
            }
            if (char === '"') {
                inString = !inString;
                continue;
            }
            if (!inString) {
                if (char === '{') {
                    depth++;
                }
                else if (char === '}') {
                    depth--;
                    if (depth === 0) {
                        // Found matching closing brace
                        return text.substring(startIndex, i + 1);
                    }
                }
            }
        }
        return null;
    }
    /**
     * Generate summaries for multiple code blocks in a single batch request.
     * This is more efficient than calling summarize() multiple times.
     * @throws {Error} on HTTP failure, malformed payload, unparsable JSON, or
     *                 a summaries count that does not match the request.
     */
    async summarizeBatch(request) {
        const prompt = this.buildPrompt(request);
        const url = `${this.baseUrl}/api/generate`;
        // Add timeout to prevent indefinite hanging
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), OLLAMA_SUMMARIZE_TIMEOUT_MS);
        // Check for proxy settings in environment variables
        const httpsProxy = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
        const httpProxy = process.env['HTTP_PROXY'] || process.env['http_proxy'];
        // Choose appropriate proxy based on target URL protocol
        let dispatcher = undefined;
        const proxyUrl = url.startsWith('https:') ? httpsProxy : httpProxy;
        if (proxyUrl) {
            try {
                dispatcher = new ProxyAgent_1(proxyUrl);
            }
            catch (error) {
                // Silently fail - proxy is optional
            }
        }
        try {
            const fetchOptions = {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                },
                body: JSON.stringify({
                    model: this.modelId,
                    prompt: prompt,
                    stream: false,
                    format: "json",
                    options: {
                        num_predict: 500, // Increased for batch responses
                        temperature: this.temperature
                    }
                }),
                signal: controller.signal,
            };
            if (dispatcher) {
                fetchOptions.dispatcher = dispatcher;
            }
            const response = await fetch$1(url, fetchOptions);
            if (!response.ok) {
                let errorBody = "Could not read error body";
                try {
                    errorBody = await response.text();
                }
                catch (e) {
                    // Ignore error reading body
                }
                throw new Error(`Ollama API error: ${response.status} - ${errorBody}`);
            }
            const data = await response.json();
            // Parse response: data.response is a JSON string.
            // Guard against malformed payloads so we fail with a clear error instead of
            // a TypeError on `.trim()` (mirrors the 'choices' validation in the
            // OpenAI-compatible summarizer).
            if (typeof data.response !== 'string') {
                throw new Error(`Invalid Ollama response format: missing 'response' field`);
            }
            const responseText = data.response.trim();
            // Try to extract JSON from the response with multiple fallback strategies
            let parsedResponse;
            try {
                // Strategy 1: Try direct parse
                parsedResponse = JSON.parse(responseText);
            }
            catch {
                // Strategy 2: Extract JSON from markdown code blocks
                let jsonMatch = responseText.match(/```json\s*([\s\S]*?)\s*```/) ||
                    responseText.match(/```\s*([\s\S]*?)\s*```/);
                if (jsonMatch) {
                    try {
                        parsedResponse = JSON.parse(jsonMatch[1].trim());
                    }
                    catch {
                        // Strategy 3: Use bracket matching to find complete JSON object
                        const extracted = this.extractCompleteJsonObject(responseText);
                        if (extracted) {
                            parsedResponse = JSON.parse(extracted);
                        }
                        else {
                            throw new Error(`Failed to parse batch response JSON after multiple attempts`);
                        }
                    }
                }
                else {
                    // Strategy 4: Use bracket matching to find complete JSON object
                    const extracted = this.extractCompleteJsonObject(responseText);
                    if (extracted) {
                        parsedResponse = JSON.parse(extracted);
                    }
                    else {
                        throw new Error(`Could not extract JSON from batch response`);
                    }
                }
            }
            // Validate response format - support both array and string (for single block with small models)
            let summariesArray = [];
            if (typeof parsedResponse.summaries === 'string') {
                // Small model may return {"summaries": "desc"} instead of {"summaries": ["desc"]}
                summariesArray = [parsedResponse.summaries];
            }
            else if (Array.isArray(parsedResponse.summaries)) {
                summariesArray = parsedResponse.summaries;
            }
            else {
                throw new Error(`Invalid batch response format: 'summaries' must be array or string`);
            }
            // Validate response length matches request length
            if (summariesArray.length !== request.blocks.length) {
                throw new Error(`Batch response length mismatch: expected ${request.blocks.length}, got ${summariesArray.length}`);
            }
            // Transform response to SummarizerBatchResult format
            const summaries = summariesArray.map((item) => {
                const text = typeof item === 'string' ? item : (item.desc1 || item.summary || '');
                return {
                    summary: text.trim(),
                    language: request.language
                };
            });
            return { summaries };
        }
        finally {
            clearTimeout(timeoutId);
        }
    }
    /**
     * Validates the Ollama summarizer configuration by checking service availability
     * and model existence, then running a tiny generation as a smoke test.
     * @returns {{valid: boolean, error?: string}} never throws for expected failures
     */
    async validateConfiguration() {
        try {
            // 1. Check if Ollama service is running by trying to list models
            const modelsUrl = `${this.baseUrl}/api/tags`;
            // Add timeout to prevent indefinite hanging
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), OLLAMA_VALIDATION_TIMEOUT_MS);
            // Check for proxy settings in environment variables
            const httpsProxy = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
            const httpProxy = process.env['HTTP_PROXY'] || process.env['http_proxy'];
            let dispatcher = undefined;
            const proxyUrl = modelsUrl.startsWith('https:') ? httpsProxy : httpProxy;
            if (proxyUrl) {
                try {
                    dispatcher = new ProxyAgent_1(proxyUrl);
                }
                catch (error) {
                    // Silently fail - proxy is optional
                }
            }
            try {
                const fetchOptions = {
                    method: "GET",
                    headers: {
                        "Content-Type": "application/json",
                    },
                    signal: controller.signal,
                };
                if (dispatcher) {
                    fetchOptions.dispatcher = dispatcher;
                }
                const modelsResponse = await fetch$1(modelsUrl, fetchOptions);
                if (!modelsResponse.ok) {
                    return {
                        valid: false,
                        error: `Ollama service unavailable at ${this.baseUrl} (status: ${modelsResponse.status})`
                    };
                }
                // 2. Check if model exists
                const modelsData = await modelsResponse.json();
                const models = modelsData.models || [];
                // Check both with and without :latest suffix
                const modelExists = models.some((m) => {
                    const name = m.name || "";
                    return (name === this.modelId ||
                        name === `${this.modelId}:latest` ||
                        name === this.modelId.replace(":latest", ""));
                });
                if (!modelExists) {
                    const available = models.map((m) => m.name).join(', ');
                    return {
                        valid: false,
                        error: `Model '${this.modelId}' not found. Available: ${available}`
                    };
                }
                // 3. Test generation
                const testUrl = `${this.baseUrl}/api/generate`;
                // Add timeout for test request too
                const testController = new AbortController();
                const testTimeoutId = setTimeout(() => testController.abort(), OLLAMA_VALIDATION_TIMEOUT_MS);
                try {
                    const testFetchOptions = {
                        method: "POST",
                        headers: {
                            "Content-Type": "application/json",
                        },
                        body: JSON.stringify({
                            model: this.modelId,
                            prompt: "test",
                            stream: false,
                            options: {
                                num_predict: 10
                            }
                        }),
                        signal: testController.signal,
                    };
                    if (dispatcher) {
                        testFetchOptions.dispatcher = dispatcher;
                    }
                    const testResponse = await fetch$1(testUrl, testFetchOptions);
                    if (!testResponse.ok) {
                        return {
                            valid: false,
                            error: `Model '${this.modelId}' failed generation test`
                        };
                    }
                }
                finally {
                    clearTimeout(testTimeoutId);
                }
                return { valid: true };
            }
            finally {
                clearTimeout(timeoutId);
            }
        }
        catch (error) {
            if (error.name === 'AbortError') {
                return { valid: false, error: 'Connection timeout' };
            }
            if (error.code === 'ECONNREFUSED' || error.message?.includes('ECONNREFUSED')) {
                return { valid: false, error: `Ollama not running at ${this.baseUrl}` };
            }
            return { valid: false, error: error.message };
        }
    }
    /** Identifies this summarizer implementation and its configured model. */
    get summarizerInfo() {
        return {
            name: 'ollama',
            model: this.modelId
        };
    }
}
|
|
35946
|
+
|
|
35947
|
+
// Timeout constants for OpenAI-compatible API requests.
// Used as AbortController deadlines so a hung HTTP call cannot block indexing forever.
const OPENAI_COMPATIBLE_SUMMARIZE_TIMEOUT_MS = 60000; // 60 seconds for summarization
const OPENAI_COMPATIBLE_VALIDATION_TIMEOUT_MS = 30000; // 30 seconds for validation
|
|
35950
|
+
/**
 * Extracts the first complete JSON object from free-form text using bracket
 * matching, which copes with nested objects where a greedy regex would not.
 * String contents (including escaped quotes) are skipped so braces inside
 * string values do not affect the nesting depth.
 * @returns The extracted JSON string, or null when no balanced object exists
 */
function extractCompleteJsonObject(text) {
    // Scanning starts at the first opening brace, if any.
    const startIndex = text.indexOf('{');
    if (startIndex === -1) {
        return null;
    }
    let depth = 0;
    let inString = false;
    let escapeNext = false;
    for (let i = startIndex; i < text.length; i++) {
        const char = text[i];
        if (escapeNext) {
            // Previous character was a backslash: this one is escaped.
            escapeNext = false;
        }
        else if (char === '\\') {
            escapeNext = true;
        }
        else if (char === '"') {
            inString = !inString;
        }
        else if (!inString) {
            if (char === '{') {
                depth += 1;
            }
            else if (char === '}') {
                depth -= 1;
                if (depth === 0) {
                    // Depth back to zero: the object that opened at startIndex is closed.
                    return text.slice(startIndex, i + 1);
                }
            }
        }
    }
    // Ran off the end without balancing the braces.
    return null;
}
|
|
35994
|
+
/**
 * Implements the ISummarizer interface using OpenAI-compatible API endpoints with LLM-based summarization.
 * Supports any OpenAI-compatible API such as DeepSeek, SiliconFlow, local LM Studio, etc.
 * Honors the standard HTTPS_PROXY / HTTP_PROXY environment variables when present.
 */
class OpenAICompatibleSummarizer {
    /**
     * @param {string} baseUrl - API base URL (including the `/v1` segment); trailing slashes are stripped.
     * @param {string} modelId - Model identifier to send in requests.
     * @param {string} apiKey - Optional bearer token; omitted from headers when empty.
     * @param {string} defaultLanguage - Preferred output language for summaries.
     * @param {number} temperature - Sampling temperature forwarded to the model.
     */
    constructor(baseUrl = "http://localhost:8080/v1", modelId = "gpt-4", apiKey = "", defaultLanguage = 'English', temperature = 0.3) {
        // Normalize the baseUrl by removing all trailing slashes
        const normalizedBaseUrl = baseUrl.replace(/\/+$/, "");
        this.baseUrl = normalizedBaseUrl;
        this.modelId = modelId;
        this.apiKey = apiKey;
        this.defaultLanguage = defaultLanguage;
        this.temperature = temperature;
    }
    /**
     * Generate a summary for the given code content.
     * Internally delegates to summarizeBatch() for unified processing.
     */
    async summarize(request) {
        // Wrap single request as a batch of one
        const batchRequest = {
            document: request.document,
            filePath: request.filePath,
            blocks: [{
                    content: request.content,
                    codeType: request.codeType,
                    codeName: request.codeName
                }],
            language: request.language
        };
        const result = await this.summarizeBatch(batchRequest);
        return result.summaries[0];
    }
    /**
     * Builds a unified batch prompt for summarizing code blocks.
     * Works for both single and batch requests.
     */
    buildPrompt(request) {
        const { blocks, language, document, filePath } = request;
        // Unified English prompt template
        let prompt = `Generate semantic descriptions for the following code snippets:\n\n`;
        // Add shared context once at the beginning
        if (filePath) {
            prompt += `[File]: ${filePath}\n\n`;
        }
        if (document) {
            prompt += `[Shared Context]:\n\`\`\`\n${document}\n\`\`\`\n\n`;
        }
        blocks.forEach((block, index) => {
            prompt += `### Snippet ${index + 1}\n\n`;
            prompt += `[Type]: ${block.codeType}${block.codeName ? ` "${block.codeName}"` : ''}\n\n`;
            prompt += `[Target Code]:\n`;
            if (block.content === document) {
                // Avoid duplicating the full document when a block covers it entirely
                prompt += `(See Shared Context)\n\n---\n\n`;
            }
            else {
                prompt += `\`\`\`\n${block.content}\n\`\`\`\n\n---\n\n`;
            }
        });
        prompt += `Requirements:\n`;
        prompt += `- Generate semantic description for each snippet\n`;
        prompt += `- Focus on logic, implementation details, business role\n`;
        prompt += `- **Start directly with verbs**, NO prefixes like "Function X" or "Class Y"\n`;
        prompt += `- For core implementations, include keywords like "implements", "logic"\n\n`;
        // Language-specific output instructions
        if (language === 'Chinese') {
            prompt += `IMPORTANT: Respond in **Chinese (中文)**. Each description must be 30-80 Chinese characters.\n\n`;
        }
        prompt += `IMPORTANT: Respond with ONLY the JSON object, no extra text.\n\n`;
        // Different format for single vs multiple blocks
        if (blocks.length === 1) {
            prompt += `Return format: {"summaries": "description"} (single string)\n`;
        }
        else {
            const descs = Array.from({ length: blocks.length }, (_, i) => `"snippet${i + 1}_desc"`).join(', ');
            prompt += `Return format: {"summaries": [${descs}]} (${blocks.length} descriptions)\n`;
        }
        return prompt;
    }
    /**
     * Generate summaries for multiple code blocks in a single batch request.
     * This is more efficient than calling summarize() multiple times.
     * @throws {Error} on HTTP failure, malformed payload, unparsable JSON, or
     *                 a summaries count that does not match the request.
     */
    async summarizeBatch(request) {
        const prompt = this.buildPrompt(request);
        const url = `${this.baseUrl}/chat/completions`;
        // Add timeout to prevent indefinite hanging
        const controller = new AbortController();
        const timeoutId = setTimeout(() => controller.abort(), OPENAI_COMPATIBLE_SUMMARIZE_TIMEOUT_MS);
        // Check for proxy settings in environment variables
        const httpsProxy = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
        const httpProxy = process.env['HTTP_PROXY'] || process.env['http_proxy'];
        // Choose appropriate proxy based on target URL protocol
        let dispatcher = undefined;
        const proxyUrl = url.startsWith('https:') ? httpsProxy : httpProxy;
        if (proxyUrl) {
            try {
                dispatcher = new ProxyAgent_1(proxyUrl);
            }
            catch (error) {
                // Silently fail - proxy is optional
            }
        }
        try {
            const headers = {
                "Content-Type": "application/json",
            };
            // Add Authorization header if API key is provided
            if (this.apiKey) {
                headers["Authorization"] = `Bearer ${this.apiKey}`;
            }
            const fetchOptions = {
                method: "POST",
                headers: headers,
                body: JSON.stringify({
                    model: this.modelId,
                    messages: [
                        {
                            role: "user",
                            content: prompt
                        }
                    ],
                    stream: false,
                    temperature: this.temperature,
                    max_tokens: 500 // Increased for batch responses
                }),
                signal: controller.signal,
            };
            if (dispatcher) {
                fetchOptions.dispatcher = dispatcher;
            }
            const response = await fetch$1(url, fetchOptions);
            if (!response.ok) {
                let errorBody = "Could not read error body";
                try {
                    errorBody = await response.text();
                }
                catch (e) {
                    // Ignore error reading body
                }
                throw new Error(`OpenAI-compatible API error: ${response.status} - ${errorBody}`);
            }
            const data = await response.json();
            // Parse response: data.choices[0].message.content
            if (!data.choices || !data.choices[0] || !data.choices[0].message) {
                throw new Error(`Invalid response format: missing 'choices' field`);
            }
            // Some providers return a null content (e.g. tool-call responses);
            // fail with a clear error instead of a TypeError on `.trim()`.
            if (typeof data.choices[0].message.content !== 'string') {
                throw new Error(`Invalid response format: 'message.content' is not a string`);
            }
            const responseText = data.choices[0].message.content.trim();
            // Try to extract JSON from the response with multiple fallback strategies
            let parsedResponse;
            try {
                // Strategy 1: Try direct parse
                parsedResponse = JSON.parse(responseText);
            }
            catch {
                // Strategy 2: Extract JSON from markdown code blocks
                let jsonMatch = responseText.match(/```json\s*([\s\S]*?)\s*```/) ||
                    responseText.match(/```\s*([\s\S]*?)\s*```/);
                if (jsonMatch) {
                    try {
                        parsedResponse = JSON.parse(jsonMatch[1].trim());
                    }
                    catch {
                        // Strategy 3: Use bracket matching to find complete JSON object
                        const extracted = extractCompleteJsonObject(responseText);
                        if (extracted) {
                            parsedResponse = JSON.parse(extracted);
                        }
                        else {
                            throw new Error(`Failed to parse batch response JSON after multiple attempts`);
                        }
                    }
                }
                else {
                    // Strategy 4: Use bracket matching to find complete JSON object
                    const extracted = extractCompleteJsonObject(responseText);
                    if (extracted) {
                        parsedResponse = JSON.parse(extracted);
                    }
                    else {
                        throw new Error(`Could not extract JSON from batch response`);
                    }
                }
            }
            // Validate response format - support both array and string (for single block with small models)
            let summariesArray = [];
            if (typeof parsedResponse.summaries === 'string') {
                // Small model may return {"summaries": "desc"} instead of {"summaries": ["desc"]}
                summariesArray = [parsedResponse.summaries];
            }
            else if (Array.isArray(parsedResponse.summaries)) {
                summariesArray = parsedResponse.summaries;
            }
            else {
                throw new Error(`Invalid batch response format: 'summaries' must be array or string`);
            }
            // Validate response length matches request length
            if (summariesArray.length !== request.blocks.length) {
                throw new Error(`Batch response length mismatch: expected ${request.blocks.length}, got ${summariesArray.length}`);
            }
            // Transform response to SummarizerBatchResult format
            const summaries = summariesArray.map((item) => {
                const text = typeof item === 'string' ? item : (item.desc1 || item.summary || '');
                return {
                    summary: text.trim(),
                    language: request.language
                };
            });
            return { summaries };
        }
        finally {
            clearTimeout(timeoutId);
        }
    }
    /**
     * Validates the OpenAI-compatible summarizer configuration by issuing a
     * minimal chat-completion request.
     * @returns {{valid: boolean, error?: string}} never throws for expected failures
     */
    async validateConfiguration() {
        try {
            // Test by calling the chat completions endpoint with a simple prompt
            const url = `${this.baseUrl}/chat/completions`;
            // Add timeout to prevent indefinite hanging
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), OPENAI_COMPATIBLE_VALIDATION_TIMEOUT_MS);
            // Check for proxy settings in environment variables
            const httpsProxy = process.env['HTTPS_PROXY'] || process.env['https_proxy'];
            const httpProxy = process.env['HTTP_PROXY'] || process.env['http_proxy'];
            let dispatcher = undefined;
            const proxyUrl = url.startsWith('https:') ? httpsProxy : httpProxy;
            if (proxyUrl) {
                try {
                    dispatcher = new ProxyAgent_1(proxyUrl);
                }
                catch (error) {
                    // Silently fail - proxy is optional
                }
            }
            try {
                const headers = {
                    "Content-Type": "application/json",
                };
                // Add Authorization header if API key is provided
                if (this.apiKey) {
                    headers["Authorization"] = `Bearer ${this.apiKey}`;
                }
                const fetchOptions = {
                    method: "POST",
                    headers: headers,
                    body: JSON.stringify({
                        model: this.modelId,
                        messages: [
                            {
                                role: "user",
                                content: "test"
                            }
                        ],
                        stream: false,
                        max_tokens: 10
                    }),
                    signal: controller.signal,
                };
                if (dispatcher) {
                    fetchOptions.dispatcher = dispatcher;
                }
                const response = await fetch$1(url, fetchOptions);
                if (!response.ok) {
                    let errorBody = "Could not read error body";
                    try {
                        errorBody = await response.text();
                    }
                    catch (e) {
                        // Ignore error reading body
                    }
                    return {
                        valid: false,
                        error: `API unavailable at ${this.baseUrl} (status: ${response.status}): ${errorBody}`
                    };
                }
                return { valid: true };
            }
            finally {
                clearTimeout(timeoutId);
            }
        }
        catch (error) {
            if (error.name === 'AbortError') {
                return { valid: false, error: 'Connection timeout' };
            }
            if (error.code === 'ECONNREFUSED' || error.message?.includes('ECONNREFUSED')) {
                return { valid: false, error: `Service not running at ${this.baseUrl}` };
            }
            return { valid: false, error: error.message };
        }
    }
    /** Identifies this summarizer implementation and its configured model. */
    get summarizerInfo() {
        return {
            name: 'openai-compatible',
            model: this.modelId
        };
    }
}
|
|
36295
|
+
|
|
35368
36296
|
class ApiError extends Error {
|
|
35369
36297
|
constructor(response) {
|
|
35370
36298
|
super(response.statusText);
|
|
@@ -41547,6 +42475,8 @@ function processCaptures(captures, lines, language) {
|
|
|
41547
42475
|
let formattedOutput = "";
|
|
41548
42476
|
// Sort captures by their start position
|
|
41549
42477
|
captures.sort((a, b) => a.node.startPosition.row - b.node.startPosition.row);
|
|
42478
|
+
// Calculate padding width based on file size (minimum 4 digits)
|
|
42479
|
+
const width = Math.max(4, String(lines.length).length);
|
|
41550
42480
|
// Track already processed lines to avoid duplicates
|
|
41551
42481
|
const processedLines = new Set();
|
|
41552
42482
|
const promoteToLineStartAncestor = (node) => {
|
|
@@ -41620,14 +42550,16 @@ function processCaptures(captures, lines, language) {
|
|
|
41620
42550
|
if (docstringLineCount >= getMinComponentLines()) {
|
|
41621
42551
|
const docstringKey = `${startLine}-${docstringEndLine}`;
|
|
41622
42552
|
if (!processedLines.has(docstringKey)) {
|
|
41623
|
-
|
|
42553
|
+
const range = `${startLine + 1}--${docstringEndLine + 1}`.padStart(width * 2 + 2, " ");
|
|
42554
|
+
formattedOutput += `${range} | ${lines[startLine]}\n`;
|
|
41624
42555
|
processedLines.add(docstringKey);
|
|
41625
42556
|
}
|
|
41626
42557
|
}
|
|
41627
42558
|
return;
|
|
41628
42559
|
}
|
|
41629
42560
|
// For other component definitions (classes, functions, etc.)
|
|
41630
|
-
|
|
42561
|
+
const range = `${displayStartLine + 1}--${endLine + 1}`.padStart(width * 2 + 2, " ");
|
|
42562
|
+
formattedOutput += `${range} | ${lines[displayStartLine]}\n`;
|
|
41631
42563
|
processedLines.add(lineKey);
|
|
41632
42564
|
});
|
|
41633
42565
|
if (formattedOutput.length > 0) {
|
|
@@ -42866,9 +43798,193 @@ function generateRelativeFilePath(normalizedAbsolutePath, workspaceRoot) {
|
|
|
42866
43798
|
* - Embedding generation
|
|
42867
43799
|
* - Vector store upserts
|
|
42868
43800
|
* - Cache updates
|
|
42869
|
-
* - Retry logic
|
|
43801
|
+
* - Retry logic with truncation fallback for oversized content
|
|
42870
43802
|
*/
|
|
42871
43803
|
class BatchProcessor {
|
|
43804
|
+
/**
|
|
43805
|
+
* Determines if an error is recoverable (e.g., context length exceeded)
|
|
43806
|
+
* Only these types of errors will trigger the truncation fallback
|
|
43807
|
+
*/
|
|
43808
|
+
_isRecoverableError(error) {
|
|
43809
|
+
const msg = error.message.toLowerCase();
|
|
43810
|
+
return (msg.includes("context length") ||
|
|
43811
|
+
msg.includes("exceeds") ||
|
|
43812
|
+
msg.includes("too long") ||
|
|
43813
|
+
msg.includes("input length") ||
|
|
43814
|
+
msg.includes("invalid input") ||
|
|
43815
|
+
msg.includes("token limit"));
|
|
43816
|
+
}
|
|
43817
|
+
/**
|
|
43818
|
+
* Truncates text by lines to maintain code integrity
|
|
43819
|
+
* Does not add language-specific truncation markers to avoid syntax compatibility issues
|
|
43820
|
+
*/
|
|
43821
|
+
_truncateTextByLines(text, maxChars) {
|
|
43822
|
+
if (text.length <= maxChars) {
|
|
43823
|
+
return text;
|
|
43824
|
+
}
|
|
43825
|
+
const lines = text.split('\n');
|
|
43826
|
+
const result = [];
|
|
43827
|
+
let currentLength = 0;
|
|
43828
|
+
for (const line of lines) {
|
|
43829
|
+
const lineWithNewline = line.length + 1;
|
|
43830
|
+
// Stop if adding this line would exceed the limit and we already have content
|
|
43831
|
+
if (currentLength + lineWithNewline > maxChars && result.length > 0) {
|
|
43832
|
+
break;
|
|
43833
|
+
}
|
|
43834
|
+
result.push(line);
|
|
43835
|
+
currentLength += lineWithNewline;
|
|
43836
|
+
}
|
|
43837
|
+
// Preserve at least part of the first line if nothing else was kept
|
|
43838
|
+
if (result.length === 0 && lines.length > 0) {
|
|
43839
|
+
result.push(lines[0].substring(0, maxChars));
|
|
43840
|
+
}
|
|
43841
|
+
return result.join('\n');
|
|
43842
|
+
}
|
|
43843
|
+
/**
|
|
43844
|
+
* Processes a single item with truncation retry logic
|
|
43845
|
+
* Uses the smaller of original text length and initial threshold as starting point
|
|
43846
|
+
* Recursively reduces threshold until success or minimum reached
|
|
43847
|
+
*/
|
|
43848
|
+
async _processItemWithTruncation(item, options, result, itemIndex) {
|
|
43849
|
+
const originalText = options.itemToText(item);
|
|
43850
|
+
const filePath = options.itemToFilePath(item);
|
|
43851
|
+
// Use the smaller of original text length and initial threshold
|
|
43852
|
+
let threshold = Math.min(originalText.length, TRUNCATION_INITIAL_THRESHOLD);
|
|
43853
|
+
// If original text is already short, this might be a different error - skip truncation
|
|
43854
|
+
if (originalText.length <= MIN_TRUNCATION_THRESHOLD) {
|
|
43855
|
+
console.warn(`[BatchProcessor] Original text is already short (${originalText.length} chars), ` +
|
|
43856
|
+
`skipping truncation for: ${filePath}`);
|
|
43857
|
+
return false;
|
|
43858
|
+
}
|
|
43859
|
+
for (let attempt = 0; attempt < MAX_TRUNCATION_ATTEMPTS; attempt++) {
|
|
43860
|
+
try {
|
|
43861
|
+
const textToEmbed = this._truncateTextByLines(originalText, threshold);
|
|
43862
|
+
// Skip if truncated text is too short
|
|
43863
|
+
if (textToEmbed.length < MIN_TRUNCATION_THRESHOLD) {
|
|
43864
|
+
console.warn(`[BatchProcessor] Text too short after truncation ` +
|
|
43865
|
+
`(${textToEmbed.length} chars < ${MIN_TRUNCATION_THRESHOLD}), skipping: ${filePath}`);
|
|
43866
|
+
return false;
|
|
43867
|
+
}
|
|
43868
|
+
// Try to generate embedding
|
|
43869
|
+
const { embeddings } = await options.embedder.createEmbeddings([textToEmbed]);
|
|
43870
|
+
// Use correct itemIndex for unique point ID
|
|
43871
|
+
const point = options.itemToPoint(item, embeddings[0], itemIndex);
|
|
43872
|
+
await options.vectorStore.upsertPoints([point]);
|
|
43873
|
+
const wasTruncated = textToEmbed.length < originalText.length;
|
|
43874
|
+
if (wasTruncated) {
|
|
43875
|
+
console.info(`[BatchProcessor] Successfully indexed truncated content: ` +
|
|
43876
|
+
`${filePath} (${textToEmbed.length}/${originalText.length} chars, ` +
|
|
43877
|
+
`${(textToEmbed.length / originalText.length * 100).toFixed(1)}%)`);
|
|
43878
|
+
}
|
|
43879
|
+
// Update cache (store original file hash)
|
|
43880
|
+
const fileHash = options.getFileHash?.(item);
|
|
43881
|
+
if (fileHash) {
|
|
43882
|
+
options.cacheManager.updateHash(filePath, fileHash);
|
|
43883
|
+
}
|
|
43884
|
+
result.processed++;
|
|
43885
|
+
result.processedFiles.push({
|
|
43886
|
+
path: filePath,
|
|
43887
|
+
status: "success",
|
|
43888
|
+
newHash: fileHash,
|
|
43889
|
+
truncated: wasTruncated
|
|
43890
|
+
});
|
|
43891
|
+
options.onProgress?.(result.processed, result.processed + result.failed, filePath);
|
|
43892
|
+
options.onItemIndexed?.(1);
|
|
43893
|
+
return true;
|
|
43894
|
+
}
|
|
43895
|
+
catch (error) {
|
|
43896
|
+
const nextThreshold = Math.floor(threshold * TRUNCATION_REDUCTION_FACTOR);
|
|
43897
|
+
// Stop retrying if below minimum threshold
|
|
43898
|
+
if (nextThreshold < MIN_TRUNCATION_THRESHOLD) {
|
|
43899
|
+
console.warn(`[BatchProcessor] Truncation attempt ${attempt + 1} failed, ` +
|
|
43900
|
+
`next threshold ${nextThreshold} below minimum ${MIN_TRUNCATION_THRESHOLD}, giving up`);
|
|
43901
|
+
break;
|
|
43902
|
+
}
|
|
43903
|
+
console.warn(`[BatchProcessor] Truncation attempt ${attempt + 1} failed at ${threshold} chars, ` +
|
|
43904
|
+
`will try ${nextThreshold} chars. Error: ${error.message}`);
|
|
43905
|
+
threshold = nextThreshold;
|
|
43906
|
+
}
|
|
43907
|
+
}
|
|
43908
|
+
// All attempts failed
|
|
43909
|
+
console.error(`[BatchProcessor] All truncation attempts failed for: ${filePath}`);
|
|
43910
|
+
return false;
|
|
43911
|
+
}
|
|
43912
|
+
/**
|
|
43913
|
+
* Fallback to individual item processing with timeout protection
|
|
43914
|
+
*/
|
|
43915
|
+
async _processItemsIndividually(batchItems, options, result, startIndex) {
|
|
43916
|
+
// Boundary check
|
|
43917
|
+
if (!batchItems || batchItems.length === 0) {
|
|
43918
|
+
return;
|
|
43919
|
+
}
|
|
43920
|
+
console.log(`[BatchProcessor] Falling back to individual processing for ${batchItems.length} items`);
|
|
43921
|
+
const startTime = Date.now();
|
|
43922
|
+
let successCount = 0;
|
|
43923
|
+
let failureCount = 0;
|
|
43924
|
+
for (let i = 0; i < batchItems.length; i++) {
|
|
43925
|
+
// Timeout protection
|
|
43926
|
+
if (Date.now() - startTime > INDIVIDUAL_PROCESSING_TIMEOUT_MS) {
|
|
43927
|
+
console.warn(`[BatchProcessor] Individual processing timeout after ${INDIVIDUAL_PROCESSING_TIMEOUT_MS}ms, ` +
|
|
43928
|
+
`skipping remaining ${batchItems.length - i} items`);
|
|
43929
|
+
// Mark remaining items as failed
|
|
43930
|
+
for (let j = i; j < batchItems.length; j++) {
|
|
43931
|
+
const filePath = options.itemToFilePath(batchItems[j]);
|
|
43932
|
+
result.failed++;
|
|
43933
|
+
result.processedFiles.push({
|
|
43934
|
+
path: filePath,
|
|
43935
|
+
status: "error",
|
|
43936
|
+
error: new Error("Individual processing timeout")
|
|
43937
|
+
});
|
|
43938
|
+
}
|
|
43939
|
+
break;
|
|
43940
|
+
}
|
|
43941
|
+
const item = batchItems[i];
|
|
43942
|
+
const filePath = options.itemToFilePath(item);
|
|
43943
|
+
try {
|
|
43944
|
+
// First try without truncation
|
|
43945
|
+
const text = options.itemToText(item);
|
|
43946
|
+
const { embeddings } = await options.embedder.createEmbeddings([text]);
|
|
43947
|
+
const point = options.itemToPoint(item, embeddings[0], startIndex + i);
|
|
43948
|
+
await options.vectorStore.upsertPoints([point]);
|
|
43949
|
+
const fileHash = options.getFileHash?.(item);
|
|
43950
|
+
if (fileHash) {
|
|
43951
|
+
options.cacheManager.updateHash(filePath, fileHash);
|
|
43952
|
+
}
|
|
43953
|
+
result.processed++;
|
|
43954
|
+
successCount++;
|
|
43955
|
+
result.processedFiles.push({
|
|
43956
|
+
path: filePath,
|
|
43957
|
+
status: "success",
|
|
43958
|
+
newHash: fileHash,
|
|
43959
|
+
truncated: false
|
|
43960
|
+
});
|
|
43961
|
+
options.onProgress?.(result.processed, result.processed + result.failed, filePath);
|
|
43962
|
+
options.onItemIndexed?.(1);
|
|
43963
|
+
}
|
|
43964
|
+
catch (itemError) {
|
|
43965
|
+
// Individual item failed, try truncation
|
|
43966
|
+
console.warn(`[BatchProcessor] Individual item failed, trying truncation: ${filePath}`);
|
|
43967
|
+
// Pass correct itemIndex
|
|
43968
|
+
const success = await this._processItemWithTruncation(item, options, result, startIndex + i);
|
|
43969
|
+
if (success) {
|
|
43970
|
+
successCount++;
|
|
43971
|
+
}
|
|
43972
|
+
else {
|
|
43973
|
+
// Truncation also failed, record error
|
|
43974
|
+
failureCount++;
|
|
43975
|
+
result.failed++;
|
|
43976
|
+
result.processedFiles.push({
|
|
43977
|
+
path: filePath,
|
|
43978
|
+
status: "error",
|
|
43979
|
+
error: itemError
|
|
43980
|
+
});
|
|
43981
|
+
options.onProgress?.(result.processed, result.processed + result.failed, filePath);
|
|
43982
|
+
}
|
|
43983
|
+
}
|
|
43984
|
+
}
|
|
43985
|
+
console.log(`[BatchProcessor] Individual processing completed: ` +
|
|
43986
|
+
`${successCount} succeeded, ${failureCount} failed`);
|
|
43987
|
+
}
|
|
42872
43988
|
async processBatch(items, options) {
|
|
42873
43989
|
// console.log(`[BatchProcessor] Starting batch processing for ${items.length} items`)
|
|
42874
43990
|
const result = { processed: 0, failed: 0, errors: [], processedFiles: [] };
|
|
@@ -42936,6 +44052,9 @@ class BatchProcessor {
|
|
|
42936
44052
|
await this.processSingleBatch(batchItems, options, result, i);
|
|
42937
44053
|
}
|
|
42938
44054
|
}
|
|
44055
|
+
/**
|
|
44056
|
+
* Process a single batch with fallback to individual processing on recoverable errors
|
|
44057
|
+
*/
|
|
42939
44058
|
async processSingleBatch(batchItems, options, result, startIndex) {
|
|
42940
44059
|
let attempts = 0;
|
|
42941
44060
|
let success = false;
|
|
@@ -42962,7 +44081,8 @@ class BatchProcessor {
|
|
|
42962
44081
|
result.processedFiles.push({
|
|
42963
44082
|
path: filePath,
|
|
42964
44083
|
status: "success",
|
|
42965
|
-
newHash: fileHash
|
|
44084
|
+
newHash: fileHash,
|
|
44085
|
+
truncated: false
|
|
42966
44086
|
});
|
|
42967
44087
|
options.onProgress?.(result.processed, result.processed + result.failed, filePath);
|
|
42968
44088
|
}
|
|
@@ -42977,12 +44097,26 @@ class BatchProcessor {
|
|
|
42977
44097
|
}
|
|
42978
44098
|
}
|
|
42979
44099
|
}
|
|
44100
|
+
// Fallback: batch failed, try individual processing for recoverable errors
|
|
42980
44101
|
if (!success && lastError) {
|
|
44102
|
+
// Check if this is a recoverable error and truncation fallback is enabled
|
|
44103
|
+
if (this._isRecoverableError(lastError)) {
|
|
44104
|
+
console.warn(`[BatchProcessor] Batch failed with recoverable error: "${lastError.message}". ` +
|
|
44105
|
+
`Falling back to individual processing...`);
|
|
44106
|
+
try {
|
|
44107
|
+
await this._processItemsIndividually(batchItems, options, result, startIndex);
|
|
44108
|
+
return; // Fallback completed successfully, don't throw error
|
|
44109
|
+
}
|
|
44110
|
+
catch (fallbackError) {
|
|
44111
|
+
// Fallback also failed, log and continue with original error handling
|
|
44112
|
+
console.error(`[BatchProcessor] Fallback processing also failed:`, fallbackError);
|
|
44113
|
+
}
|
|
44114
|
+
}
|
|
44115
|
+
// Fatal error: mark entire batch as failed (preserve original behavior)
|
|
42981
44116
|
result.failed += batchItems.length;
|
|
42982
44117
|
result.errors.push(lastError);
|
|
42983
44118
|
const errorMessage = `Failed to process batch after ${MAX_BATCH_RETRIES} attempts: ${lastError.message}`;
|
|
42984
44119
|
const batchError = new Error(errorMessage);
|
|
42985
|
-
result.errors.push(batchError);
|
|
42986
44120
|
options.onError?.(batchError);
|
|
42987
44121
|
// Record failed items and still report progress
|
|
42988
44122
|
for (const item of batchItems) {
|
|
@@ -44978,10 +46112,10 @@ class CodeIndexServiceFactory {
|
|
|
44978
46112
|
return undefined;
|
|
44979
46113
|
}
|
|
44980
46114
|
if (config.provider === 'ollama') {
|
|
44981
|
-
return new OllamaLLMReranker(config.ollamaBaseUrl || 'http://localhost:11434', config.ollamaModelId || 'qwen3-vl:4b-instruct', config.batchSize || 10);
|
|
46115
|
+
return new OllamaLLMReranker(config.ollamaBaseUrl || 'http://localhost:11434', config.ollamaModelId || 'qwen3-vl:4b-instruct', config.batchSize || 10, config.concurrency || 3, config.maxRetries || 3, config.retryDelayMs || 1000);
|
|
44982
46116
|
}
|
|
44983
46117
|
if (config.provider === 'openai-compatible') {
|
|
44984
|
-
return new OpenAICompatibleReranker(config.openAiCompatibleBaseUrl || 'http://localhost:8080/v1', config.openAiCompatibleModelId || 'gpt-4', config.openAiCompatibleApiKey || '', config.batchSize || 10);
|
|
46118
|
+
return new OpenAICompatibleReranker(config.openAiCompatibleBaseUrl || 'http://localhost:8080/v1', config.openAiCompatibleModelId || 'gpt-4', config.openAiCompatibleApiKey || '', config.batchSize || 10, config.concurrency || 3, config.maxRetries || 3, config.retryDelayMs || 1000);
|
|
44985
46119
|
}
|
|
44986
46120
|
// If provider is undefined or unknown, return undefined
|
|
44987
46121
|
return undefined;
|
|
@@ -45003,6 +46137,37 @@ class CodeIndexServiceFactory {
|
|
|
45003
46137
|
};
|
|
45004
46138
|
}
|
|
45005
46139
|
}
|
|
46140
|
+
/**
|
|
46141
|
+
* Creates a summarizer instance based on the current configuration.
|
|
46142
|
+
* @returns ISummarizer instance (always returns an instance, configuration is validated when used)
|
|
46143
|
+
*/
|
|
46144
|
+
createSummarizer() {
|
|
46145
|
+
const config = this.configManager.summarizerConfig;
|
|
46146
|
+
if (config.provider === 'ollama') {
|
|
46147
|
+
return new OllamaSummarizer(config.ollamaBaseUrl || 'http://localhost:11434', config.ollamaModelId || 'qwen3-vl:4b-instruct', config.language || 'English', config.temperature ?? 0);
|
|
46148
|
+
}
|
|
46149
|
+
if (config.provider === 'openai-compatible') {
|
|
46150
|
+
return new OpenAICompatibleSummarizer(config.openAiCompatibleBaseUrl || 'http://localhost:8080/v1', config.openAiCompatibleModelId || 'gpt-4', config.openAiCompatibleApiKey || '', config.language || 'English', config.temperature ?? 0);
|
|
46151
|
+
}
|
|
46152
|
+
// Fallback to ollama if provider unknown
|
|
46153
|
+
return new OllamaSummarizer('http://localhost:11434', 'qwen3-vl:4b-instruct', 'English');
|
|
46154
|
+
}
|
|
46155
|
+
/**
|
|
46156
|
+
* Validates a summarizer instance
|
|
46157
|
+
* @param summarizer The summarizer instance to validate
|
|
46158
|
+
* @returns Promise resolving to validation result
|
|
46159
|
+
*/
|
|
46160
|
+
async validateSummarizer(summarizer) {
|
|
46161
|
+
try {
|
|
46162
|
+
return await summarizer.validateConfiguration();
|
|
46163
|
+
}
|
|
46164
|
+
catch (error) {
|
|
46165
|
+
return {
|
|
46166
|
+
valid: false,
|
|
46167
|
+
error: error instanceof Error ? error.message : 'Summarizer validation failed'
|
|
46168
|
+
};
|
|
46169
|
+
}
|
|
46170
|
+
}
|
|
45006
46171
|
}
|
|
45007
46172
|
|
|
45008
46173
|
/**
|
|
@@ -45339,39 +46504,41 @@ class CodeIndexOrchestrator {
|
|
|
45339
46504
|
if (!result) {
|
|
45340
46505
|
throw new Error("Full scan failed, is scanner initialized?");
|
|
45341
46506
|
}
|
|
45342
|
-
// Enhanced error detection and reporting
|
|
45343
|
-
if (
|
|
45344
|
-
|
|
45345
|
-
|
|
46507
|
+
// Enhanced error detection and reporting - tolerate partial failures
|
|
46508
|
+
// Only throw if we found blocks but indexed NONE of them (complete failure)
|
|
46509
|
+
if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
|
|
46510
|
+
const firstError = batchErrors.length > 0 ? batchErrors[0] : null;
|
|
46511
|
+
const errorMsg = firstError
|
|
46512
|
+
? `Indexing failed completely: ${firstError.message}`
|
|
46513
|
+
: t("embeddings:orchestrator.indexingFailedCritical");
|
|
46514
|
+
throw new Error(errorMsg);
|
|
45346
46515
|
}
|
|
45347
|
-
|
|
45348
|
-
|
|
45349
|
-
|
|
45350
|
-
|
|
46516
|
+
// Partial failures: log warnings but don't throw
|
|
46517
|
+
// This allows indexing to complete even when some batches fail (e.g., oversized content)
|
|
46518
|
+
if (batchErrors.length > 0) {
|
|
46519
|
+
const successRate = cumulativeBlocksFoundSoFar > 0
|
|
46520
|
+
? (cumulativeBlocksIndexed / cumulativeBlocksFoundSoFar * 100).toFixed(1)
|
|
46521
|
+
: "0";
|
|
46522
|
+
this.warn(`[CodeIndexOrchestrator] Indexing completed with partial failures. ` +
|
|
46523
|
+
`Success rate: ${successRate}% (${cumulativeBlocksIndexed}/${cumulativeBlocksFoundSoFar} blocks). ` +
|
|
46524
|
+
`Batch errors: ${batchErrors.length}`);
|
|
46525
|
+
// Log first 3 errors in detail
|
|
46526
|
+
for (const error of batchErrors.slice(0, 3)) {
|
|
46527
|
+
this.warn(`[CodeIndexOrchestrator] Batch error: ${error.message}`);
|
|
46528
|
+
}
|
|
46529
|
+
if (batchErrors.length > 3) {
|
|
46530
|
+
this.warn(`[CodeIndexOrchestrator] ... and ${batchErrors.length - 3} more errors`);
|
|
45351
46531
|
}
|
|
45352
|
-
}
|
|
45353
|
-
// Check for partial failures - if a significant portion of blocks failed
|
|
45354
|
-
const failureRate = (cumulativeBlocksFoundSoFar - cumulativeBlocksIndexed) / cumulativeBlocksFoundSoFar;
|
|
45355
|
-
if (batchErrors.length > 0 && failureRate > 0.1) {
|
|
45356
|
-
// More than 10% of blocks failed to index
|
|
45357
|
-
const firstError = batchErrors[0];
|
|
45358
|
-
throw new Error(`Indexing partially failed: Only ${cumulativeBlocksIndexed} of ${cumulativeBlocksFoundSoFar} blocks were indexed. ${firstError.message}`);
|
|
45359
|
-
}
|
|
45360
|
-
// CRITICAL: If there were ANY batch errors and NO blocks were successfully indexed,
|
|
45361
|
-
// this is a complete failure regardless of the failure rate calculation
|
|
45362
|
-
if (batchErrors.length > 0 && cumulativeBlocksIndexed === 0) {
|
|
45363
|
-
const firstError = batchErrors[0];
|
|
45364
|
-
throw new Error(`Indexing failed completely: ${firstError.message}`);
|
|
45365
|
-
}
|
|
45366
|
-
// Final sanity check: If we found blocks but indexed none and somehow no errors were reported,
|
|
45367
|
-
// this is still a failure
|
|
45368
|
-
if (cumulativeBlocksFoundSoFar > 0 && cumulativeBlocksIndexed === 0) {
|
|
45369
|
-
throw new Error(t("embeddings:orchestrator.indexingFailedCritical"));
|
|
45370
46532
|
}
|
|
45371
46533
|
await this._startWatcher();
|
|
45372
46534
|
// Mark indexing as complete after successful full scan
|
|
45373
46535
|
await this.vectorStore.markIndexingComplete();
|
|
45374
|
-
|
|
46536
|
+
// Set state message - include error info if there were partial failures
|
|
46537
|
+
const message = batchErrors.length > 0
|
|
46538
|
+
? `Indexing completed with ${batchErrors.length} errors. ` +
|
|
46539
|
+
`${cumulativeBlocksIndexed}/${cumulativeBlocksFoundSoFar} blocks indexed.`
|
|
46540
|
+
: t("embeddings:orchestrator.fileWatcherStarted");
|
|
46541
|
+
this.stateManager.setSystemState("Indexed", message);
|
|
45375
46542
|
}
|
|
45376
46543
|
}
|
|
45377
46544
|
catch (error) {
|
|
@@ -46237,6 +47404,26 @@ class CodeIndexManager {
|
|
|
46237
47404
|
workspacePath: this.workspacePath,
|
|
46238
47405
|
};
|
|
46239
47406
|
}
|
|
47407
|
+
/**
|
|
47408
|
+
* Get components needed for dry-run mode
|
|
47409
|
+
* Provides controlled access to internal components for preview operations
|
|
47410
|
+
* @returns Object containing all necessary components for dry-run
|
|
47411
|
+
*/
|
|
47412
|
+
getDryRunComponents() {
|
|
47413
|
+
if (!this._orchestrator || !this._cacheManager) {
|
|
47414
|
+
throw new Error('Manager not initialized. Call initialize() first.');
|
|
47415
|
+
}
|
|
47416
|
+
// Get vector store from orchestrator
|
|
47417
|
+
const vectorStore = this._orchestrator.getVectorStore();
|
|
47418
|
+
return {
|
|
47419
|
+
scanner: this._orchestrator.scanner,
|
|
47420
|
+
cacheManager: this._cacheManager,
|
|
47421
|
+
vectorStore: vectorStore,
|
|
47422
|
+
workspace: this.dependencies.workspace,
|
|
47423
|
+
fileSystem: this.dependencies.fileSystem,
|
|
47424
|
+
pathUtils: this.dependencies.pathUtils
|
|
47425
|
+
};
|
|
47426
|
+
}
|
|
46240
47427
|
async reconcileIndex(vectorStore, scanner) {
|
|
46241
47428
|
const logger = this.dependencies.logger;
|
|
46242
47429
|
logger?.info("Reconciling index with filesystem...");
|
|
@@ -46291,6 +47478,10 @@ class CodeIndexManager {
|
|
|
46291
47478
|
return;
|
|
46292
47479
|
}
|
|
46293
47480
|
// Create .gitignore instance
|
|
47481
|
+
// First ensure ignore rules are loaded by calling shouldIgnore on a dummy path
|
|
47482
|
+
// Use a dummy file path to trigger loading without causing empty path errors
|
|
47483
|
+
const dummyPath = path__default.join(workspacePath, "dummy.txt");
|
|
47484
|
+
await this.dependencies.workspace.shouldIgnore(dummyPath);
|
|
46294
47485
|
const ignoreRules = this.dependencies.workspace.getIgnoreRules();
|
|
46295
47486
|
ignoreInstance.add(ignoreRules);
|
|
46296
47487
|
// (Re)Create shared service instances
|
|
@@ -46719,6 +47910,7 @@ class NodeWorkspace {
|
|
|
46719
47910
|
this.ignoreRulesLoaded = false;
|
|
46720
47911
|
this.rootPath = options.rootPath;
|
|
46721
47912
|
this.ignoreFiles = options.ignoreFiles || ['.gitignore', '.rooignore', '.codebaseignore'];
|
|
47913
|
+
this.ignoreInstance = ignore$1();
|
|
46722
47914
|
}
|
|
46723
47915
|
getRootPath() {
|
|
46724
47916
|
return this.rootPath;
|
|
@@ -46729,30 +47921,48 @@ class NodeWorkspace {
|
|
|
46729
47921
|
return path.relative(this.rootPath, fullPath);
|
|
46730
47922
|
}
|
|
46731
47923
|
getIgnoreRules() {
|
|
47924
|
+
// Ensure rules are loaded before returning
|
|
47925
|
+
if (!this.ignoreRulesLoaded) {
|
|
47926
|
+
// Note: This is a sync method, but loadIgnoreRules is async
|
|
47927
|
+
// In practice, rules should be loaded by shouldIgnore() before this is called
|
|
47928
|
+
// We'll return the current rules (may be empty if not loaded yet)
|
|
47929
|
+
console.warn('getIgnoreRules() called before loadIgnoreRules() - rules may be empty');
|
|
47930
|
+
}
|
|
46732
47931
|
return this.ignoreRules;
|
|
46733
47932
|
}
|
|
47933
|
+
/**
|
|
47934
|
+
* Get ignore patterns formatted for fast-glob
|
|
47935
|
+
* Converts simple directory names to glob patterns with /** suffix
|
|
47936
|
+
*/
|
|
47937
|
+
async getGlobIgnorePatterns() {
|
|
47938
|
+
await this.loadIgnoreRules();
|
|
47939
|
+
const allIgnores = [...NodeWorkspace.DEFAULT_IGNORES, ...this.ignoreRules];
|
|
47940
|
+
// Convert to fast-glob format
|
|
47941
|
+
return allIgnores.map(pattern => {
|
|
47942
|
+
// If pattern contains no path separator and no wildcard, treat as directory
|
|
47943
|
+
if (!pattern.includes('/') && !pattern.includes('*')) {
|
|
47944
|
+
return `${pattern}/**`;
|
|
47945
|
+
}
|
|
47946
|
+
// If pattern ends with /, add **
|
|
47947
|
+
if (pattern.endsWith('/')) {
|
|
47948
|
+
return `${pattern}**`;
|
|
47949
|
+
}
|
|
47950
|
+
// Otherwise return as-is (already a glob pattern)
|
|
47951
|
+
return pattern;
|
|
47952
|
+
});
|
|
47953
|
+
}
|
|
46734
47954
|
async shouldIgnore(filePath) {
|
|
46735
47955
|
await this.loadIgnoreRules();
|
|
46736
47956
|
const relativePath = this.getRelativePath(filePath);
|
|
46737
|
-
//
|
|
46738
|
-
|
|
46739
|
-
|
|
46740
|
-
|
|
46741
|
-
|
|
46742
|
-
|
|
46743
|
-
|
|
46744
|
-
|
|
46745
|
-
|
|
46746
|
-
'*.log',
|
|
46747
|
-
'.env',
|
|
46748
|
-
'.env.local',
|
|
46749
|
-
'.DS_Store',
|
|
46750
|
-
'Thumbs.db'
|
|
46751
|
-
];
|
|
46752
|
-
const allIgnores = [...defaultIgnores, ...this.ignoreRules];
|
|
46753
|
-
return allIgnores.some(pattern => {
|
|
46754
|
-
return this.matchPattern(relativePath, pattern);
|
|
46755
|
-
});
|
|
47957
|
+
// Handle empty relative path (when filePath equals rootPath)
|
|
47958
|
+
if (relativePath === '') {
|
|
47959
|
+
return false; // Root directory itself is not ignored
|
|
47960
|
+
}
|
|
47961
|
+
// Use ignore instance for proper gitignore semantics
|
|
47962
|
+
this.ignoreInstance = ignore$1().add(NodeWorkspace.DEFAULT_IGNORES).add(this.ignoreRules);
|
|
47963
|
+
// ignore expects paths to use forward slashes
|
|
47964
|
+
const normalizedPath = relativePath.split(path.sep).join('/');
|
|
47965
|
+
return this.ignoreInstance.ignores(normalizedPath);
|
|
46756
47966
|
}
|
|
46757
47967
|
getName() {
|
|
46758
47968
|
return path.basename(this.rootPath) || 'workspace';
|
|
@@ -46802,9 +48012,11 @@ class NodeWorkspace {
|
|
|
46802
48012
|
}
|
|
46803
48013
|
this.ignoreRulesLoaded = true;
|
|
46804
48014
|
}
|
|
48015
|
+
/**
|
|
48016
|
+
* Simple glob pattern matching for findFiles method
|
|
48017
|
+
* Note: This is NOT used for gitignore semantics (shouldIgnore uses ignore library)
|
|
48018
|
+
*/
|
|
46805
48019
|
matchPattern(filePath, pattern) {
|
|
46806
|
-
// Simple glob pattern matching
|
|
46807
|
-
// Convert glob pattern to regex
|
|
46808
48020
|
const regexPattern = pattern
|
|
46809
48021
|
.replace(/\./g, '\\.')
|
|
46810
48022
|
.replace(/\*/g, '.*')
|
|
@@ -46831,6 +48043,21 @@ class NodeWorkspace {
|
|
|
46831
48043
|
}
|
|
46832
48044
|
}
|
|
46833
48045
|
}
|
|
48046
|
+
// Default ignore patterns (common across all projects)
|
|
48047
|
+
NodeWorkspace.DEFAULT_IGNORES = [
|
|
48048
|
+
'node_modules',
|
|
48049
|
+
'.git',
|
|
48050
|
+
'.svn',
|
|
48051
|
+
'.hg',
|
|
48052
|
+
'dist',
|
|
48053
|
+
'build',
|
|
48054
|
+
'coverage',
|
|
48055
|
+
'*.log',
|
|
48056
|
+
'.env',
|
|
48057
|
+
'.env.local',
|
|
48058
|
+
'.DS_Store',
|
|
48059
|
+
'Thumbs.db'
|
|
48060
|
+
];
|
|
46834
48061
|
class NodePathUtils {
|
|
46835
48062
|
join(...paths) {
|
|
46836
48063
|
return path.join(...paths);
|
|
@@ -49900,5 +51127,5 @@ function codebase() {
|
|
|
49900
51127
|
return 'codebase';
|
|
49901
51128
|
}
|
|
49902
51129
|
|
|
49903
|
-
export { BATCH_PROCESSING_CONCURRENCY, BATCH_SEGMENT_THRESHOLD, CacheManager, CodeIndexConfigManager, CodeIndexManager, CodeIndexOllamaEmbedder, CodeIndexOrchestrator, CodeIndexSearchService, CodeIndexServiceFactory, CodeIndexStateManager, CodeParser, DEFAULT_CONFIG, DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE, DirectoryScanner, EMBEDDER_BATCH_SIZES, FileWatcher, GEMINI_MAX_ITEM_TOKENS, INITIAL_RETRY_DELAY_MS, MAX_BATCH_RETRIES, MAX_BATCH_TOKENS, MAX_BLOCK_CHARS, MAX_CHARS_TOLERANCE_FACTOR, MAX_FILE_SIZE_BYTES, MAX_ITEM_TOKENS, MAX_LIST_FILES_LIMIT_CODE_INDEX, MAX_PENDING_BATCHES, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, NodeConfigProvider, NodeEventBus, NodeFileSystem, NodeFileWatcher, NodeLogger, NodePathUtils, NodeStorage, NodeWorkspace, OpenAICompatibleEmbedder, OpenAiEmbedder, PARSING_CONCURRENCY, QDRANT_CODE_BLOCK_NAMESPACE, QdrantVectorStore, codeParser, codebase, createNodeDependencies, createSimpleNodeDependencies, executeRipgrep, executeRipgrepForFiles, extensions, fallbackExtensions, generateNormalizedAbsolutePath, generateRelativeFilePath, getBatchSizeForEmbedder, getMinComponentLines, listFiles, parseSourceCodeDefinitionsForFile, parseSourceCodeForDefinitionsTopLevel, scannerExtensions, searchWorkspaceFiles, setMinComponentLines, shouldUseFallbackChunking };
|
|
51130
|
+
export { BATCH_PROCESSING_CONCURRENCY, BATCH_SEGMENT_THRESHOLD, CacheManager, CodeIndexConfigManager, CodeIndexManager, CodeIndexOllamaEmbedder, CodeIndexOrchestrator, CodeIndexSearchService, CodeIndexServiceFactory, CodeIndexStateManager, CodeParser, DEFAULT_CONFIG, DEFAULT_MAX_SEARCH_RESULTS, DEFAULT_SEARCH_MIN_SCORE, DirectoryScanner, EMBEDDER_BATCH_SIZES, ENABLE_TRUNCATION_FALLBACK, FileWatcher, GEMINI_MAX_ITEM_TOKENS, INDIVIDUAL_PROCESSING_TIMEOUT_MS, INITIAL_RETRY_DELAY_MS, MAX_BATCH_RETRIES, MAX_BATCH_TOKENS, MAX_BLOCK_CHARS, MAX_CHARS_TOLERANCE_FACTOR, MAX_FILE_SIZE_BYTES, MAX_ITEM_TOKENS, MAX_LIST_FILES_LIMIT_CODE_INDEX, MAX_PENDING_BATCHES, MAX_TRUNCATION_ATTEMPTS, MIN_BLOCK_CHARS, MIN_CHUNK_REMAINDER_CHARS, MIN_TRUNCATION_THRESHOLD, NodeConfigProvider, NodeEventBus, NodeFileSystem, NodeFileWatcher, NodeLogger, NodePathUtils, NodeStorage, NodeWorkspace, OpenAICompatibleEmbedder, OpenAiEmbedder, PARSING_CONCURRENCY, QDRANT_CODE_BLOCK_NAMESPACE, QdrantVectorStore, TRUNCATION_INITIAL_THRESHOLD, TRUNCATION_REDUCTION_FACTOR, codeParser, codebase, createNodeDependencies, createSimpleNodeDependencies, executeRipgrep, executeRipgrepForFiles, extensions, fallbackExtensions, generateNormalizedAbsolutePath, generateRelativeFilePath, getBatchSizeForEmbedder, getMinComponentLines, listFiles, parseSourceCodeDefinitionsForFile, parseSourceCodeForDefinitionsTopLevel, scannerExtensions, searchWorkspaceFiles, setMinComponentLines, shouldUseFallbackChunking };
|
|
49904
51131
|
//# sourceMappingURL=index.js.map
|