@juspay/yama 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,8 +4,11 @@
4
4
  */
5
5
  // NeuroLink will be dynamically imported
6
6
  import { ProviderError, } from "../types/index.js";
7
+ import { createMultiInstanceProcessor, } from "./MultiInstanceProcessor.js";
7
8
  import { logger } from "../utils/Logger.js";
8
9
  import { getProviderTokenLimit } from "../utils/ProviderLimits.js";
10
+ import { Semaphore, TokenBudgetManager, calculateOptimalConcurrency, } from "../utils/ParallelProcessing.js";
11
+ import { createExactDuplicateRemover } from "../utils/ExactDuplicateRemover.js";
9
12
  export class CodeReviewer {
10
13
  neurolink;
11
14
  bitbucketProvider;
@@ -17,37 +20,46 @@ export class CodeReviewer {
17
20
  this.reviewConfig = reviewConfig;
18
21
  }
19
22
  /**
20
- * Review code using pre-gathered unified context (OPTIMIZED with Batch Processing)
23
+ * Review code using pre-gathered unified context (OPTIMIZED with Multi-Instance and Batch Processing)
21
24
  */
22
- async reviewCodeWithContext(context, options) {
25
+ async reviewCodeWithContext(context, options, multiInstanceConfig) {
23
26
  const startTime = Date.now();
24
27
  try {
25
28
  logger.phase("🧪 Conducting AI-powered code analysis...");
26
29
  logger.info(`Analyzing ${context.diffStrategy.fileCount} files using ${context.diffStrategy.strategy} strategy`);
27
- // Determine if we should use batch processing
28
- const batchConfig = this.getBatchProcessingConfig();
29
- const shouldUseBatchProcessing = this.shouldUseBatchProcessing(context, batchConfig);
30
30
  let violations;
31
31
  let processingStrategy;
32
- if (shouldUseBatchProcessing) {
33
- logger.info("🔄 Using batch processing for large PR analysis");
34
- const batchResult = await this.reviewWithBatchProcessing(context, options, batchConfig);
35
- violations = batchResult.violations;
36
- processingStrategy = "batch-processing";
32
+ // Check if multi-instance processing is enabled and configured
33
+ if (multiInstanceConfig?.enabled &&
34
+ multiInstanceConfig.instances?.length > 1) {
35
+ logger.info("🚀 Using multi-instance processing for enhanced analysis");
36
+ const multiInstanceResult = await this.reviewWithMultipleInstances(context, options, multiInstanceConfig);
37
+ violations = multiInstanceResult.finalViolations;
38
+ processingStrategy = "multi-instance";
37
39
  }
38
40
  else {
39
- logger.info("⚡ Using single-request analysis for small PR");
40
- const analysisPrompt = this.buildAnalysisPrompt(context, options);
41
- violations = await this.analyzeWithAI(analysisPrompt, context);
42
- processingStrategy = "single-request";
41
+ // Determine if we should use batch processing
42
+ const batchConfig = this.getBatchProcessingConfig();
43
+ const shouldUseBatchProcessing = this.shouldUseBatchProcessing(context, batchConfig);
44
+ if (shouldUseBatchProcessing) {
45
+ logger.info("🔄 Using batch processing for large PR analysis");
46
+ const batchResult = await this.reviewWithBatchProcessing(context, options, batchConfig);
47
+ violations = batchResult.violations;
48
+ processingStrategy = "batch-processing";
49
+ }
50
+ else {
51
+ logger.info("⚡ Using single-request analysis for small PR");
52
+ const analysisPrompt = this.buildAnalysisPrompt(context, options);
53
+ violations = await this.analyzeWithAI(analysisPrompt, context);
54
+ processingStrategy = "single-request";
55
+ }
43
56
  }
44
- const validatedViolations = this.validateViolations(violations, context);
45
- if (!options.dryRun && validatedViolations.length > 0) {
46
- await this.postComments(context, validatedViolations, options);
57
+ if (!options.dryRun && violations.length > 0) {
58
+ violations = await this.postComments(context, violations, options);
47
59
  }
48
60
  const duration = Math.round((Date.now() - startTime) / 1000);
49
- const result = this.generateReviewResult(validatedViolations, duration, context, processingStrategy);
50
- logger.success(`Code review completed in ${duration}s: ${validatedViolations.length} violations found (${processingStrategy})`);
61
+ const result = this.generateReviewResult(violations, duration, context, processingStrategy);
62
+ logger.success(`Code review completed in ${duration}s: ${violations.length} violations found (${processingStrategy})`);
51
63
  return result;
52
64
  }
53
65
  catch (error) {
@@ -56,153 +68,20 @@ export class CodeReviewer {
56
68
  }
57
69
  }
58
70
  /**
59
- * Validate violations to ensure code snippets exist in diff
71
+ * Review code using multiple instances for enhanced analysis
60
72
  */
61
- validateViolations(violations, context) {
62
- const validatedViolations = [];
63
- const diffContent = this.extractDiffContent(context);
64
- for (const violation of violations) {
65
- if (violation.type === "inline" &&
66
- violation.code_snippet &&
67
- violation.file) {
68
- // Check if the code snippet exists in the diff
69
- if (diffContent.includes(violation.code_snippet)) {
70
- validatedViolations.push(violation);
71
- }
72
- else {
73
- // Try to find a close match and fix the snippet
74
- const fixedViolation = this.tryFixCodeSnippet(violation, context);
75
- if (fixedViolation) {
76
- validatedViolations.push(fixedViolation);
77
- }
78
- else {
79
- logger.debug(`⚠️ Skipping violation - snippet not found in diff: ${violation.file}`);
80
- logger.debug(` Original snippet: "${violation.code_snippet}"`);
81
- }
82
- }
83
- }
84
- else {
85
- // Non-inline violations are always valid
86
- validatedViolations.push(violation);
87
- }
88
- }
89
- logger.debug(`Validated ${validatedViolations.length} out of ${violations.length} violations`);
90
- return validatedViolations;
91
- }
92
- /**
93
- * Try to fix code snippet by finding it in the actual diff
94
- */
95
- tryFixCodeSnippet(violation, context) {
96
- if (!violation.file || !violation.code_snippet) {
97
- return null;
98
- }
73
+ async reviewWithMultipleInstances(context, options, multiInstanceConfig) {
99
74
  try {
100
- // Get the diff for this specific file
101
- let fileDiff;
102
- if (context.diffStrategy.strategy === "whole" && context.prDiff) {
103
- // Extract file diff from whole diff - handle different path formats
104
- const diffLines = context.prDiff.diff.split("\n");
105
- let fileStartIndex = -1;
106
- // Generate all possible path variations
107
- const pathVariations = this.generatePathVariations(violation.file);
108
- // Try to find the file in the diff with various path formats
109
- for (let i = 0; i < diffLines.length; i++) {
110
- const line = diffLines[i];
111
- if (line.startsWith("diff --git") || line.startsWith("Index:")) {
112
- for (const pathVariation of pathVariations) {
113
- if (line.includes(pathVariation)) {
114
- fileStartIndex = i;
115
- break;
116
- }
117
- }
118
- if (fileStartIndex >= 0) {
119
- break;
120
- }
121
- }
122
- }
123
- if (fileStartIndex >= 0) {
124
- const nextFileIndex = diffLines.findIndex((line, idx) => idx > fileStartIndex &&
125
- (line.startsWith("diff --git") || line.startsWith("Index:")));
126
- fileDiff = diffLines
127
- .slice(fileStartIndex, nextFileIndex > 0 ? nextFileIndex : diffLines.length)
128
- .join("\n");
129
- }
130
- }
131
- else if (context.diffStrategy.strategy === "file-by-file" &&
132
- context.fileDiffs) {
133
- // Try all path variations
134
- const pathVariations = this.generatePathVariations(violation.file);
135
- for (const path of pathVariations) {
136
- fileDiff = context.fileDiffs.get(path);
137
- if (fileDiff) {
138
- logger.debug(`Found diff for ${violation.file} using variation: ${path}`);
139
- break;
140
- }
141
- }
142
- // If still not found, try partial matching
143
- if (!fileDiff) {
144
- for (const [key, value] of context.fileDiffs.entries()) {
145
- if (key.endsWith(violation.file) || violation.file.endsWith(key)) {
146
- fileDiff = value;
147
- logger.debug(`Found diff for ${violation.file} using partial match: ${key}`);
148
- break;
149
- }
150
- }
151
- }
152
- }
153
- if (!fileDiff) {
154
- logger.debug(`❌ Could not find diff for file: ${violation.file}`);
155
- return null;
156
- }
157
- // First, try to find the exact line with line number extraction
158
- const lineInfo = this.extractLineNumberFromDiff(fileDiff, violation.code_snippet);
159
- if (lineInfo) {
160
- const fixedViolation = { ...violation };
161
- fixedViolation.line_type = lineInfo.lineType;
162
- // Extract search context from the diff
163
- const diffLines = fileDiff.split("\n");
164
- const snippetIndex = diffLines.findIndex((line) => line === violation.code_snippet);
165
- if (snippetIndex > 0 && snippetIndex < diffLines.length - 1) {
166
- fixedViolation.search_context = {
167
- before: [diffLines[snippetIndex - 1]],
168
- after: [diffLines[snippetIndex + 1]],
169
- };
170
- }
171
- logger.debug(`✅ Found exact match with line number for ${violation.file}`);
172
- return fixedViolation;
173
- }
174
- // Fallback: Clean the snippet and try fuzzy matching
175
- const cleanSnippet = violation.code_snippet
176
- .trim()
177
- .replace(/^[+\-\s]/, ""); // Remove diff prefix for searching
178
- // Look for the clean snippet in the diff
179
- const diffLines = fileDiff.split("\n");
180
- for (let i = 0; i < diffLines.length; i++) {
181
- const line = diffLines[i];
182
- const cleanLine = line.replace(/^[+\-\s]/, "").trim();
183
- if (cleanLine.includes(cleanSnippet) ||
184
- cleanSnippet.includes(cleanLine)) {
185
- // Found a match! Update the violation with the correct snippet
186
- const fixedViolation = { ...violation };
187
- fixedViolation.code_snippet = line; // Use the full line with diff prefix
188
- // Update search context if needed
189
- if (i > 0 && i < diffLines.length - 1) {
190
- fixedViolation.search_context = {
191
- before: [diffLines[i - 1]],
192
- after: [diffLines[i + 1]],
193
- };
194
- }
195
- logger.debug(`✅ Fixed code snippet for ${violation.file} using fuzzy match`);
196
- return fixedViolation;
197
- }
198
- }
199
- logger.debug(`❌ Could not find snippet in diff for ${violation.file}`);
200
- logger.debug(` Looking for: "${violation.code_snippet}"`);
75
+ // Create multi-instance processor
76
+ const multiInstanceProcessor = createMultiInstanceProcessor(this.bitbucketProvider, this.reviewConfig);
77
+ // Execute multi-instance processing
78
+ const result = await multiInstanceProcessor.processWithMultipleInstances(context, options, multiInstanceConfig);
79
+ return result;
201
80
  }
202
81
  catch (error) {
203
- logger.debug(`Error fixing code snippet: ${error.message}`);
82
+ logger.error(`Multi-instance processing failed: ${error.message}`);
83
+ throw error;
204
84
  }
205
- return null;
206
85
  }
207
86
  /**
208
87
  * Get system prompt for security-focused code review
@@ -512,11 +391,27 @@ Return ONLY valid JSON:
512
391
  */
513
392
  async postComments(context, violations, _options) {
514
393
  logger.phase("📝 Posting review comments...");
394
+ // NEW: Apply semantic comment deduplication before posting
395
+ const duplicateRemover = createExactDuplicateRemover();
396
+ const deduplicationResult = await duplicateRemover.removeAgainstExistingComments(violations, context.pr.comments || [], this.aiConfig, 85);
397
+ logger.info(`🔍 Semantic deduplication: ${violations.length} → ${deduplicationResult.uniqueViolations.length} violations ` +
398
+ `(${deduplicationResult.duplicatesRemoved} duplicates removed)`);
399
+ // Log deduplication details if any duplicates were found
400
+ if (deduplicationResult.duplicatesRemoved > 0) {
401
+ logger.info(duplicateRemover.getCommentDeduplicationStats(deduplicationResult));
402
+ // Log details of semantic matches
403
+ deduplicationResult.semanticMatches.forEach((match, index) => {
404
+ logger.debug(`🎯 Semantic match ${index + 1}: "${match.violation}" matches ${match.comment} ` +
405
+ `(${match.similarityScore}% similarity)${match.reasoning ? ` - ${match.reasoning}` : ""}`);
406
+ });
407
+ }
408
+ // Use deduplicated violations for posting
409
+ const uniqueViolations = deduplicationResult.uniqueViolations;
515
410
  let commentsPosted = 0;
516
411
  let commentsFailed = 0;
517
412
  const failedComments = [];
518
413
  // Post inline comments
519
- const inlineViolations = violations.filter((v) => v.type === "inline" && v.file && v.code_snippet);
414
+ const inlineViolations = uniqueViolations.filter((v) => v.type === "inline" && v.file && v.code_snippet);
520
415
  for (const violation of inlineViolations) {
521
416
  try {
522
417
  // Clean file path - remove protocol prefixes ONLY (keep a/ and b/ prefixes)
@@ -571,9 +466,9 @@ Return ONLY valid JSON:
571
466
  }
572
467
  }
573
468
  // Post summary comment (include failed comments info if any)
574
- if (violations.length > 0) {
469
+ if (uniqueViolations.length > 0) {
575
470
  try {
576
- const summaryComment = this.generateSummaryComment(violations, context, failedComments);
471
+ const summaryComment = this.generateSummaryComment(uniqueViolations, context, failedComments);
577
472
  await this.bitbucketProvider.addComment(context.identifier, summaryComment);
578
473
  commentsPosted++;
579
474
  logger.debug("✅ Posted summary comment");
@@ -586,6 +481,7 @@ Return ONLY valid JSON:
586
481
  if (commentsFailed > 0) {
587
482
  logger.warn(`⚠️ Failed to post ${commentsFailed} inline comments`);
588
483
  }
484
+ return uniqueViolations;
589
485
  }
590
486
  /**
591
487
  * Format inline comment for specific violation
@@ -933,14 +829,33 @@ ${recommendation}
933
829
  enabled: true,
934
830
  maxFilesPerBatch: 3,
935
831
  prioritizeSecurityFiles: true,
936
- parallelBatches: false, // Sequential for better reliability
832
+ parallelBatches: false, // Keep for backward compatibility
937
833
  batchDelayMs: 1000,
938
834
  singleRequestThreshold: 5, // Use single request for ≤5 files
835
+ // NEW: Parallel processing defaults
836
+ parallel: {
837
+ enabled: true, // Enable parallel processing by default
838
+ maxConcurrentBatches: 3,
839
+ rateLimitStrategy: "fixed",
840
+ tokenBudgetDistribution: "equal",
841
+ failureHandling: "continue",
842
+ },
939
843
  };
940
- return {
844
+ const mergedConfig = {
941
845
  ...defaultConfig,
942
846
  ...this.reviewConfig.batchProcessing,
943
847
  };
848
+ // Merge parallel config separately to handle nested object properly
849
+ if (mergedConfig.parallel && this.reviewConfig.batchProcessing?.parallel) {
850
+ mergedConfig.parallel = {
851
+ ...defaultConfig.parallel,
852
+ ...this.reviewConfig.batchProcessing.parallel,
853
+ };
854
+ }
855
+ else if (!mergedConfig.parallel) {
856
+ mergedConfig.parallel = defaultConfig.parallel;
857
+ }
858
+ return mergedConfig;
944
859
  }
945
860
  /**
946
861
  * Determine if batch processing should be used
@@ -964,56 +879,152 @@ ${recommendation}
964
879
  return true;
965
880
  }
966
881
  /**
967
- * Main batch processing method
882
+ * Main batch processing method with parallel processing support
968
883
  */
969
884
  async reviewWithBatchProcessing(context, options, batchConfig) {
970
885
  const startTime = Date.now();
971
886
  try {
972
887
  // Step 1: Prioritize and organize files
973
888
  const prioritizedFiles = await this.prioritizeFiles(context, batchConfig);
974
- logger.info(`📋 Prioritized ${prioritizedFiles.length} files: ${prioritizedFiles.filter(f => f.priority === "high").length} high, ${prioritizedFiles.filter(f => f.priority === "medium").length} medium, ${prioritizedFiles.filter(f => f.priority === "low").length} low priority`);
889
+ logger.info(`📋 Prioritized ${prioritizedFiles.length} files: ${prioritizedFiles.filter((f) => f.priority === "high").length} high, ${prioritizedFiles.filter((f) => f.priority === "medium").length} medium, ${prioritizedFiles.filter((f) => f.priority === "low").length} low priority`);
975
890
  // Step 2: Create batches
976
891
  const batches = this.createBatches(prioritizedFiles, batchConfig);
977
892
  logger.info(`📦 Created ${batches.length} batches (max ${batchConfig.maxFilesPerBatch} files per batch)`);
978
- // Step 3: Process batches
979
- const batchResults = [];
980
- const allViolations = [];
981
- for (let i = 0; i < batches.length; i++) {
982
- const batch = batches[i];
983
- logger.info(`🔄 Processing batch ${i + 1}/${batches.length} (${batch.files.length} files, ${batch.priority} priority)`);
984
- try {
985
- const batchResult = await this.processBatch(batch, context, options);
986
- batchResults.push(batchResult);
987
- allViolations.push(...batchResult.violations);
988
- logger.info(`✅ Batch ${i + 1} completed: ${batchResult.violations.length} violations found in ${Math.round(batchResult.processingTime / 1000)}s`);
989
- // Add delay between batches if configured
990
- if (i < batches.length - 1 && batchConfig.batchDelayMs > 0) {
991
- logger.debug(`⏳ Waiting ${batchConfig.batchDelayMs}ms before next batch`);
992
- await new Promise(resolve => setTimeout(resolve, batchConfig.batchDelayMs));
993
- }
994
- }
995
- catch (error) {
996
- logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
997
- // Record failed batch
998
- batchResults.push({
999
- batchIndex: i,
1000
- files: batch.files,
1001
- violations: [],
1002
- processingTime: Date.now() - startTime,
1003
- error: error.message,
1004
- });
1005
- }
893
+ // Step 3: Determine processing strategy
894
+ const useParallel = batchConfig.parallel?.enabled && batches.length > 1;
895
+ if (useParallel) {
896
+ logger.info(`🚀 Using parallel processing: ${batches.length} batches, max ${batchConfig.parallel?.maxConcurrentBatches} concurrent`);
897
+ return await this.processInParallel(batches, context, options, batchConfig);
898
+ }
899
+ else {
900
+ logger.info(`🔄 Using serial processing: ${batches.length} batches`);
901
+ return await this.processSerially(batches, context, options, batchConfig);
1006
902
  }
1007
- const totalTime = Date.now() - startTime;
1008
- const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
1009
- logger.success(`🎯 Batch processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch)`);
1010
- return { violations: allViolations, batchResults };
1011
903
  }
1012
904
  catch (error) {
1013
905
  logger.error(`Batch processing failed: ${error.message}`);
1014
906
  throw error;
1015
907
  }
1016
908
  }
909
+ /**
910
+ * Process batches in parallel with concurrency control
911
+ */
912
+ async processInParallel(batches, context, options, batchConfig) {
913
+ const startTime = Date.now();
914
+ const parallelConfig = batchConfig.parallel;
915
+ // Calculate optimal concurrency
916
+ const avgTokensPerBatch = batches.reduce((sum, b) => sum + b.estimatedTokens, 0) / batches.length;
917
+ const optimalConcurrency = calculateOptimalConcurrency(batches.length, parallelConfig.maxConcurrentBatches, avgTokensPerBatch, this.getSafeTokenLimit());
918
+ // Initialize concurrency control
919
+ const semaphore = new Semaphore(optimalConcurrency);
920
+ const tokenBudget = new TokenBudgetManager(this.getSafeTokenLimit() * 0.8); // 80% for safety
921
+ logger.info(`🎯 Parallel processing: ${optimalConcurrency} concurrent batches, ${tokenBudget.getTotalBudget()} token budget`);
922
+ const batchResults = new Array(batches.length);
923
+ const allViolations = [];
924
+ const processingPromises = [];
925
+ // Process batches with controlled concurrency
926
+ for (let i = 0; i < batches.length; i++) {
927
+ const batch = batches[i];
928
+ const processingPromise = this.processBatchWithConcurrency(batch, context, options, semaphore, tokenBudget, i, batches.length)
929
+ .then((result) => {
930
+ batchResults[i] = result; // Maintain order
931
+ if (result.violations) {
932
+ allViolations.push(...result.violations);
933
+ }
934
+ })
935
+ .catch((error) => {
936
+ logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
937
+ batchResults[i] = {
938
+ batchIndex: i,
939
+ files: batch.files,
940
+ violations: [],
941
+ processingTime: 0,
942
+ error: error.message,
943
+ };
944
+ // Handle failure strategy
945
+ if (parallelConfig.failureHandling === "stop-all") {
946
+ throw error;
947
+ }
948
+ });
949
+ processingPromises.push(processingPromise);
950
+ // Add small delay between batch starts to avoid overwhelming
951
+ if (i < batches.length - 1) {
952
+ await new Promise((resolve) => setTimeout(resolve, 200));
953
+ }
954
+ }
955
+ // Wait for all batches to complete
956
+ await Promise.allSettled(processingPromises);
957
+ // Filter out undefined results and sort by batch index
958
+ const validResults = batchResults
959
+ .filter((r) => r !== undefined)
960
+ .sort((a, b) => a.batchIndex - b.batchIndex);
961
+ const totalTime = Date.now() - startTime;
962
+ const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
963
+ const budgetStatus = tokenBudget.getBudgetStatus();
964
+ logger.success(`🎯 Parallel processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch, ${budgetStatus.utilizationPercent}% token usage)`);
965
+ return { violations: allViolations, batchResults: validResults };
966
+ }
967
+ /**
968
+ * Process batches serially (original implementation)
969
+ */
970
+ async processSerially(batches, context, options, batchConfig) {
971
+ const startTime = Date.now();
972
+ const batchResults = [];
973
+ const allViolations = [];
974
+ for (let i = 0; i < batches.length; i++) {
975
+ const batch = batches[i];
976
+ logger.info(`🔄 Processing batch ${i + 1}/${batches.length} (${batch.files.length} files, ${batch.priority} priority, serial)`);
977
+ try {
978
+ const batchResult = await this.processBatch(batch, context, options);
979
+ batchResults.push(batchResult);
980
+ allViolations.push(...batchResult.violations);
981
+ logger.info(`✅ Batch ${i + 1} completed: ${batchResult.violations.length} violations found in ${Math.round(batchResult.processingTime / 1000)}s`);
982
+ // Add delay between batches if configured
983
+ if (i < batches.length - 1 && batchConfig.batchDelayMs > 0) {
984
+ logger.debug(`⏳ Waiting ${batchConfig.batchDelayMs}ms before next batch`);
985
+ await new Promise((resolve) => setTimeout(resolve, batchConfig.batchDelayMs));
986
+ }
987
+ }
988
+ catch (error) {
989
+ logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
990
+ // Record failed batch
991
+ batchResults.push({
992
+ batchIndex: i,
993
+ files: batch.files,
994
+ violations: [],
995
+ processingTime: Date.now() - startTime,
996
+ error: error.message,
997
+ });
998
+ }
999
+ }
1000
+ const totalTime = Date.now() - startTime;
1001
+ const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
1002
+ logger.success(`🎯 Serial processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch)`);
1003
+ return { violations: allViolations, batchResults };
1004
+ }
1005
+ /**
1006
+ * Process a single batch with concurrency control
1007
+ */
1008
+ async processBatchWithConcurrency(batch, context, options, semaphore, tokenBudget, batchIndex, totalBatches) {
1009
+ // Acquire semaphore permit
1010
+ await semaphore.acquire();
1011
+ try {
1012
+ // Check token budget
1013
+ if (!tokenBudget.allocateForBatch(batchIndex, batch.estimatedTokens)) {
1014
+ throw new Error(`Insufficient token budget for batch ${batchIndex + 1}`);
1015
+ }
1016
+ logger.info(`🔄 Processing batch ${batchIndex + 1}/${totalBatches} (${batch.files.length} files, parallel)`);
1017
+ // Process the batch (existing logic)
1018
+ const result = await this.processBatch(batch, context, options);
1019
+ logger.info(`✅ Batch ${batchIndex + 1} completed: ${result.violations.length} violations in ${Math.round(result.processingTime / 1000)}s`);
1020
+ return result;
1021
+ }
1022
+ finally {
1023
+ // Always release resources
1024
+ tokenBudget.releaseBatch(batchIndex);
1025
+ semaphore.release();
1026
+ }
1027
+ }
1017
1028
  /**
1018
1029
  * Prioritize files based on security importance and file type
1019
1030
  */
@@ -1051,26 +1062,73 @@ ${recommendation}
1051
1062
  const path = filePath.toLowerCase();
1052
1063
  // High priority: Security-sensitive files
1053
1064
  const highPriorityPatterns = [
1054
- /auth/i, /login/i, /password/i, /token/i, /jwt/i, /oauth/i,
1055
- /crypto/i, /encrypt/i, /decrypt/i, /hash/i, /security/i,
1056
- /payment/i, /billing/i, /transaction/i, /money/i, /wallet/i,
1057
- /admin/i, /privilege/i, /permission/i, /role/i, /access/i,
1058
- /config/i, /env/i, /secret/i, /key/i, /credential/i,
1059
- /api/i, /endpoint/i, /route/i, /controller/i, /middleware/i,
1065
+ /auth/i,
1066
+ /login/i,
1067
+ /password/i,
1068
+ /token/i,
1069
+ /jwt/i,
1070
+ /oauth/i,
1071
+ /crypto/i,
1072
+ /encrypt/i,
1073
+ /decrypt/i,
1074
+ /hash/i,
1075
+ /security/i,
1076
+ /payment/i,
1077
+ /billing/i,
1078
+ /transaction/i,
1079
+ /money/i,
1080
+ /wallet/i,
1081
+ /admin/i,
1082
+ /privilege/i,
1083
+ /permission/i,
1084
+ /role/i,
1085
+ /access/i,
1086
+ /config/i,
1087
+ /env/i,
1088
+ /secret/i,
1089
+ /key/i,
1090
+ /credential/i,
1091
+ /api/i,
1092
+ /endpoint/i,
1093
+ /route/i,
1094
+ /controller/i,
1095
+ /middleware/i,
1060
1096
  ];
1061
- if (highPriorityPatterns.some(pattern => pattern.test(path))) {
1097
+ if (highPriorityPatterns.some((pattern) => pattern.test(path))) {
1062
1098
  return "high";
1063
1099
  }
1064
1100
  // Low priority: Documentation, tests, config files
1065
1101
  const lowPriorityPatterns = [
1066
- /\.md$/i, /\.txt$/i, /readme/i, /changelog/i, /license/i,
1067
- /test/i, /spec/i, /\.test\./i, /\.spec\./i, /__tests__/i,
1068
- /\.json$/i, /\.yaml$/i, /\.yml$/i, /\.toml$/i, /\.ini$/i,
1069
- /\.lock$/i, /package-lock/i, /yarn\.lock/i, /pnpm-lock/i,
1070
- /\.gitignore/i, /\.eslint/i, /\.prettier/i, /tsconfig/i,
1071
- /\.svg$/i, /\.png$/i, /\.jpg$/i, /\.jpeg$/i, /\.gif$/i,
1102
+ /\.md$/i,
1103
+ /\.txt$/i,
1104
+ /readme/i,
1105
+ /changelog/i,
1106
+ /license/i,
1107
+ /test/i,
1108
+ /spec/i,
1109
+ /\.test\./i,
1110
+ /\.spec\./i,
1111
+ /__tests__/i,
1112
+ /\.json$/i,
1113
+ /\.yaml$/i,
1114
+ /\.yml$/i,
1115
+ /\.toml$/i,
1116
+ /\.ini$/i,
1117
+ /\.lock$/i,
1118
+ /package-lock/i,
1119
+ /yarn\.lock/i,
1120
+ /pnpm-lock/i,
1121
+ /\.gitignore/i,
1122
+ /\.eslint/i,
1123
+ /\.prettier/i,
1124
+ /tsconfig/i,
1125
+ /\.svg$/i,
1126
+ /\.png$/i,
1127
+ /\.jpg$/i,
1128
+ /\.jpeg$/i,
1129
+ /\.gif$/i,
1072
1130
  ];
1073
- if (lowPriorityPatterns.some(pattern => pattern.test(path))) {
1131
+ if (lowPriorityPatterns.some((pattern) => pattern.test(path))) {
1074
1132
  return "low";
1075
1133
  }
1076
1134
  // Medium priority: Everything else
@@ -1128,7 +1186,8 @@ ${recommendation}
1128
1186
  for (const file of prioritizedFiles) {
1129
1187
  const wouldExceedTokens = currentBatch.estimatedTokens + file.estimatedTokens > maxTokensPerBatch;
1130
1188
  const wouldExceedFileCount = currentBatch.files.length >= batchConfig.maxFilesPerBatch;
1131
- if ((wouldExceedTokens || wouldExceedFileCount) && currentBatch.files.length > 0) {
1189
+ if ((wouldExceedTokens || wouldExceedFileCount) &&
1190
+ currentBatch.files.length > 0) {
1132
1191
  // Finalize current batch
1133
1192
  batches.push(currentBatch);
1134
1193
  // Start new batch