@juspay/yama 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +29 -1
- package/dist/core/Guardian.js +6 -3
- package/dist/core/providers/BitbucketProvider.d.ts +1 -1
- package/dist/core/providers/BitbucketProvider.js +31 -1
- package/dist/features/CodeReviewer.d.ts +17 -9
- package/dist/features/CodeReviewer.js +274 -215
- package/dist/features/MultiInstanceProcessor.d.ts +74 -0
- package/dist/features/MultiInstanceProcessor.js +359 -0
- package/dist/types/index.d.ts +120 -1
- package/dist/utils/ContentSimilarityService.d.ts +74 -0
- package/dist/utils/ContentSimilarityService.js +215 -0
- package/dist/utils/ExactDuplicateRemover.d.ts +77 -0
- package/dist/utils/ExactDuplicateRemover.js +361 -0
- package/dist/utils/ParallelProcessing.d.ts +112 -0
- package/dist/utils/ParallelProcessing.js +228 -0
- package/package.json +17 -17
- package/yama.config.example.yaml +42 -4
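
The headline change in this release is an optional multi-instance review path: reviewCodeWithContext on CodeReviewer gains a third multiInstanceConfig parameter, backed by the new MultiInstanceProcessor, ContentSimilarityService, ExactDuplicateRemover, and ParallelProcessing modules listed above. A minimal sketch of how a caller might opt in, based only on the checks visible in the hunks below (multiInstanceConfig?.enabled and instances?.length > 1); the per-instance fields are illustrative assumptions, not confirmed by this diff:

    // Hypothetical opt-in call; `reviewer`, `context`, and `options` come from
    // yama's existing setup. The instance entry shape is assumed for illustration.
    async function reviewWithTwoInstances(reviewer, context, options) {
        const multiInstanceConfig = {
            enabled: true,
            // Needs more than one entry, or CodeReviewer falls back to
            // batch/single-request processing (see the hunks below).
            instances: [
                { provider: "vertex", model: "gemini-2.5-pro" }, // assumed fields
                { provider: "openai", model: "gpt-4o" },         // assumed fields
            ],
        };
        return reviewer.reviewCodeWithContext(context, options, multiInstanceConfig);
    }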

The hunks below are from package/dist/features/CodeReviewer.js:

@@ -4,8 +4,11 @@
 */
 // NeuroLink will be dynamically imported
 import { ProviderError, } from "../types/index.js";
+import { createMultiInstanceProcessor, } from "./MultiInstanceProcessor.js";
 import { logger } from "../utils/Logger.js";
 import { getProviderTokenLimit } from "../utils/ProviderLimits.js";
+import { Semaphore, TokenBudgetManager, calculateOptimalConcurrency, } from "../utils/ParallelProcessing.js";
+import { createExactDuplicateRemover } from "../utils/ExactDuplicateRemover.js";
 export class CodeReviewer {
     neurolink;
     bitbucketProvider;

@@ -17,37 +20,46 @@ export class CodeReviewer {
         this.reviewConfig = reviewConfig;
     }
     /**
-     * Review code using pre-gathered unified context (OPTIMIZED with Batch Processing)
+     * Review code using pre-gathered unified context (OPTIMIZED with Multi-Instance and Batch Processing)
      */
-    async reviewCodeWithContext(context, options) {
+    async reviewCodeWithContext(context, options, multiInstanceConfig) {
         const startTime = Date.now();
         try {
             logger.phase("🧪 Conducting AI-powered code analysis...");
             logger.info(`Analyzing ${context.diffStrategy.fileCount} files using ${context.diffStrategy.strategy} strategy`);
-            // Determine if we should use batch processing
-            const batchConfig = this.getBatchProcessingConfig();
-            const shouldUseBatchProcessing = this.shouldUseBatchProcessing(context, batchConfig);
             let violations;
             let processingStrategy;
-            if …
-            …
-            …
-            …
-            …
+            // Check if multi-instance processing is enabled and configured
+            if (multiInstanceConfig?.enabled &&
+                multiInstanceConfig.instances?.length > 1) {
+                logger.info("Using multi-instance processing for enhanced analysis");
+                const multiInstanceResult = await this.reviewWithMultipleInstances(context, options, multiInstanceConfig);
+                violations = multiInstanceResult.finalViolations;
+                processingStrategy = "multi-instance";
             }
             else {
-                …
-                const …
-                …
-                …
+                // Determine if we should use batch processing
+                const batchConfig = this.getBatchProcessingConfig();
+                const shouldUseBatchProcessing = this.shouldUseBatchProcessing(context, batchConfig);
+                if (shouldUseBatchProcessing) {
+                    logger.info("Using batch processing for large PR analysis");
+                    const batchResult = await this.reviewWithBatchProcessing(context, options, batchConfig);
+                    violations = batchResult.violations;
+                    processingStrategy = "batch-processing";
+                }
+                else {
+                    logger.info("⚡ Using single-request analysis for small PR");
+                    const analysisPrompt = this.buildAnalysisPrompt(context, options);
+                    violations = await this.analyzeWithAI(analysisPrompt, context);
+                    processingStrategy = "single-request";
+                }
             }
-            …
-            …
-            await this.postComments(context, validatedViolations, options);
+            if (!options.dryRun && violations.length > 0) {
+                violations = await this.postComments(context, violations, options);
             }
             const duration = Math.round((Date.now() - startTime) / 1000);
-            const result = this.generateReviewResult(…
-            logger.success(`Code review completed in ${duration}s: ${…
+            const result = this.generateReviewResult(violations, duration, context, processingStrategy);
+            logger.success(`Code review completed in ${duration}s: ${violations.length} violations found (${processingStrategy})`);
             return result;
         }
         catch (error) {

@@ -56,153 +68,20 @@ export class CodeReviewer {
         }
     }
     /**
-     * …
+     * Review code using multiple instances for enhanced analysis
      */
-    …
-        const validatedViolations = [];
-        const diffContent = this.extractDiffContent(context);
-        for (const violation of violations) {
-            if (violation.type === "inline" &&
-                violation.code_snippet &&
-                violation.file) {
-                // Check if the code snippet exists in the diff
-                if (diffContent.includes(violation.code_snippet)) {
-                    validatedViolations.push(violation);
-                }
-                else {
-                    // Try to find a close match and fix the snippet
-                    const fixedViolation = this.tryFixCodeSnippet(violation, context);
-                    if (fixedViolation) {
-                        validatedViolations.push(fixedViolation);
-                    }
-                    else {
-                        logger.debug(`⚠️ Skipping violation - snippet not found in diff: ${violation.file}`);
-                        logger.debug(`   Original snippet: "${violation.code_snippet}"`);
-                    }
-                }
-            }
-            else {
-                // Non-inline violations are always valid
-                validatedViolations.push(violation);
-            }
-        }
-        logger.debug(`Validated ${validatedViolations.length} out of ${violations.length} violations`);
-        return validatedViolations;
-    }
-    /**
-     * Try to fix code snippet by finding it in the actual diff
-     */
-    tryFixCodeSnippet(violation, context) {
-        if (!violation.file || !violation.code_snippet) {
-            return null;
-        }
+    async reviewWithMultipleInstances(context, options, multiInstanceConfig) {
         try {
-            // …
-            …
-            …
-            …
-            …
-            let fileStartIndex = -1;
-            // Generate all possible path variations
-            const pathVariations = this.generatePathVariations(violation.file);
-            // Try to find the file in the diff with various path formats
-            for (let i = 0; i < diffLines.length; i++) {
-                const line = diffLines[i];
-                if (line.startsWith("diff --git") || line.startsWith("Index:")) {
-                    for (const pathVariation of pathVariations) {
-                        if (line.includes(pathVariation)) {
-                            fileStartIndex = i;
-                            break;
-                        }
-                    }
-                    if (fileStartIndex >= 0) {
-                        break;
-                    }
-                }
-            }
-            if (fileStartIndex >= 0) {
-                const nextFileIndex = diffLines.findIndex((line, idx) => idx > fileStartIndex &&
-                    (line.startsWith("diff --git") || line.startsWith("Index:")));
-                fileDiff = diffLines
-                    .slice(fileStartIndex, nextFileIndex > 0 ? nextFileIndex : diffLines.length)
-                    .join("\n");
-            }
-            }
-            else if (context.diffStrategy.strategy === "file-by-file" &&
-                context.fileDiffs) {
-                // Try all path variations
-                const pathVariations = this.generatePathVariations(violation.file);
-                for (const path of pathVariations) {
-                    fileDiff = context.fileDiffs.get(path);
-                    if (fileDiff) {
-                        logger.debug(`Found diff for ${violation.file} using variation: ${path}`);
-                        break;
-                    }
-                }
-                // If still not found, try partial matching
-                if (!fileDiff) {
-                    for (const [key, value] of context.fileDiffs.entries()) {
-                        if (key.endsWith(violation.file) || violation.file.endsWith(key)) {
-                            fileDiff = value;
-                            logger.debug(`Found diff for ${violation.file} using partial match: ${key}`);
-                            break;
-                        }
-                    }
-                }
-            }
-            if (!fileDiff) {
-                logger.debug(`❌ Could not find diff for file: ${violation.file}`);
-                return null;
-            }
-            // First, try to find the exact line with line number extraction
-            const lineInfo = this.extractLineNumberFromDiff(fileDiff, violation.code_snippet);
-            if (lineInfo) {
-                const fixedViolation = { ...violation };
-                fixedViolation.line_type = lineInfo.lineType;
-                // Extract search context from the diff
-                const diffLines = fileDiff.split("\n");
-                const snippetIndex = diffLines.findIndex((line) => line === violation.code_snippet);
-                if (snippetIndex > 0 && snippetIndex < diffLines.length - 1) {
-                    fixedViolation.search_context = {
-                        before: [diffLines[snippetIndex - 1]],
-                        after: [diffLines[snippetIndex + 1]],
-                    };
-                }
-                logger.debug(`✅ Found exact match with line number for ${violation.file}`);
-                return fixedViolation;
-            }
-            // Fallback: Clean the snippet and try fuzzy matching
-            const cleanSnippet = violation.code_snippet
-                .trim()
-                .replace(/^[+\-\s]/, ""); // Remove diff prefix for searching
-            // Look for the clean snippet in the diff
-            const diffLines = fileDiff.split("\n");
-            for (let i = 0; i < diffLines.length; i++) {
-                const line = diffLines[i];
-                const cleanLine = line.replace(/^[+\-\s]/, "").trim();
-                if (cleanLine.includes(cleanSnippet) ||
-                    cleanSnippet.includes(cleanLine)) {
-                    // Found a match! Update the violation with the correct snippet
-                    const fixedViolation = { ...violation };
-                    fixedViolation.code_snippet = line; // Use the full line with diff prefix
-                    // Update search context if needed
-                    if (i > 0 && i < diffLines.length - 1) {
-                        fixedViolation.search_context = {
-                            before: [diffLines[i - 1]],
-                            after: [diffLines[i + 1]],
-                        };
-                    }
-                    logger.debug(`✅ Fixed code snippet for ${violation.file} using fuzzy match`);
-                    return fixedViolation;
-                }
-            }
-            logger.debug(`❌ Could not find snippet in diff for ${violation.file}`);
-            logger.debug(`   Looking for: "${violation.code_snippet}"`);
+            // Create multi-instance processor
+            const multiInstanceProcessor = createMultiInstanceProcessor(this.bitbucketProvider, this.reviewConfig);
+            // Execute multi-instance processing
+            const result = await multiInstanceProcessor.processWithMultipleInstances(context, options, multiInstanceConfig);
+            return result;
         }
         catch (error) {
-            logger.…
+            logger.error(`Multi-instance processing failed: ${error.message}`);
+            throw error;
         }
-        return null;
     }
     /**
      * Get system prompt for security-focused code review

@@ -512,11 +391,27 @@ Return ONLY valid JSON:
      */
     async postComments(context, violations, _options) {
         logger.phase("Posting review comments...");
+        // NEW: Apply semantic comment deduplication before posting
+        const duplicateRemover = createExactDuplicateRemover();
+        const deduplicationResult = await duplicateRemover.removeAgainstExistingComments(violations, context.pr.comments || [], this.aiConfig, 85);
+        logger.info(`Semantic deduplication: ${violations.length} → ${deduplicationResult.uniqueViolations.length} violations ` +
+            `(${deduplicationResult.duplicatesRemoved} duplicates removed)`);
+        // Log deduplication details if any duplicates were found
+        if (deduplicationResult.duplicatesRemoved > 0) {
+            logger.info(duplicateRemover.getCommentDeduplicationStats(deduplicationResult));
+            // Log details of semantic matches
+            deduplicationResult.semanticMatches.forEach((match, index) => {
+                logger.debug(`🎯 Semantic match ${index + 1}: "${match.violation}" matches ${match.comment} ` +
+                    `(${match.similarityScore}% similarity)${match.reasoning ? ` - ${match.reasoning}` : ""}`);
+            });
+        }
+        // Use deduplicated violations for posting
+        const uniqueViolations = deduplicationResult.uniqueViolations;
         let commentsPosted = 0;
         let commentsFailed = 0;
         const failedComments = [];
         // Post inline comments
-        const inlineViolations = …
+        const inlineViolations = uniqueViolations.filter((v) => v.type === "inline" && v.file && v.code_snippet);
         for (const violation of inlineViolations) {
             try {
                 // Clean file path - remove protocol prefixes ONLY (keep a/ and b/ prefixes)

@@ -571,9 +466,9 @@ Return ONLY valid JSON:
             }
         }
         // Post summary comment (include failed comments info if any)
-        if (…
+        if (uniqueViolations.length > 0) {
             try {
-                const summaryComment = this.generateSummaryComment(…
+                const summaryComment = this.generateSummaryComment(uniqueViolations, context, failedComments);
                 await this.bitbucketProvider.addComment(context.identifier, summaryComment);
                 commentsPosted++;
                 logger.debug("✅ Posted summary comment");

@@ -586,6 +481,7 @@ Return ONLY valid JSON:
         if (commentsFailed > 0) {
             logger.warn(`⚠️ Failed to post ${commentsFailed} inline comments`);
         }
+        return uniqueViolations;
     }
     /**
      * Format inline comment for specific violation
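
postComments now returns the deduplicated violation list, which is why reviewCodeWithContext reassigns violations from its result. The exact-match half of the new deduplication idea can be illustrated in a few lines; this is a hypothetical stand-in, not the ExactDuplicateRemover implementation (which, per the hunk above, also does AI-assisted semantic matching — the 85 argument reads as a minimum similarity percentage, consistent with the `${match.similarityScore}% similarity` log line, though the diff does not name it):

    // Hypothetical stand-in: drop violations whose message already appears
    // verbatim in an existing PR comment. The shipped ExactDuplicateRemover
    // additionally scores semantic similarity via the AI config.
    function removeExactDuplicates(violations, existingComments) {
        const seen = new Set(existingComments.map((c) => c.text.trim()));
        const uniqueViolations = violations.filter((v) => !seen.has(v.message.trim()));
        return {
            uniqueViolations,
            duplicatesRemoved: violations.length - uniqueViolations.length,
        };
    }

    // Example: one of two violations is already covered by a comment.
    const { duplicatesRemoved } = removeExactDuplicates(
        [{ message: "Avoid eval()" }, { message: "Missing null check" }],
        [{ text: "Avoid eval()" }],
    );
    console.log(duplicatesRemoved); // 1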

@@ -933,14 +829,33 @@ ${recommendation}
             enabled: true,
             maxFilesPerBatch: 3,
             prioritizeSecurityFiles: true,
-            parallelBatches: false, // …
+            parallelBatches: false, // Keep for backward compatibility
             batchDelayMs: 1000,
             singleRequestThreshold: 5, // Use single request for ≤5 files
+            // NEW: Parallel processing defaults
+            parallel: {
+                enabled: true, // Enable parallel processing by default
+                maxConcurrentBatches: 3,
+                rateLimitStrategy: "fixed",
+                tokenBudgetDistribution: "equal",
+                failureHandling: "continue",
+            },
         };
-        …
+        const mergedConfig = {
             ...defaultConfig,
             ...this.reviewConfig.batchProcessing,
         };
+        // Merge parallel config separately to handle nested object properly
+        if (mergedConfig.parallel && this.reviewConfig.batchProcessing?.parallel) {
+            mergedConfig.parallel = {
+                ...defaultConfig.parallel,
+                ...this.reviewConfig.batchProcessing.parallel,
+            };
+        }
+        else if (!mergedConfig.parallel) {
+            mergedConfig.parallel = defaultConfig.parallel;
+        }
+        return mergedConfig;
     }
     /**
      * Determine if batch processing should be used
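
The next hunk rewires reviewWithBatchProcessing around these defaults. To see what the merge above produces, here is a runnable sketch using exactly the default keys from the hunk, with one hypothetical user override (the YAML equivalent lives in yama.config.example.yaml, +42 -4 above):

    // Defaults copied from the hunk above; `userConfig` is a made-up override.
    const defaultConfig = {
        enabled: true,
        maxFilesPerBatch: 3,
        prioritizeSecurityFiles: true,
        parallelBatches: false,
        batchDelayMs: 1000,
        singleRequestThreshold: 5,
        parallel: {
            enabled: true,
            maxConcurrentBatches: 3,
            rateLimitStrategy: "fixed",
            tokenBudgetDistribution: "equal",
            failureHandling: "continue",
        },
    };
    const userConfig = { parallel: { maxConcurrentBatches: 5 } };
    // Shallow spread first, then the nested `parallel` spread, mirroring
    // getBatchProcessingConfig in the hunk above.
    const mergedConfig = { ...defaultConfig, ...userConfig };
    if (mergedConfig.parallel && userConfig.parallel) {
        mergedConfig.parallel = { ...defaultConfig.parallel, ...userConfig.parallel };
    }
    console.log(mergedConfig.parallel.maxConcurrentBatches); // 5
    console.log(mergedConfig.parallel.failureHandling); // "continue" (default kept)

Without the nested merge, the plain spread would replace the whole parallel object and silently drop the untouched defaults, which is what the separate merge step guards against.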

@@ -964,56 +879,152 @@ ${recommendation}
         return true;
     }
     /**
-     * Main batch processing method
+     * Main batch processing method with parallel processing support
      */
     async reviewWithBatchProcessing(context, options, batchConfig) {
         const startTime = Date.now();
         try {
             // Step 1: Prioritize and organize files
             const prioritizedFiles = await this.prioritizeFiles(context, batchConfig);
-            logger.info(`Prioritized ${prioritizedFiles.length} files: ${prioritizedFiles.filter(f => f.priority === "high").length} high, ${prioritizedFiles.filter(f => f.priority === "medium").length} medium, ${prioritizedFiles.filter(f => f.priority === "low").length} low priority`);
+            logger.info(`Prioritized ${prioritizedFiles.length} files: ${prioritizedFiles.filter((f) => f.priority === "high").length} high, ${prioritizedFiles.filter((f) => f.priority === "medium").length} medium, ${prioritizedFiles.filter((f) => f.priority === "low").length} low priority`);
             // Step 2: Create batches
             const batches = this.createBatches(prioritizedFiles, batchConfig);
             logger.info(`📦 Created ${batches.length} batches (max ${batchConfig.maxFilesPerBatch} files per batch)`);
-            // Step 3: …
-            const …
-            …
-            …
-            …
-            …
-            …
-            …
-            allViolations.push(...batchResult.violations);
-            logger.info(`✅ Batch ${i + 1} completed: ${batchResult.violations.length} violations found in ${Math.round(batchResult.processingTime / 1000)}s`);
-            // Add delay between batches if configured
-            if (i < batches.length - 1 && batchConfig.batchDelayMs > 0) {
-                logger.debug(`⏳ Waiting ${batchConfig.batchDelayMs}ms before next batch`);
-                await new Promise(resolve => setTimeout(resolve, batchConfig.batchDelayMs));
-            }
-        }
-        catch (error) {
-            logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
-            // Record failed batch
-            batchResults.push({
-                batchIndex: i,
-                files: batch.files,
-                violations: [],
-                processingTime: Date.now() - startTime,
-                error: error.message,
-            });
-        }
+            // Step 3: Determine processing strategy
+            const useParallel = batchConfig.parallel?.enabled && batches.length > 1;
+            if (useParallel) {
+                logger.info(`Using parallel processing: ${batches.length} batches, max ${batchConfig.parallel?.maxConcurrentBatches} concurrent`);
+                return await this.processInParallel(batches, context, options, batchConfig);
+            }
+            else {
+                logger.info(`Using serial processing: ${batches.length} batches`);
+                return await this.processSerially(batches, context, options, batchConfig);
             }
-        const totalTime = Date.now() - startTime;
-        const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
-        logger.success(`🎯 Batch processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch)`);
-        return { violations: allViolations, batchResults };
         }
         catch (error) {
             logger.error(`Batch processing failed: ${error.message}`);
             throw error;
         }
     }
+    /**
+     * Process batches in parallel with concurrency control
+     */
+    async processInParallel(batches, context, options, batchConfig) {
+        const startTime = Date.now();
+        const parallelConfig = batchConfig.parallel;
+        // Calculate optimal concurrency
+        const avgTokensPerBatch = batches.reduce((sum, b) => sum + b.estimatedTokens, 0) / batches.length;
+        const optimalConcurrency = calculateOptimalConcurrency(batches.length, parallelConfig.maxConcurrentBatches, avgTokensPerBatch, this.getSafeTokenLimit());
+        // Initialize concurrency control
+        const semaphore = new Semaphore(optimalConcurrency);
+        const tokenBudget = new TokenBudgetManager(this.getSafeTokenLimit() * 0.8); // 80% for safety
+        logger.info(`🎯 Parallel processing: ${optimalConcurrency} concurrent batches, ${tokenBudget.getTotalBudget()} token budget`);
+        const batchResults = new Array(batches.length);
+        const allViolations = [];
+        const processingPromises = [];
+        // Process batches with controlled concurrency
+        for (let i = 0; i < batches.length; i++) {
+            const batch = batches[i];
+            const processingPromise = this.processBatchWithConcurrency(batch, context, options, semaphore, tokenBudget, i, batches.length)
+                .then((result) => {
+                batchResults[i] = result; // Maintain order
+                if (result.violations) {
+                    allViolations.push(...result.violations);
+                }
+            })
+                .catch((error) => {
+                logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
+                batchResults[i] = {
+                    batchIndex: i,
+                    files: batch.files,
+                    violations: [],
+                    processingTime: 0,
+                    error: error.message,
+                };
+                // Handle failure strategy
+                if (parallelConfig.failureHandling === "stop-all") {
+                    throw error;
+                }
+            });
+            processingPromises.push(processingPromise);
+            // Add small delay between batch starts to avoid overwhelming
+            if (i < batches.length - 1) {
+                await new Promise((resolve) => setTimeout(resolve, 200));
+            }
+        }
+        // Wait for all batches to complete
+        await Promise.allSettled(processingPromises);
+        // Filter out undefined results and sort by batch index
+        const validResults = batchResults
+            .filter((r) => r !== undefined)
+            .sort((a, b) => a.batchIndex - b.batchIndex);
+        const totalTime = Date.now() - startTime;
+        const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
+        const budgetStatus = tokenBudget.getBudgetStatus();
+        logger.success(`🎯 Parallel processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch, ${budgetStatus.utilizationPercent}% token usage)`);
+        return { violations: allViolations, batchResults: validResults };
+    }
+    /**
+     * Process batches serially (original implementation)
+     */
+    async processSerially(batches, context, options, batchConfig) {
+        const startTime = Date.now();
+        const batchResults = [];
+        const allViolations = [];
+        for (let i = 0; i < batches.length; i++) {
+            const batch = batches[i];
+            logger.info(`Processing batch ${i + 1}/${batches.length} (${batch.files.length} files, ${batch.priority} priority, serial)`);
+            try {
+                const batchResult = await this.processBatch(batch, context, options);
+                batchResults.push(batchResult);
+                allViolations.push(...batchResult.violations);
+                logger.info(`✅ Batch ${i + 1} completed: ${batchResult.violations.length} violations found in ${Math.round(batchResult.processingTime / 1000)}s`);
+                // Add delay between batches if configured
+                if (i < batches.length - 1 && batchConfig.batchDelayMs > 0) {
+                    logger.debug(`⏳ Waiting ${batchConfig.batchDelayMs}ms before next batch`);
+                    await new Promise((resolve) => setTimeout(resolve, batchConfig.batchDelayMs));
+                }
+            }
+            catch (error) {
+                logger.error(`❌ Batch ${i + 1} failed: ${error.message}`);
+                // Record failed batch
+                batchResults.push({
+                    batchIndex: i,
+                    files: batch.files,
+                    violations: [],
+                    processingTime: Date.now() - startTime,
+                    error: error.message,
+                });
+            }
+        }
+        const totalTime = Date.now() - startTime;
+        const avgBatchSize = batches.reduce((sum, b) => sum + b.files.length, 0) / batches.length;
+        logger.success(`🎯 Serial processing completed: ${allViolations.length} total violations from ${batches.length} batches in ${Math.round(totalTime / 1000)}s (avg ${avgBatchSize.toFixed(1)} files/batch)`);
+        return { violations: allViolations, batchResults };
+    }
+    /**
+     * Process a single batch with concurrency control
+     */
+    async processBatchWithConcurrency(batch, context, options, semaphore, tokenBudget, batchIndex, totalBatches) {
+        // Acquire semaphore permit
+        await semaphore.acquire();
+        try {
+            // Check token budget
+            if (!tokenBudget.allocateForBatch(batchIndex, batch.estimatedTokens)) {
+                throw new Error(`Insufficient token budget for batch ${batchIndex + 1}`);
+            }
+            logger.info(`Processing batch ${batchIndex + 1}/${totalBatches} (${batch.files.length} files, parallel)`);
+            // Process the batch (existing logic)
+            const result = await this.processBatch(batch, context, options);
+            logger.info(`✅ Batch ${batchIndex + 1} completed: ${result.violations.length} violations in ${Math.round(result.processingTime / 1000)}s`);
+            return result;
+        }
+        finally {
+            // Always release resources
+            tokenBudget.releaseBatch(batchIndex);
+            semaphore.release();
+        }
+    }
     /**
      * Prioritize files based on security importance and file type
      */
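
processBatchWithConcurrency leans on the acquire()/release() contract of the new Semaphore from ParallelProcessing.js. A minimal counting-semaphore sketch that satisfies that contract, assuming nothing beyond what the hunk above uses; the shipped implementation may differ:

    // Minimal counting semaphore: at most `maxPermits` callers hold a permit.
    class SimpleSemaphore {
        constructor(maxPermits) {
            this.permits = maxPermits; // free slots
            this.waiters = []; // resolve callbacks of parked acquirers
        }
        acquire() {
            if (this.permits > 0) {
                this.permits--;
                return Promise.resolve();
            }
            // No free permit: park the caller until release() wakes it.
            return new Promise((resolve) => this.waiters.push(resolve));
        }
        release() {
            const next = this.waiters.shift();
            if (next) {
                next(); // hand the permit straight to a waiter
            }
            else {
                this.permits++; // nobody waiting: return permit to the pool
            }
        }
    }

    // Usage mirroring the batch loop above: at most 3 of 5 tasks run at once.
    async function demo() {
        const semaphore = new SimpleSemaphore(3);
        await Promise.allSettled([1, 2, 3, 4, 5].map(async () => {
            await semaphore.acquire();
            try {
                await new Promise((r) => setTimeout(r, 100)); // stand-in for processBatch
            }
            finally {
                semaphore.release();
            }
        }));
    }
    demo();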

@@ -1051,26 +1062,73 @@ ${recommendation}
         const path = filePath.toLowerCase();
         // High priority: Security-sensitive files
         const highPriorityPatterns = [
-            /auth/i,
-            /…
-            /…
-            /…
-            /…
-            /…
+            /auth/i,
+            /login/i,
+            /password/i,
+            /token/i,
+            /jwt/i,
+            /oauth/i,
+            /crypto/i,
+            /encrypt/i,
+            /decrypt/i,
+            /hash/i,
+            /security/i,
+            /payment/i,
+            /billing/i,
+            /transaction/i,
+            /money/i,
+            /wallet/i,
+            /admin/i,
+            /privilege/i,
+            /permission/i,
+            /role/i,
+            /access/i,
+            /config/i,
+            /env/i,
+            /secret/i,
+            /key/i,
+            /credential/i,
+            /api/i,
+            /endpoint/i,
+            /route/i,
+            /controller/i,
+            /middleware/i,
         ];
-        if (highPriorityPatterns.some(pattern => pattern.test(path))) {
+        if (highPriorityPatterns.some((pattern) => pattern.test(path))) {
             return "high";
         }
         // Low priority: Documentation, tests, config files
         const lowPriorityPatterns = [
-            /\.md$/i,
-            …
-            …
-            …
-            …
-            …
+            /\.md$/i,
+            /\.txt$/i,
+            /readme/i,
+            /changelog/i,
+            /license/i,
+            /test/i,
+            /spec/i,
+            /\.test\./i,
+            /\.spec\./i,
+            /__tests__/i,
+            /\.json$/i,
+            /\.yaml$/i,
+            /\.yml$/i,
+            /\.toml$/i,
+            /\.ini$/i,
+            /\.lock$/i,
+            /package-lock/i,
+            /yarn\.lock/i,
+            /pnpm-lock/i,
+            /\.gitignore/i,
+            /\.eslint/i,
+            /\.prettier/i,
+            /tsconfig/i,
+            /\.svg$/i,
+            /\.png$/i,
+            /\.jpg$/i,
+            /\.jpeg$/i,
+            /\.gif$/i,
        ];
-        if (lowPriorityPatterns.some(pattern => pattern.test(path))) {
+        if (lowPriorityPatterns.some((pattern) => pattern.test(path))) {
             return "low";
         }
         // Medium priority: Everything else
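
The final hunk below tightens createBatches so a batch is only finalized when it actually contains files. The packing rule is greedy: close the current batch when adding the next file would exceed either the token budget or maxFilesPerBatch. A self-contained sketch of that rule (field names follow the hunk; everything else is illustrative):

    // Greedy batch packing; never emits an empty batch, which is the guard
    // the hunk below adds. Sketch only, not the shipped createBatches.
    function createBatchesSketch(prioritizedFiles, maxTokensPerBatch, maxFilesPerBatch) {
        const batches = [];
        let currentBatch = { files: [], estimatedTokens: 0 };
        for (const file of prioritizedFiles) {
            const wouldExceedTokens = currentBatch.estimatedTokens + file.estimatedTokens > maxTokensPerBatch;
            const wouldExceedFileCount = currentBatch.files.length >= maxFilesPerBatch;
            if ((wouldExceedTokens || wouldExceedFileCount) && currentBatch.files.length > 0) {
                batches.push(currentBatch); // finalize current batch
                currentBatch = { files: [], estimatedTokens: 0 }; // start a new one
            }
            currentBatch.files.push(file);
            currentBatch.estimatedTokens += file.estimatedTokens;
        }
        if (currentBatch.files.length > 0) {
            batches.push(currentBatch); // flush the tail
        }
        return batches;
    }

    // Example: a 2-files-per-batch cap splits three files into two batches.
    console.log(createBatchesSketch(
        [{ estimatedTokens: 10 }, { estimatedTokens: 20 }, { estimatedTokens: 30 }],
        1000,
        2,
    ).length); // 2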

@@ -1128,7 +1186,8 @@ ${recommendation}
         for (const file of prioritizedFiles) {
             const wouldExceedTokens = currentBatch.estimatedTokens + file.estimatedTokens > maxTokensPerBatch;
             const wouldExceedFileCount = currentBatch.files.length >= batchConfig.maxFilesPerBatch;
-            if ((wouldExceedTokens || wouldExceedFileCount) && …
+            if ((wouldExceedTokens || wouldExceedFileCount) &&
+                currentBatch.files.length > 0) {
                 // Finalize current batch
                 batches.push(currentBatch);
                 // Start new batch