codereview-aia 0.1.2 → 0.1.4
This diff shows the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/dist/analysis/static/wpPhpcsRunner.d.ts +11 -0
- package/dist/analysis/static/wpPhpcsRunner.js +219 -0
- package/dist/analysis/static/wpPhpcsRunner.js.map +1 -0
- package/dist/clients/implementations/openRouterClient.js +2 -2
- package/dist/clients/implementations/openRouterClient.js.map +1 -1
- package/dist/clients/openRouterClient.js +2 -2
- package/dist/clients/openRouterClient.js.map +1 -1
- package/dist/clients/utils/promptFormatter.d.ts +3 -2
- package/dist/clients/utils/promptFormatter.js +82 -24
- package/dist/clients/utils/promptFormatter.js.map +1 -1
- package/dist/core/ConfigurationService.d.ts +21 -0
- package/dist/core/ConfigurationService.js +39 -0
- package/dist/core/ConfigurationService.js.map +1 -1
- package/dist/core/handlers/FileProcessingHandler.js +5 -0
- package/dist/core/handlers/FileProcessingHandler.js.map +1 -1
- package/dist/core/reviewOrchestrator.js +61 -1
- package/dist/core/reviewOrchestrator.js.map +1 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -2
- package/dist/index.js.map +1 -1
- package/dist/runtime/cliEntry.js +57 -4
- package/dist/runtime/cliEntry.js.map +1 -1
- package/dist/runtime/fileCollector.d.ts +10 -1
- package/dist/runtime/fileCollector.js +217 -2
- package/dist/runtime/fileCollector.js.map +1 -1
- package/dist/runtime/reporting/markdownReportBuilder.d.ts +2 -0
- package/dist/runtime/reporting/markdownReportBuilder.js +57 -0
- package/dist/runtime/reporting/markdownReportBuilder.js.map +1 -1
- package/dist/runtime/reviewPipeline.d.ts +22 -3
- package/dist/runtime/reviewPipeline.js +46 -7
- package/dist/runtime/reviewPipeline.js.map +1 -1
- package/dist/runtime/runAiCodeReview.d.ts +19 -1
- package/dist/runtime/runAiCodeReview.js +243 -8
- package/dist/runtime/runAiCodeReview.js.map +1 -1
- package/dist/runtime/ui/RuntimeApp.js +15 -4
- package/dist/runtime/ui/RuntimeApp.js.map +1 -1
- package/dist/runtime/ui/screens/ProgressScreen.d.ts +6 -1
- package/dist/runtime/ui/screens/ProgressScreen.js +28 -2
- package/dist/runtime/ui/screens/ProgressScreen.js.map +1 -1
- package/dist/runtime/ui/screens/ResultsScreen.js +8 -1
- package/dist/runtime/ui/screens/ResultsScreen.js.map +1 -1
- package/dist/types/review.d.ts +60 -0
- package/dist/utils/detection/frameworkDetector.js +55 -0
- package/dist/utils/detection/frameworkDetector.js.map +1 -1
- package/dist/utils/promptTemplateManager.js +1 -0
- package/dist/utils/promptTemplateManager.js.map +1 -1
- package/package.json +13 -10
- package/.cr-aia.yml +0 -23
- package/.crignore +0 -0
- package/src/analysis/FindingsExtractor.ts +0 -431
- package/src/analysis/ai-detection/analyzers/BaseAnalyzer.ts +0 -267
- package/src/analysis/ai-detection/analyzers/DocumentationAnalyzer.ts +0 -622
- package/src/analysis/ai-detection/analyzers/GitHistoryAnalyzer.ts +0 -430
- package/src/analysis/ai-detection/core/AIDetectionEngine.ts +0 -467
- package/src/analysis/ai-detection/types/DetectionTypes.ts +0 -406
- package/src/analysis/ai-detection/utils/SubmissionConverter.ts +0 -390
- package/src/analysis/context/ReviewContext.ts +0 -378
- package/src/analysis/context/index.ts +0 -7
- package/src/analysis/index.ts +0 -8
- package/src/analysis/tokens/TokenAnalysisFormatter.ts +0 -154
- package/src/analysis/tokens/TokenAnalyzer.ts +0 -747
- package/src/analysis/tokens/index.ts +0 -8
- package/src/clients/base/abstractClient.ts +0 -190
- package/src/clients/base/httpClient.ts +0 -160
- package/src/clients/base/index.ts +0 -12
- package/src/clients/base/modelDetection.ts +0 -107
- package/src/clients/base/responseProcessor.ts +0 -586
- package/src/clients/factory/clientFactory.ts +0 -55
- package/src/clients/factory/index.ts +0 -8
- package/src/clients/implementations/index.ts +0 -8
- package/src/clients/implementations/openRouterClient.ts +0 -411
- package/src/clients/openRouterClient.ts +0 -863
- package/src/clients/openRouterClientWrapper.ts +0 -44
- package/src/clients/utils/directoryStructure.ts +0 -52
- package/src/clients/utils/index.ts +0 -11
- package/src/clients/utils/languageDetection.ts +0 -44
- package/src/clients/utils/promptFormatter.ts +0 -105
- package/src/clients/utils/promptLoader.ts +0 -53
- package/src/clients/utils/tokenCounter.ts +0 -297
- package/src/core/ApiClientSelector.ts +0 -37
- package/src/core/ConfigurationService.ts +0 -591
- package/src/core/ConsolidationService.ts +0 -423
- package/src/core/InteractiveDisplayManager.ts +0 -81
- package/src/core/OutputManager.ts +0 -275
- package/src/core/ReviewGenerator.ts +0 -140
- package/src/core/fileDiscovery.ts +0 -237
- package/src/core/handlers/EstimationHandler.ts +0 -104
- package/src/core/handlers/FileProcessingHandler.ts +0 -204
- package/src/core/handlers/OutputHandler.ts +0 -125
- package/src/core/handlers/ReviewExecutor.ts +0 -104
- package/src/core/reviewOrchestrator.ts +0 -333
- package/src/core/utils/ModelInfoUtils.ts +0 -56
- package/src/formatters/outputFormatter.ts +0 -62
- package/src/formatters/utils/IssueFormatters.ts +0 -83
- package/src/formatters/utils/JsonFormatter.ts +0 -77
- package/src/formatters/utils/MarkdownFormatters.ts +0 -609
- package/src/formatters/utils/MetadataFormatter.ts +0 -269
- package/src/formatters/utils/ModelInfoExtractor.ts +0 -115
- package/src/index.ts +0 -28
- package/src/plugins/PluginInterface.ts +0 -50
- package/src/plugins/PluginManager.ts +0 -126
- package/src/prompts/PromptManager.ts +0 -69
- package/src/prompts/cache/PromptCache.ts +0 -50
- package/src/prompts/promptText/common/variables/css-frameworks.json +0 -33
- package/src/prompts/promptText/common/variables/framework-versions.json +0 -45
- package/src/prompts/promptText/frameworks/react/comprehensive.hbs +0 -19
- package/src/prompts/promptText/languages/css/comprehensive.hbs +0 -18
- package/src/prompts/promptText/languages/generic/comprehensive.hbs +0 -20
- package/src/prompts/promptText/languages/html/comprehensive.hbs +0 -18
- package/src/prompts/promptText/languages/javascript/comprehensive.hbs +0 -18
- package/src/prompts/promptText/languages/python/comprehensive.hbs +0 -18
- package/src/prompts/promptText/languages/typescript/comprehensive.hbs +0 -18
- package/src/runtime/auth/service.ts +0 -58
- package/src/runtime/auth/session.ts +0 -103
- package/src/runtime/auth/types.ts +0 -11
- package/src/runtime/cliEntry.ts +0 -196
- package/src/runtime/debug/logManager.ts +0 -37
- package/src/runtime/errors.ts +0 -13
- package/src/runtime/fileCollector.ts +0 -222
- package/src/runtime/manifest.ts +0 -64
- package/src/runtime/openrouterProxy.ts +0 -45
- package/src/runtime/preprod/webCheck.ts +0 -104
- package/src/runtime/proxyConfig.ts +0 -94
- package/src/runtime/proxyEnvironment.ts +0 -71
- package/src/runtime/reportMerge.ts +0 -102
- package/src/runtime/reporting/markdownReportBuilder.ts +0 -138
- package/src/runtime/reporting/reportDataCollector.ts +0 -234
- package/src/runtime/reporting/summaryGenerator.ts +0 -86
- package/src/runtime/reviewPipeline.ts +0 -161
- package/src/runtime/runAiCodeReview.ts +0 -153
- package/src/runtime/runtimeConfig.ts +0 -5
- package/src/runtime/ui/Layout.tsx +0 -57
- package/src/runtime/ui/RuntimeApp.tsx +0 -233
- package/src/runtime/ui/inkModules.ts +0 -73
- package/src/runtime/ui/screens/AuthScreen.tsx +0 -128
- package/src/runtime/ui/screens/ModeSelection.tsx +0 -185
- package/src/runtime/ui/screens/ProgressScreen.tsx +0 -62
- package/src/runtime/ui/screens/ResultsScreen.tsx +0 -83
- package/src/strategies/ArchitecturalReviewStrategy.ts +0 -54
- package/src/strategies/CodingTestReviewStrategy.ts +0 -920
- package/src/strategies/ConsolidatedReviewStrategy.ts +0 -59
- package/src/strategies/ExtractPatternsReviewStrategy.ts +0 -64
- package/src/strategies/MultiPassReviewStrategy.ts +0 -785
- package/src/strategies/ReviewStrategy.ts +0 -64
- package/src/strategies/StrategyFactory.ts +0 -79
- package/src/strategies/index.ts +0 -14
- package/src/tokenizers/baseTokenizer.ts +0 -61
- package/src/tokenizers/gptTokenizer.ts +0 -27
- package/src/tokenizers/index.ts +0 -8
- package/src/types/apiResponses.ts +0 -40
- package/src/types/cli.ts +0 -24
- package/src/types/common.ts +0 -39
- package/src/types/configuration.ts +0 -201
- package/src/types/handlebars.d.ts +0 -5
- package/src/types/patch.d.ts +0 -25
- package/src/types/review.ts +0 -294
- package/src/types/reviewContext.d.ts +0 -65
- package/src/types/reviewSchema.ts +0 -181
- package/src/types/structuredReview.ts +0 -167
- package/src/types/tokenAnalysis.ts +0 -56
- package/src/utils/FileReader.ts +0 -93
- package/src/utils/FileWriter.ts +0 -76
- package/src/utils/PathGenerator.ts +0 -97
- package/src/utils/api/apiUtils.ts +0 -14
- package/src/utils/api/index.ts +0 -1
- package/src/utils/apiErrorHandler.ts +0 -287
- package/src/utils/ciDataCollector.ts +0 -252
- package/src/utils/codingTestConfigLoader.ts +0 -466
- package/src/utils/dependencies/aiDependencyAnalyzer.ts +0 -454
- package/src/utils/detection/frameworkDetector.ts +0 -879
- package/src/utils/detection/index.ts +0 -10
- package/src/utils/detection/projectTypeDetector.ts +0 -518
- package/src/utils/diagramGenerator.ts +0 -206
- package/src/utils/errorLogger.ts +0 -60
- package/src/utils/estimationUtils.ts +0 -407
- package/src/utils/fileFilters.ts +0 -373
- package/src/utils/fileSystem.ts +0 -57
- package/src/utils/index.ts +0 -36
- package/src/utils/logger.ts +0 -290
- package/src/utils/pathValidator.ts +0 -98
- package/src/utils/priorityFilter.ts +0 -59
- package/src/utils/projectDocs.ts +0 -189
- package/src/utils/promptPaths.ts +0 -29
- package/src/utils/promptTemplateManager.ts +0 -157
- package/src/utils/review/consolidateReview.ts +0 -553
- package/src/utils/review/fixDisplay.ts +0 -100
- package/src/utils/review/fixImplementation.ts +0 -61
- package/src/utils/review/index.ts +0 -36
- package/src/utils/review/interactiveProcessing.ts +0 -294
- package/src/utils/review/progressTracker.ts +0 -296
- package/src/utils/review/reviewExtraction.ts +0 -382
- package/src/utils/review/types.ts +0 -46
- package/src/utils/reviewActionHandler.ts +0 -18
- package/src/utils/reviewParser.ts +0 -253
- package/src/utils/sanitizer.ts +0 -238
- package/src/utils/smartFileSelector.ts +0 -255
- package/src/utils/templateLoader.ts +0 -514
- package/src/utils/treeGenerator.ts +0 -153
- package/tsconfig.build.json +0 -14
- package/tsconfig.json +0 -59
package/src/analysis/tokens/TokenAnalyzer.ts
@@ -1,747 +0,0 @@
-/**
- * @fileoverview Token analysis service for pre-review token counting and estimation.
- *
- * This module provides fast, provider-agnostic token counting and analysis functionality
- * to estimate token usage and costs before performing actual reviews.
- */
-
-import { countTokens } from '../../tokenizers';
-import type { FileInfo } from '../../types/review';
-import logger from '../../utils/logger';
-
-/**
- * Result of token analysis for a single file
- */
-export interface FileTokenAnalysis {
-  /** Path to the file */
-  path: string;
-  /** Relative path to the file */
-  relativePath: string | undefined;
-  /** Number of tokens in the file */
-  tokenCount: number;
-  /** Size of file in bytes */
-  sizeInBytes: number;
-  /** Tokens per byte ratio (used for optimization analysis) */
-  tokensPerByte: number;
-}
-
-/**
- * Result of token analysis for a set of files
- */
-export interface TokenAnalysisResult {
-  /** Analysis of individual files */
-  files: FileTokenAnalysis[];
-  /** Total number of tokens across all files */
-  totalTokens: number;
-  /** Total size of all files in bytes */
-  totalSizeInBytes: number;
-  /** Average tokens per byte across all files */
-  averageTokensPerByte: number;
-  /** Total number of files analyzed */
-  fileCount: number;
-  /** Token overhead for prompts, instructions, etc. */
-  promptOverheadTokens: number;
-  /** Estimated total token count including overhead */
-  estimatedTotalTokens: number;
-  /** Maximum context window size for the model */
-  contextWindowSize: number;
-  /** Whether the content exceeds the context window */
-  exceedsContextWindow: boolean;
-  /** Number of passes needed for multi-pass review */
-  estimatedPassesNeeded: number;
-  /** Chunking strategy recommendation */
-  chunkingRecommendation: ChunkingRecommendation;
-}
-
-/**
- * Recommendation for chunking strategy
- */
-export interface ChunkingRecommendation {
-  /** Whether chunking is recommended */
-  chunkingRecommended: boolean;
-  /** Approximate file chunks for multi-pass processing */
-  recommendedChunks: FileChunk[];
-  /** Reason for chunking recommendation */
-  reason: string;
-}
-
-/**
- * A chunk of files for multi-pass processing
- */
-export interface FileChunk {
-  /** Files in this chunk */
-  files: string[];
-  /** Estimated token count for this chunk */
-  estimatedTokenCount: number;
-  /** Priority of this chunk (higher = more important) */
-  priority: number;
-}
-
-/**
- * Options for token analysis
- */
-export interface TokenAnalysisOptions {
-  /** Type of review being performed */
-  reviewType: string;
-  /** Name of the model being used */
-  modelName: string;
-  /** Whether to optimize for speed (less accurate) or precision */
-  optimizeForSpeed?: boolean;
-  /** Additional prompt overhead to consider */
-  additionalPromptOverhead?: number;
-  /** Context maintenance factor for multi-pass reviews (0-1) */
-  contextMaintenanceFactor?: number;
-  /** Safety margin factor for context window (0-1) */
-  safetyMarginFactor?: number;
-  /** Force single pass mode regardless of token count */
-  forceSinglePass?: boolean;
-  /** Force maximum tokens per batch (for testing consolidation) */
-  batchTokenLimit?: number;
-}
-
-/**
- * Service for analyzing token usage in files
- */
-export class TokenAnalyzer {
-  private static DEFAULT_PROMPT_OVERHEAD = 1500;
-  private static DEFAULT_CONTEXT_MAINTENANCE_FACTOR = 0.08; // Reduced to 8% for better efficiency
-  private static DEFAULT_SAFETY_MARGIN_FACTOR = 0.1; // Use 90% of context window by default
-  private static DEFAULT_CONTEXT_WINDOW = 100000; // Default fallback
-  /**
-   * Context window mapping for supported models.
-   * Leave values as null if the exact window is unknown so they can be filled in later.
-   */
-  private static MODEL_CONTEXT_WINDOWS: Record<string, number | null> = {
-    'openai/gpt-5.1-codex': 400000,
-    'gpt-5.1-codex': 400000,
-    'anthropic/claude-haiku-4.5': 200000,
-    'claude-haiku-4.5': 200000,
-    'moonshotai/kimi-k2-thinking': 262144,
-    'kimi-k2-thinking': 262144,
-    'x-ai/grok-4-fast': 2000000,
-    'grok-4-fast': 2000000,
-  };
-
-  /**
-   * Get the context window size for a model
-   * @param modelName Name of the model
-   * @returns Context window size in tokens
-   */
-  private static getContextWindowSize(modelName: string): number {
-    logger.debug(`getContextWindowSize: modelName=${modelName}`);
-    const configured = TokenAnalyzer.lookupConfiguredContextWindow(modelName);
-
-    if (typeof configured === 'number') {
-      logger.info(`Using configured context window for ${modelName}: ${configured.toLocaleString()} tokens`);
-      return configured;
-    }
-
-    if (configured === null) {
-      logger.warn(
-        `Context window size for ${modelName} has not been set. Update TokenAnalyzer.MODEL_CONTEXT_WINDOWS to provide the exact token limit.`,
-      );
-    } else {
-      logger.warn(`No matching context window size found for model: ${modelName}`);
-    }
-
-    logger.warn(
-      `Using default context window size: ${TokenAnalyzer.DEFAULT_CONTEXT_WINDOW.toLocaleString()} tokens`,
-    );
-    return TokenAnalyzer.DEFAULT_CONTEXT_WINDOW;
-  }
-
-  private static lookupConfiguredContextWindow(modelName: string): number | null | undefined {
-    const normalized = modelName.toLowerCase();
-    if (normalized in TokenAnalyzer.MODEL_CONTEXT_WINDOWS) {
-      return TokenAnalyzer.MODEL_CONTEXT_WINDOWS[normalized];
-    }
-
-    if (normalized.includes(':')) {
-      const [, baseName] = normalized.split(':');
-      if (baseName && baseName in TokenAnalyzer.MODEL_CONTEXT_WINDOWS) {
-        return TokenAnalyzer.MODEL_CONTEXT_WINDOWS[baseName];
-      }
-    }
-
-    return undefined;
-  }
-
-  /**
-   * Analyze token usage for a set of files
-   * @param files Files to analyze
-   * @param options Analysis options
-   * @returns Token analysis result
-   */
-  public static analyzeFiles(
-    files: FileInfo[],
-    options: TokenAnalysisOptions,
-  ): TokenAnalysisResult {
-    logger.info('Analyzing token usage for files...');
-    logger.debug(`TokenAnalyzer: modelName=${options.modelName}`);
-
-    const contextWindowSize = TokenAnalyzer.getContextWindowSize(options.modelName);
-    const promptOverhead =
-      options.additionalPromptOverhead || TokenAnalyzer.DEFAULT_PROMPT_OVERHEAD;
-    const contextMaintenanceFactor =
-      options.contextMaintenanceFactor || TokenAnalyzer.DEFAULT_CONTEXT_MAINTENANCE_FACTOR;
-    const safetyMarginFactor =
-      options.safetyMarginFactor || TokenAnalyzer.DEFAULT_SAFETY_MARGIN_FACTOR;
-
-    // Calculate effective context window size with safety margin
-    const effectiveContextWindowSize = Math.floor(contextWindowSize * (1 - safetyMarginFactor));
-    logger.info(
-      `Using effective context window size: ${effectiveContextWindowSize.toLocaleString()} tokens (${Math.round((1 - safetyMarginFactor) * 100)}% of ${contextWindowSize.toLocaleString()} tokens)`,
-    );
-
-    // Analyze each file
-    const fileAnalyses: FileTokenAnalysis[] = files.map((file) => {
-      const content = file.content;
-      const tokenCount = countTokens(content, options.modelName);
-      const sizeInBytes = content.length;
-      const tokensPerByte = sizeInBytes > 0 ? tokenCount / sizeInBytes : 0;
-
-      return {
-        path: file.path,
-        relativePath: file.relativePath,
-        tokenCount,
-        sizeInBytes,
-        tokensPerByte,
-      };
-    });
-
-    // Calculate totals
-    const totalTokens = fileAnalyses.reduce((sum, file) => sum + file.tokenCount, 0);
-    const totalSizeInBytes = fileAnalyses.reduce((sum, file) => sum + file.sizeInBytes, 0);
-    const averageTokensPerByte = totalSizeInBytes > 0 ? totalTokens / totalSizeInBytes : 0;
-
-    // Estimate total tokens with overhead
-    const estimatedTotalTokens = totalTokens + promptOverhead;
-
-    // Determine if chunking is needed
-    const exceedsContextWindow = estimatedTotalTokens > effectiveContextWindowSize;
-
-    logger.info(`Token analysis summary:`);
-    logger.info(`- Total files: ${files.length}`);
-    logger.info(`- Total tokens: ${totalTokens.toLocaleString()}`);
-    logger.info(`- Prompt overhead: ${promptOverhead.toLocaleString()}`);
-    logger.info(`- Estimated total tokens: ${estimatedTotalTokens.toLocaleString()}`);
-    logger.info(`- Context window size: ${contextWindowSize.toLocaleString()}`);
-    logger.info(
-      `- Effective context size (with safety margin): ${effectiveContextWindowSize.toLocaleString()}`,
-    );
-    logger.info(
-      `- Context utilization: ${((estimatedTotalTokens / effectiveContextWindowSize) * 100).toFixed(2)}%`,
-    );
-
-    // Calculate recommended chunks if needed
-    const chunkingRecommendation = TokenAnalyzer.generateChunkingRecommendation(
-      fileAnalyses,
-      estimatedTotalTokens,
-      effectiveContextWindowSize,
-      contextMaintenanceFactor,
-      options.forceSinglePass,
-      options.batchTokenLimit,
-    );
-
-    // Log chunking decision
-    if (chunkingRecommendation.chunkingRecommended) {
-      logger.info(`Multi-pass review recommended: ${chunkingRecommendation.reason}`);
-      logger.info(`Estimated passes needed: ${chunkingRecommendation.recommendedChunks.length}`);
-    } else {
-      logger.info(`Single-pass review recommended: ${chunkingRecommendation.reason}`);
-    }
-
-    return {
-      files: fileAnalyses,
-      totalTokens,
-      totalSizeInBytes,
-      averageTokensPerByte,
-      fileCount: files.length,
-      promptOverheadTokens: promptOverhead,
-      estimatedTotalTokens,
-      contextWindowSize,
-      exceedsContextWindow,
-      estimatedPassesNeeded: chunkingRecommendation.recommendedChunks.length,
-      chunkingRecommendation,
-    };
-  }
-
-  /**
-   * Generate a chunking recommendation for files that exceed context window
-   * @param fileAnalyses Array of file token analyses
-   * @param estimatedTotalTokens Total tokens including overhead
-   * @param contextWindowSize Maximum context window size
-   * @param contextMaintenanceFactor Context maintenance overhead factor
-   * @param forceSinglePass Force single pass mode regardless of token count
-   * @param batchTokenLimit Force maximum tokens per batch (for testing)
-   * @returns Chunking recommendation
-   */
-  private static generateChunkingRecommendation(
-    fileAnalyses: FileTokenAnalysis[],
-    estimatedTotalTokens: number,
-    contextWindowSize: number,
-    contextMaintenanceFactor: number,
-    forceSinglePass?: boolean,
-    batchTokenLimit?: number,
-  ): ChunkingRecommendation {
-    // If forceSinglePass is true, skip chunking regardless of token count
-    if (forceSinglePass) {
-      logger.debug(`Forcing single-pass review mode as requested (forceSinglePass=true)`);
-      return {
-        chunkingRecommended: false,
-        recommendedChunks: [
-          {
-            files: fileAnalyses.map((f) => f.path),
-            estimatedTokenCount: estimatedTotalTokens,
-            priority: 1,
-          },
-        ],
-        reason: 'Single-pass mode forced by configuration',
-      };
-    }
-
-    // If batchTokenLimit is provided, use it to force smaller batches
-    let effectiveContextLimit = contextWindowSize;
-    if (batchTokenLimit && batchTokenLimit > 0) {
-      effectiveContextLimit = Math.min(batchTokenLimit, contextWindowSize);
-      logger.info(
-        `Using batch token limit: ${batchTokenLimit.toLocaleString()} tokens (forcing smaller batches for testing)`,
-      );
-      if (batchTokenLimit < contextWindowSize) {
-        logger.info(
-          `This will force chunking even if content would fit in the model's context window`,
-        );
-      }
-    }
-
-    // If content fits within context window, no chunking needed
-    if (estimatedTotalTokens <= effectiveContextLimit) {
-      logger.debug(
-        `Content fits within effective limit (${estimatedTotalTokens.toLocaleString()} <= ${effectiveContextLimit.toLocaleString()} tokens)`,
-      );
-      return {
-        chunkingRecommended: false,
-        recommendedChunks: [
-          {
-            files: fileAnalyses.map((f) => f.path),
-            estimatedTokenCount: estimatedTotalTokens,
-            priority: 1,
-          },
-        ],
-        reason: batchTokenLimit
-          ? 'Content fits within batch token limit'
-          : 'Content fits within model context window',
-      };
-    }
-
-    logger.debug(
-      `Content exceeds effective limit (${estimatedTotalTokens.toLocaleString()} > ${effectiveContextLimit.toLocaleString()} tokens)`,
-    );
-    logger.debug(
-      `Generating chunking recommendation with context maintenance factor: ${contextMaintenanceFactor}`,
-    );
-
-    // Calculate effective context window size accounting for context maintenance
-    const effectiveContextSize = Math.floor(effectiveContextLimit * (1 - contextMaintenanceFactor));
-
-    logger.debug(
-      `Effective context size for chunking: ${effectiveContextSize.toLocaleString()} tokens (${Math.round((1 - contextMaintenanceFactor) * 100)}% of ${effectiveContextLimit.toLocaleString()} tokens)`,
-    );
-
-    // Use optimized bin-packing algorithm for better chunk distribution
-    const chunks = TokenAnalyzer.optimizedBinPacking(
-      fileAnalyses,
-      effectiveContextSize,
-      effectiveContextLimit,
-    );
-
-    logger.info(`Created ${chunks.length} optimized chunks for multi-pass review`);
-
-    let reason = `Content exceeds effective limit (${estimatedTotalTokens.toLocaleString()} > ${effectiveContextLimit.toLocaleString()} tokens)`;
-    if (batchTokenLimit && batchTokenLimit < contextWindowSize) {
-      reason = `Batch token limit forcing smaller batches (limit: ${batchTokenLimit.toLocaleString()} tokens)`;
-    }
-
-    return {
-      chunkingRecommended: true,
-      recommendedChunks: chunks,
-      reason,
-    };
-  }
-
-  /**
-   * Optimized bin-packing algorithm to minimize the number of chunks
-   * Uses an advanced first-fit decreasing with multi-level optimization
-   * @param fileAnalyses Array of file token analyses
-   * @param maxChunkSize Maximum size for each chunk in tokens
-   * @param contextWindowSize Original context window for logging
-   * @returns Array of optimized file chunks
-   */
-  private static optimizedBinPacking(
-    fileAnalyses: FileTokenAnalysis[],
-    maxChunkSize: number,
-    _contextWindowSize: number,
-  ): FileChunk[] {
-    // Sort files by token count (largest first) for first-fit decreasing
-    const sortedFiles = [...fileAnalyses].sort((a, b) => b.tokenCount - a.tokenCount);
-
-    // Calculate target chunk size for optimal distribution
-    const totalTokens = sortedFiles.reduce((sum, f) => sum + f.tokenCount, 0);
-    const minChunksNeeded = Math.ceil(totalTokens / maxChunkSize);
-    const targetChunkSize = Math.floor(totalTokens / minChunksNeeded);
-
-    logger.debug(`Bin-packing optimization:`);
-    logger.debug(` - Total tokens: ${totalTokens.toLocaleString()}`);
-    logger.debug(` - Max chunk size: ${maxChunkSize.toLocaleString()}`);
-    logger.debug(` - Min chunks needed: ${minChunksNeeded}`);
-    logger.debug(` - Target chunk size: ${targetChunkSize.toLocaleString()}`);
-
-    // Initialize chunks array
-    const chunks: FileChunk[] = [];
-
-    // Track oversized files separately
-    const oversizedFiles: FileTokenAnalysis[] = [];
-    const largeFiles: FileTokenAnalysis[] = [];
-    const mediumFiles: FileTokenAnalysis[] = [];
-    const smallFiles: FileTokenAnalysis[] = [];
-
-    // Categorize files by size for better packing
-    for (const file of sortedFiles) {
-      if (file.tokenCount > maxChunkSize) {
-        oversizedFiles.push(file);
-        logger.warn(
-          `File "${file.path}" is oversized (${file.tokenCount.toLocaleString()} > ${maxChunkSize.toLocaleString()} tokens)`,
-        );
-      } else if (file.tokenCount > maxChunkSize * 0.5) {
-        largeFiles.push(file);
-      } else if (file.tokenCount > maxChunkSize * 0.2) {
-        mediumFiles.push(file);
-      } else {
-        smallFiles.push(file);
-      }
-    }
-
-    logger.debug(`File categorization:`);
-    logger.debug(` - Oversized: ${oversizedFiles.length}`);
-    logger.debug(` - Large (>50% of max): ${largeFiles.length}`);
-    logger.debug(` - Medium (20-50% of max): ${mediumFiles.length}`);
-    logger.debug(` - Small (<20% of max): ${smallFiles.length}`);
-
-    // Process oversized files first (split them if possible)
-    for (const file of oversizedFiles) {
-      // For now, put oversized files in their own chunks
-      // TODO: In future, we could split file content
-      chunks.push({
-        files: [file.path],
-        estimatedTokenCount: file.tokenCount,
-        priority: chunks.length + 1,
-      });
-      logger.debug(
-        `Created dedicated chunk ${chunks.length} for oversized file "${file.path}" (${file.tokenCount.toLocaleString()} tokens)`,
-      );
-    }
-
-    // Process large files - try to pair them optimally
-    for (const file of largeFiles) {
-      let placed = false;
-
-      // Try to find a chunk with complementary space
-      for (let i = 0; i < chunks.length; i++) {
-        const chunk = chunks[i];
-        const remainingSpace = maxChunkSize - chunk.estimatedTokenCount;
-
-        // Check if this file fits well (within 80% efficiency)
-        if (
-          remainingSpace >= file.tokenCount &&
-          chunk.estimatedTokenCount + file.tokenCount >= targetChunkSize * 0.8
-        ) {
-          chunk.files.push(file.path);
-          chunk.estimatedTokenCount += file.tokenCount;
-          placed = true;
-          logger.debug(
-            `Added large file "${file.path}" (${file.tokenCount.toLocaleString()} tokens) to chunk ${i + 1}`,
-          );
-          break;
-        }
-      }
-
-      if (!placed) {
-        // Create a new chunk for this large file
-        chunks.push({
-          files: [file.path],
-          estimatedTokenCount: file.tokenCount,
-          priority: chunks.length + 1,
-        });
-        logger.debug(
-          `Created new chunk ${chunks.length} for large file "${file.path}" (${file.tokenCount.toLocaleString()} tokens)`,
-        );
-      }
-    }
-
-    // Process medium files - use first-fit with efficiency threshold
-    for (const file of mediumFiles) {
-      let placed = false;
-
-      // Find first chunk where this file fits efficiently
-      for (let i = 0; i < chunks.length; i++) {
-        const chunk = chunks[i];
-        const remainingSpace = maxChunkSize - chunk.estimatedTokenCount;
-
-        if (remainingSpace >= file.tokenCount) {
-          chunk.files.push(file.path);
-          chunk.estimatedTokenCount += file.tokenCount;
-          placed = true;
-          logger.debug(
-            `Added medium file "${file.path}" (${file.tokenCount.toLocaleString()} tokens) to chunk ${i + 1}`,
-          );
-          break;
-        }
-      }
-
-      if (!placed) {
-        // Create a new chunk
-        chunks.push({
-          files: [file.path],
-          estimatedTokenCount: file.tokenCount,
-          priority: chunks.length + 1,
-        });
-        logger.debug(
-          `Created new chunk ${chunks.length} for medium file "${file.path}" (${file.tokenCount.toLocaleString()} tokens)`,
-        );
-      }
-    }
-
-    // Process small files - pack them to fill gaps
-    // Sort small files for better packing (largest first)
-    smallFiles.sort((a, b) => b.tokenCount - a.tokenCount);
-
-    for (const file of smallFiles) {
-      let placed = false;
-
-      // Find the fullest chunk that can still fit this file
-      let bestChunkIndex = -1;
-      let bestChunkFullness = 0;
-
-      for (let i = 0; i < chunks.length; i++) {
-        const chunk = chunks[i];
-        const remainingSpace = maxChunkSize - chunk.estimatedTokenCount;
-        const chunkFullness = chunk.estimatedTokenCount / maxChunkSize;
-
-        if (remainingSpace >= file.tokenCount && chunkFullness > bestChunkFullness) {
-          bestChunkIndex = i;
-          bestChunkFullness = chunkFullness;
-        }
-      }
-
-      if (bestChunkIndex !== -1) {
-        const chunk = chunks[bestChunkIndex];
-        chunk.files.push(file.path);
-        chunk.estimatedTokenCount += file.tokenCount;
-        placed = true;
-        logger.debug(
-          `Added small file "${file.path}" (${file.tokenCount.toLocaleString()} tokens) to chunk ${bestChunkIndex + 1}`,
-        );
-      }
-
-      if (!placed) {
-        // Create a new chunk only if absolutely necessary
-        chunks.push({
-          files: [file.path],
-          estimatedTokenCount: file.tokenCount,
-          priority: chunks.length + 1,
-        });
-        logger.debug(
-          `Created new chunk ${chunks.length} for small file "${file.path}" (${file.tokenCount.toLocaleString()} tokens)`,
-        );
-      }
-    }
-
-    // Perform aggressive balancing to minimize chunk count
-    const balancedChunks = TokenAnalyzer.aggressiveBalance(chunks, fileAnalyses, maxChunkSize);
-
-    // Log chunk statistics
-    const avgTokensPerChunk = Math.round(
-      balancedChunks.reduce((sum, c) => sum + c.estimatedTokenCount, 0) / balancedChunks.length,
-    );
-    const maxTokensInChunk = Math.max(...balancedChunks.map((c) => c.estimatedTokenCount));
-    const minTokensInChunk = Math.min(...balancedChunks.map((c) => c.estimatedTokenCount));
-
-    logger.info(`Chunk statistics:`);
-    logger.info(` - Total chunks: ${balancedChunks.length}`);
-    logger.info(` - Average tokens per chunk: ${avgTokensPerChunk.toLocaleString()}`);
-    logger.info(` - Max tokens in a chunk: ${maxTokensInChunk.toLocaleString()}`);
-    logger.info(` - Min tokens in a chunk: ${minTokensInChunk.toLocaleString()}`);
-    logger.info(` - Chunk efficiency: ${((avgTokensPerChunk / maxChunkSize) * 100).toFixed(1)}%`);
-
-    return balancedChunks;
-  }
-
-  /**
-   * Aggressive balancing to minimize chunk count and maximize efficiency
-   * @param chunks Initial chunks from bin-packing
-   * @param fileAnalyses Original file analyses for lookup
-   * @param maxChunkSize Maximum size for each chunk
-   * @returns Balanced chunks with minimized count
-   */
-  private static aggressiveBalance(
-    chunks: FileChunk[],
-    fileAnalyses: FileTokenAnalysis[],
-    maxChunkSize: number,
-  ): FileChunk[] {
-    // Create a map for quick file lookups
-    const fileMap = new Map<string, FileTokenAnalysis>();
-    for (const file of fileAnalyses) {
-      fileMap.set(file.path, file);
-    }
-
-    // First pass: Try to merge small chunks
-    const mergedChunks: FileChunk[] = [];
-    const sortedForMerging = [...chunks].sort(
-      (a, b) => a.estimatedTokenCount - b.estimatedTokenCount,
-    );
-    const usedChunks = new Set<number>();
-
-    for (let i = 0; i < sortedForMerging.length; i++) {
-      if (usedChunks.has(i)) continue;
-
-      const chunk1 = sortedForMerging[i];
-      const mergedChunk: FileChunk = {
-        files: [...chunk1.files],
-        estimatedTokenCount: chunk1.estimatedTokenCount,
-        priority: mergedChunks.length + 1,
-      };
-      usedChunks.add(i);
-
-      // Try to merge with other small chunks
-      for (let j = i + 1; j < sortedForMerging.length; j++) {
-        if (usedChunks.has(j)) continue;
-
-        const chunk2 = sortedForMerging[j];
-        const combinedSize = mergedChunk.estimatedTokenCount + chunk2.estimatedTokenCount;
-
-        // Merge if combined size is still within limits
-        if (combinedSize <= maxChunkSize) {
-          mergedChunk.files.push(...chunk2.files);
-          mergedChunk.estimatedTokenCount = combinedSize;
-          usedChunks.add(j);
-          logger.debug(
-            `Merged chunks: ${chunk2.files.length} files (${chunk2.estimatedTokenCount.toLocaleString()} tokens) into chunk with ${mergedChunk.files.length} files`,
-          );
-        }
-      }
-
-      mergedChunks.push(mergedChunk);
-    }
-
-    logger.debug(`Chunk merging reduced count from ${chunks.length} to ${mergedChunks.length}`);
-
-    // Second pass: Balance the merged chunks
-    const sortedChunks = [...mergedChunks].sort(
-      (a, b) => a.estimatedTokenCount - b.estimatedTokenCount,
-    );
-
-    // Try to move files to achieve better balance
-    let improved = true;
-    let iterations = 0;
-    const maxIterations = 20; // More iterations for aggressive optimization
-
-    while (improved && iterations < maxIterations) {
-      improved = false;
-      iterations++;
-
-      // Find the most and least full chunks
-      sortedChunks.sort((a, b) => a.estimatedTokenCount - b.estimatedTokenCount);
-
-      for (let i = 0; i < Math.floor(sortedChunks.length / 2); i++) {
-        const smallChunk = sortedChunks[i];
-        const largeChunk = sortedChunks[sortedChunks.length - 1 - i];
-
-        // Calculate variance threshold based on chunk count
-        const varianceThreshold = Math.max(500, maxChunkSize * 0.05); // 5% of max or 500 tokens
-
-        // Skip if chunks are already well balanced
-        if (largeChunk.estimatedTokenCount - smallChunk.estimatedTokenCount < varianceThreshold) {
-          continue;
-        }
-
-        // Try to find optimal file to move
-        let bestFile: string | null = null;
-        let bestImprovement = 0;
-
-        for (const filePath of largeChunk.files) {
-          const file = fileMap.get(filePath);
-          if (!file) continue;
-
-          const newSmallSize = smallChunk.estimatedTokenCount + file.tokenCount;
-          const newLargeSize = largeChunk.estimatedTokenCount - file.tokenCount;
-
-          // Check if moving this file would improve balance
-          if (newSmallSize <= maxChunkSize && newLargeSize > 0) {
-            const currentDiff = largeChunk.estimatedTokenCount - smallChunk.estimatedTokenCount;
-            const newDiff = Math.abs(newLargeSize - newSmallSize);
-            const improvement = currentDiff - newDiff;
-
-            if (improvement > bestImprovement) {
-              bestFile = filePath;
-              bestImprovement = improvement;
-            }
-          }
-        }
-
-        // Move the best file if found
-        if (bestFile && bestImprovement > 100) {
-          const file = fileMap.get(bestFile)!;
-          largeChunk.files = largeChunk.files.filter((f) => f !== bestFile);
-          smallChunk.files.push(bestFile);
-
-          // Update token counts
-          largeChunk.estimatedTokenCount -= file.tokenCount;
-          smallChunk.estimatedTokenCount += file.tokenCount;
-
-          logger.debug(
-            `Balanced: Moved file "${bestFile}" (${file.tokenCount.toLocaleString()} tokens) - improvement: ${bestImprovement.toLocaleString()} tokens`,
-          );
-
-          improved = true;
-        }
-      }
-    }
-
-    // Final pass: Remove any empty chunks
-    const finalChunks = sortedChunks.filter((chunk) => chunk.files.length > 0);
-
-    // Re-assign priorities based on token count (largest first for processing)
-    finalChunks.sort((a, b) => b.estimatedTokenCount - a.estimatedTokenCount);
-    finalChunks.forEach((chunk, index) => {
-      chunk.priority = index + 1;
-    });
-
-    if (iterations === maxIterations) {
-      logger.debug(`Aggressive balancing stopped after ${maxIterations} iterations`);
-    } else {
-      logger.debug(`Aggressive balancing completed in ${iterations} iterations`);
-    }
-
-    return finalChunks;
-  }
-
-  /**
-   * Analyze a single file for token usage
-   * @param file File to analyze
-   * @param options Analysis options
-   * @returns Token analysis for the file
-   */
-  public static analyzeFile(file: FileInfo, options: TokenAnalysisOptions): FileTokenAnalysis {
-    const content = file.content;
-    const tokenCount = countTokens(content, options.modelName);
-    const sizeInBytes = content.length;
-
-    return {
-      path: file.path,
-      relativePath: file.relativePath,
-      tokenCount,
-      sizeInBytes,
-      tokensPerByte: sizeInBytes > 0 ? tokenCount / sizeInBytes : 0,
-    };
-  }
-}