@loxia-labs/loxia-autopilot-one 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/LICENSE +267 -0
  2. package/README.md +509 -0
  3. package/bin/cli.js +117 -0
  4. package/package.json +94 -0
  5. package/scripts/install-scanners.js +236 -0
  6. package/src/analyzers/CSSAnalyzer.js +297 -0
  7. package/src/analyzers/ConfigValidator.js +690 -0
  8. package/src/analyzers/ESLintAnalyzer.js +320 -0
  9. package/src/analyzers/JavaScriptAnalyzer.js +261 -0
  10. package/src/analyzers/PrettierFormatter.js +247 -0
  11. package/src/analyzers/PythonAnalyzer.js +266 -0
  12. package/src/analyzers/SecurityAnalyzer.js +729 -0
  13. package/src/analyzers/TypeScriptAnalyzer.js +247 -0
  14. package/src/analyzers/codeCloneDetector/analyzer.js +344 -0
  15. package/src/analyzers/codeCloneDetector/detector.js +203 -0
  16. package/src/analyzers/codeCloneDetector/index.js +160 -0
  17. package/src/analyzers/codeCloneDetector/parser.js +199 -0
  18. package/src/analyzers/codeCloneDetector/reporter.js +148 -0
  19. package/src/analyzers/codeCloneDetector/scanner.js +59 -0
  20. package/src/core/agentPool.js +1474 -0
  21. package/src/core/agentScheduler.js +2147 -0
  22. package/src/core/contextManager.js +709 -0
  23. package/src/core/messageProcessor.js +732 -0
  24. package/src/core/orchestrator.js +548 -0
  25. package/src/core/stateManager.js +877 -0
  26. package/src/index.js +631 -0
  27. package/src/interfaces/cli.js +549 -0
  28. package/src/interfaces/webServer.js +2162 -0
  29. package/src/modules/fileExplorer/controller.js +280 -0
  30. package/src/modules/fileExplorer/index.js +37 -0
  31. package/src/modules/fileExplorer/middleware.js +92 -0
  32. package/src/modules/fileExplorer/routes.js +125 -0
  33. package/src/modules/fileExplorer/types.js +44 -0
  34. package/src/services/aiService.js +1232 -0
  35. package/src/services/apiKeyManager.js +164 -0
  36. package/src/services/benchmarkService.js +366 -0
  37. package/src/services/budgetService.js +539 -0
  38. package/src/services/contextInjectionService.js +247 -0
  39. package/src/services/conversationCompactionService.js +637 -0
  40. package/src/services/errorHandler.js +810 -0
  41. package/src/services/fileAttachmentService.js +544 -0
  42. package/src/services/modelRouterService.js +366 -0
  43. package/src/services/modelsService.js +322 -0
  44. package/src/services/qualityInspector.js +796 -0
  45. package/src/services/tokenCountingService.js +536 -0
  46. package/src/tools/agentCommunicationTool.js +1344 -0
  47. package/src/tools/agentDelayTool.js +485 -0
  48. package/src/tools/asyncToolManager.js +604 -0
  49. package/src/tools/baseTool.js +800 -0
  50. package/src/tools/browserTool.js +920 -0
  51. package/src/tools/cloneDetectionTool.js +621 -0
  52. package/src/tools/dependencyResolverTool.js +1215 -0
  53. package/src/tools/fileContentReplaceTool.js +875 -0
  54. package/src/tools/fileSystemTool.js +1107 -0
  55. package/src/tools/fileTreeTool.js +853 -0
  56. package/src/tools/imageTool.js +901 -0
  57. package/src/tools/importAnalyzerTool.js +1060 -0
  58. package/src/tools/jobDoneTool.js +248 -0
  59. package/src/tools/seekTool.js +956 -0
  60. package/src/tools/staticAnalysisTool.js +1778 -0
  61. package/src/tools/taskManagerTool.js +2873 -0
  62. package/src/tools/terminalTool.js +2304 -0
  63. package/src/tools/webTool.js +1430 -0
  64. package/src/types/agent.js +519 -0
  65. package/src/types/contextReference.js +972 -0
  66. package/src/types/conversation.js +730 -0
  67. package/src/types/toolCommand.js +747 -0
  68. package/src/utilities/attachmentValidator.js +292 -0
  69. package/src/utilities/configManager.js +582 -0
  70. package/src/utilities/constants.js +722 -0
  71. package/src/utilities/directoryAccessManager.js +535 -0
  72. package/src/utilities/fileProcessor.js +307 -0
  73. package/src/utilities/logger.js +436 -0
  74. package/src/utilities/tagParser.js +1246 -0
  75. package/src/utilities/toolConstants.js +317 -0
  76. package/web-ui/build/index.html +15 -0
  77. package/web-ui/build/logo.png +0 -0
  78. package/web-ui/build/logo2.png +0 -0
  79. package/web-ui/build/static/index-CjkkcnFA.js +344 -0
  80. package/web-ui/build/static/index-Dy2bYbOa.css +1 -0
package/src/services/tokenCountingService.js
@@ -0,0 +1,536 @@
+ /**
+  * TokenCountingService - Accurate token counting for conversation compaction
+  *
+  * Purpose:
+  * - Provide accurate token counting using tiktoken (OpenAI's tokenizer)
+  * - Support multiple models with different tokenization schemes
+  * - Determine when conversation compaction should be triggered
+  * - Cache token counts for performance optimization
+  * - Provide fallback estimation when accurate counting is unavailable
+  *
+  * Key Features:
+  * - Accurate token counting via tiktoken library
+  * - Model-specific tokenization
+  * - Intelligent caching with TTL
+  * - Fast estimation fallback
+  * - Context window management
+  * - Compaction trigger detection
+  */
+
+ import { encoding_for_model } from 'tiktoken';
+ import {
+   MODELS,
+   COMPACTION_CONFIG,
+   TOKEN_COUNTING_MODES,
+ } from '../utilities/constants.js';
+
+ class TokenCountingService {
+   constructor(logger) {
+     this.logger = logger;
+
+     // Token count cache for performance
+     this.tokenCache = new Map();
+
+     // Tiktoken encoders cache (reuse encoders for efficiency)
+     this.encoders = new Map();
+
+     // Supported tiktoken models mapping
+     this.tiktokenModelMap = {
+       // Anthropic models use OpenAI's cl100k_base encoding
+       [MODELS.ANTHROPIC_SONNET]: 'gpt-4',
+       [MODELS.ANTHROPIC_OPUS]: 'gpt-4',
+       [MODELS.ANTHROPIC_HAIKU]: 'gpt-4',
+
+       // OpenAI models
+       [MODELS.GPT_4]: 'gpt-4',
+       [MODELS.GPT_4_MINI]: 'gpt-4',
+       'gpt-4o': 'gpt-4',
+       'gpt-4o-mini': 'gpt-4',
+       'gpt-4-turbo': 'gpt-4',
+       'gpt-3.5-turbo': 'gpt-4', // Uses cl100k_base encoding
+
+       // DeepSeek uses similar tokenization to GPT-4
+       [MODELS.DEEPSEEK_R1]: 'gpt-4',
+
+       // Phi models - fallback to GPT-4 encoding
+       [MODELS.PHI_4]: 'gpt-4',
+
+       // Azure AI Foundry models
+       'azure-ai-grok3': 'gpt-4',
+       'azure-ai-deepseek-r1': 'gpt-4',
+       'azure-openai-gpt-5': 'gpt-4',
+       'azure-openai-gpt-4': 'gpt-4',
+       'azure-openai-gpt-4o': 'gpt-4',
+
+       // Compaction models
+       [COMPACTION_CONFIG.COMPACTION_MODEL]: 'gpt-4',
+       [COMPACTION_CONFIG.COMPACTION_MODEL_FALLBACK]: 'gpt-4',
+     };
+
+     this.logger?.info('TokenCountingService initialized', {
+       supportedModels: Object.keys(this.tiktokenModelMap).length,
+       cacheEnabled: true
+     });
+   }
+
+   /**
+    * Count tokens in text using accurate tiktoken encoder
+    * @param {string} text - Text to count tokens in
+    * @param {string} model - Model name for appropriate tokenization
+    * @param {string} mode - Counting mode (accurate, estimated, cached)
+    * @returns {Promise<number>} Token count
+    */
+   async countTokens(text, model, mode = TOKEN_COUNTING_MODES.ACCURATE) {
+     // Validate input
+     if (!text || typeof text !== 'string') {
+       return 0;
+     }
+
+     // Check cache first if enabled
+     if (mode === TOKEN_COUNTING_MODES.CACHED) {
+       const cached = this._getCachedTokenCount(text, model);
+       if (cached !== null) {
+         return cached;
+       }
+       // Fall through to accurate counting if not cached
+     }
+
+     // Use fast estimation if requested
+     if (mode === TOKEN_COUNTING_MODES.ESTIMATED) {
+       return this._estimateTokens(text);
+     }
+
+     // Accurate counting with tiktoken
+     try {
+       const encoder = await this._getEncoder(model);
+       const tokens = encoder.encode(text);
+       const count = tokens.length;
+
+       // Cache the result
+       this._cacheTokenCount(text, model, count);
+
+       return count;
+
+     } catch (error) {
+       this.logger?.warn('Tiktoken encoding failed, falling back to estimation', {
+         model,
+         error: error.message
+       });
+
+       // Fallback to estimation
+       return this._estimateTokens(text);
+     }
+   }
+
+   /**
+    * Estimate conversation token count including all messages
+    * @param {Array} messages - Array of message objects with { role, content }
+    * @param {string} model - Model name for tokenization
+    * @param {string} mode - Counting mode
+    * @returns {Promise<number>} Total token count for conversation
+    */
+   async estimateConversationTokens(messages, model, mode = TOKEN_COUNTING_MODES.ACCURATE) {
+     if (!Array.isArray(messages) || messages.length === 0) {
+       return 0;
+     }
+
+     let totalTokens = 0;
+
+     // Count tokens for each message
+     for (const message of messages) {
+       if (!message.content) {
+         continue;
+       }
+
+       // Message formatting overhead (role + formatting)
+       const formattingOverhead = this._getMessageFormattingOverhead(model);
+
+       // Content tokens
+       const contentTokens = await this.countTokens(message.content, model, mode);
+
+       totalTokens += contentTokens + formattingOverhead;
+     }
+
+     // Add conversation-level overhead (system instructions, etc.)
+     const conversationOverhead = this._getConversationOverhead(messages.length);
+     totalTokens += conversationOverhead;
+
+     this.logger?.debug('Conversation token count', {
+       model,
+       messageCount: messages.length,
+       totalTokens,
+       mode
+     });
+
+     return totalTokens;
+   }
+
+   /**
+    * Get context window size for a model
+    * @param {string} model - Model name
+    * @returns {number} Context window size in tokens
+    */
+   getModelContextWindow(model) {
+     // Model context windows (from aiService model specs and vendor documentation)
+     const contextWindows = {
+       // Anthropic Claude models
+       [MODELS.ANTHROPIC_SONNET]: 200000,
+       [MODELS.ANTHROPIC_OPUS]: 200000,
+       [MODELS.ANTHROPIC_HAIKU]: 200000,
+
+       // OpenAI models
+       [MODELS.GPT_4]: 128000,
+       [MODELS.GPT_4_MINI]: 128000,
+       'gpt-4o': 128000,
+       'gpt-4o-mini': 128000,
+       'gpt-4-turbo': 128000,
+       'gpt-3.5-turbo': 16384,
+
+       // DeepSeek models
+       [MODELS.DEEPSEEK_R1]: 128000,
+
+       // Phi models
+       [MODELS.PHI_4]: 16384,
+
+       // Azure AI Foundry models
+       'azure-ai-grok3': 128000,
+       'azure-ai-deepseek-r1': 128000,
+       'azure-openai-gpt-5': 128000,
+       'azure-openai-gpt-4': 128000,
+       'azure-openai-gpt-4o': 128000,
+
+       // Compaction models
+       [COMPACTION_CONFIG.COMPACTION_MODEL]: 128000,
+       [COMPACTION_CONFIG.COMPACTION_MODEL_FALLBACK]: 128000,
+
+       // Router model
+       'autopilot-model-router': 16384,
+     };
+
+     const contextWindow = contextWindows[model];
+
+     if (!contextWindow) {
+       this.logger?.warn('Unknown model context window, using default', {
+         model,
+         defaultWindow: 128000
+       });
+       return 128000; // Default to 128k
+     }
+
+     return contextWindow;
+   }
+
+   /**
+    * Get maximum output tokens for a model
+    * @param {string} model - Model name
+    * @returns {number} Maximum output tokens
+    */
+   getModelMaxOutputTokens(model) {
+     // Max output tokens - increased to 8K-16K where supported for better responses
+     // Note: These are conservative estimates to ensure compaction triggers appropriately
+     const maxOutputTokens = {
+       // Anthropic Claude models - support up to 8K output
+       [MODELS.ANTHROPIC_SONNET]: 8192,
+       [MODELS.ANTHROPIC_OPUS]: 8192,
+       [MODELS.ANTHROPIC_HAIKU]: 8192,
+
+       // OpenAI models
+       [MODELS.GPT_4]: 8192, // Supports up to 16K, using 8K for safety
+       [MODELS.GPT_4_MINI]: 16384, // Already at max
+       'gpt-4o': 8192, // Supports up to 16K, using 8K
+       'gpt-4o-mini': 16384, // Already at max
+       'gpt-4-turbo': 8192, // Increased from default
+       'gpt-3.5-turbo': 4096, // Smaller model, keep at 4K
+
+       // DeepSeek models
+       [MODELS.DEEPSEEK_R1]: 8192, // Already at max
+
+       // Phi models - smaller architecture
+       [MODELS.PHI_4]: 4096, // Increased from 2048
+
+       // Azure AI Foundry models
+       'azure-ai-grok3': 8192, // Increased from 4K
+       'azure-ai-deepseek-r1': 8192, // Already at max
+       'azure-openai-gpt-5': 8192, // Increased from 4K
+       'azure-openai-gpt-4': 8192, // Increased from default
+       'azure-openai-gpt-4o': 8192, // Increased from default
+
+       // Compaction models - keep moderate for efficiency
+       [COMPACTION_CONFIG.COMPACTION_MODEL]: 8192,
+       [COMPACTION_CONFIG.COMPACTION_MODEL_FALLBACK]: 8192,
+
+       // Router model - keep small for fast routing
+       'autopilot-model-router': 2048,
+     };
+
+     return maxOutputTokens[model] || 8192; // Default increased to 8K
+   }
+
+   /**
+    * Determine if compaction should be triggered
+    * @param {number} currentTokens - Current conversation token count (K)
+    * @param {number} maxOutputTokens - Max tokens model can output (X)
+    * @param {number} contextWindow - Model's context window size (C)
+    * @param {number} threshold - Trigger threshold (default 0.8 = 80%)
+    * @returns {boolean} True if compaction should be triggered
+    */
+   shouldTriggerCompaction(currentTokens, maxOutputTokens, contextWindow, threshold = COMPACTION_CONFIG.DEFAULT_THRESHOLD) {
+     // Validate threshold
+     if (threshold < COMPACTION_CONFIG.MIN_THRESHOLD || threshold > COMPACTION_CONFIG.MAX_THRESHOLD) {
+       this.logger?.warn('Invalid compaction threshold, using default', {
+         provided: threshold,
+         default: COMPACTION_CONFIG.DEFAULT_THRESHOLD
+       });
+       threshold = COMPACTION_CONFIG.DEFAULT_THRESHOLD;
+     }
+
+     // Calculate: K + X >= threshold * C
+     const requiredTokens = currentTokens + maxOutputTokens;
+     const thresholdTokens = threshold * contextWindow;
+     const shouldTrigger = requiredTokens >= thresholdTokens;
+
+     this.logger?.debug('Compaction trigger check', {
+       currentTokens,
+       maxOutputTokens,
+       contextWindow,
+       threshold,
+       requiredTokens,
+       thresholdTokens,
+       shouldTrigger,
+       utilizationPercent: ((requiredTokens / contextWindow) * 100).toFixed(2)
+     });
+
+     return shouldTrigger;
+   }
+
+   /**
+    * Calculate how many tokens to target after compaction
+    * @param {number} contextWindow - Model's context window size
+    * @param {number} targetThreshold - Target threshold after compaction (default 85%)
+    * @returns {number} Target token count after compaction
+    */
+   calculateTargetTokenCount(contextWindow, targetThreshold = COMPACTION_CONFIG.MAX_ACCEPTABLE_TOKEN_COUNT_AFTER) {
+     return Math.floor(contextWindow * targetThreshold);
+   }
+
+   /**
+    * Validate that compaction achieved sufficient reduction
+    * @param {number} originalTokens - Token count before compaction
+    * @param {number} compactedTokens - Token count after compaction
+    * @param {number} contextWindow - Model's context window
+    * @returns {Object} Validation result { valid, reductionPercent, exceedsTarget }
+    */
+   validateCompaction(originalTokens, compactedTokens, contextWindow) {
+     const reductionPercent = ((originalTokens - compactedTokens) / originalTokens) * 100;
+     const targetTokens = this.calculateTargetTokenCount(contextWindow);
+     const exceedsTarget = compactedTokens > targetTokens;
+     const sufficientReduction = reductionPercent >= COMPACTION_CONFIG.MIN_REDUCTION_PERCENTAGE;
+
+     const valid = !exceedsTarget && sufficientReduction;
+
+     this.logger?.info('Compaction validation', {
+       originalTokens,
+       compactedTokens,
+       reductionPercent: reductionPercent.toFixed(2),
+       targetTokens,
+       exceedsTarget,
+       sufficientReduction,
+       valid
+     });
+
+     return {
+       valid,
+       reductionPercent,
+       exceedsTarget,
+       sufficientReduction,
+       targetTokens,
+       compactedTokens,
+       originalTokens
+     };
+   }
+
+   /**
+    * Clear token count cache
+    * @param {string} model - Optional: clear cache for specific model only
+    */
+   clearCache(model = null) {
+     if (model) {
+       // Clear cache entries for specific model
+       for (const [key, value] of this.tokenCache.entries()) {
+         if (value.model === model) {
+           this.tokenCache.delete(key);
+         }
+       }
+       this.logger?.debug('Token cache cleared for model', { model });
+     } else {
+       // Clear entire cache
+       this.tokenCache.clear();
+       this.logger?.debug('Token cache cleared completely');
+     }
+   }
+
+   /**
+    * Get encoder for model (with caching)
+    * @private
+    */
+   async _getEncoder(model) {
+     // Get tiktoken model name
+     const tiktokenModel = this.tiktokenModelMap[model] || 'gpt-4';
+
+     // Check if encoder is already cached
+     if (this.encoders.has(tiktokenModel)) {
+       return this.encoders.get(tiktokenModel);
+     }
+
+     // Create new encoder
+     const encoder = encoding_for_model(tiktokenModel);
+     this.encoders.set(tiktokenModel, encoder);
+
+     this.logger?.debug('Created tiktoken encoder', {
+       model,
+       tiktokenModel
+     });
+
+     return encoder;
+   }
+
+   /**
+    * Fast token estimation (character-based)
+    * @private
+    */
+   _estimateTokens(text) {
+     if (!text || typeof text !== 'string') {
+       return 0;
+     }
+
+     // Use configured estimation ratio
+     return Math.ceil(text.length / COMPACTION_CONFIG.CHARS_PER_TOKEN_ESTIMATE);
+   }
+
+   /**
+    * Get cached token count if available and not expired
+    * @private
+    */
+   _getCachedTokenCount(text, model) {
+     const cacheKey = this._getCacheKey(text, model);
+     const cached = this.tokenCache.get(cacheKey);
+
+     if (!cached) {
+       return null;
+     }
+
+     // Check if cache entry is expired
+     const now = Date.now();
+     if (now - cached.timestamp > COMPACTION_CONFIG.TOKEN_COUNT_CACHE_TTL_MS) {
+       this.tokenCache.delete(cacheKey);
+       return null;
+     }
+
+     return cached.count;
+   }
+
+   /**
+    * Cache token count with timestamp
+    * @private
+    */
+   _cacheTokenCount(text, model, count) {
+     const cacheKey = this._getCacheKey(text, model);
+
+     this.tokenCache.set(cacheKey, {
+       count,
+       model,
+       timestamp: Date.now()
+     });
+
+     // Prevent cache from growing indefinitely
+     if (this.tokenCache.size > 1000) {
+       // Remove oldest entries
+       const entries = Array.from(this.tokenCache.entries());
+       entries.sort((a, b) => a[1].timestamp - b[1].timestamp);
+
+       // Remove oldest 20%
+       const toRemove = Math.floor(entries.length * 0.2);
+       for (let i = 0; i < toRemove; i++) {
+         this.tokenCache.delete(entries[i][0]);
+       }
+     }
+   }
+
+   /**
+    * Generate cache key for text + model
+    * @private
+    */
+   _getCacheKey(text, model) {
+     // Use a simple hash for the text to avoid storing full text as key
+     const hash = this._simpleHash(text);
+     return `${model}:${hash}`;
+   }
+
+   /**
+    * Simple hash function for cache keys
+    * @private
+    */
+   _simpleHash(str) {
+     let hash = 0;
+     for (let i = 0; i < str.length; i++) {
+       const char = str.charCodeAt(i);
+       hash = ((hash << 5) - hash) + char;
+       hash = hash & hash; // Convert to 32-bit integer
+     }
+     return hash.toString(36);
+   }
+
+   /**
+    * Get message formatting overhead for model
+    * Accounts for role labels, XML tags, etc.
+    * @private
+    */
+   _getMessageFormattingOverhead(model) {
+     // Different models have different formatting
+     // Anthropic: ~10 tokens per message (role tags)
+     // OpenAI: ~5 tokens per message (JSON formatting)
+
+     if (model.includes('anthropic') || model.includes('claude')) {
+       return 10;
+     }
+
+     return 5; // Default for OpenAI-style models
+   }
+
+   /**
+    * Get conversation-level overhead
+    * Accounts for system prompts, special tokens, etc.
+    * @private
+    */
+   _getConversationOverhead(messageCount) {
+     // Base overhead for conversation structure
+     const baseOverhead = 50;
+
+     // Additional overhead scales with message count
+     const scalingOverhead = messageCount * 2;
+
+     return baseOverhead + scalingOverhead;
+   }
+
+   /**
+    * Clean up resources (close encoders)
+    */
+   async cleanup() {
+     // Free tiktoken encoders
+     for (const [model, encoder] of this.encoders.entries()) {
+       try {
+         encoder.free();
+         this.logger?.debug('Freed tiktoken encoder', { model });
+       } catch (error) {
+         this.logger?.warn('Failed to free encoder', { model, error: error.message });
+       }
+     }
+
+     this.encoders.clear();
+     this.tokenCache.clear();
+
+     this.logger?.info('TokenCountingService cleaned up');
+   }
+ }
+
+ export default TokenCountingService;
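
For orientation, here is a minimal usage sketch of the service shown above. It is not part of the published package: the import path, the console logger, the 'gpt-4o' model id, and all numbers are illustrative assumptions, and the COMPACTION_CONFIG / TOKEN_COUNTING_MODES constants are assumed to behave as documented in the file.

// Hypothetical usage sketch -- path, model id, and numbers are illustrative, not from the package.
import TokenCountingService from './src/services/tokenCountingService.js';

const tokenCounter = new TokenCountingService(console);

// Accurate single-string count; falls back to ceil(length / CHARS_PER_TOKEN_ESTIMATE) if tiktoken fails.
const promptTokens = await tokenCounter.countTokens('Summarize the build errors.', 'gpt-4o');

// Whole-conversation estimate: per-message content tokens plus formatting overhead
// (10 for Claude-style models, 5 otherwise), plus 50 base + 2 per message of conversation overhead.
const conversationTokens = await tokenCounter.estimateConversationTokens(
  [
    { role: 'user', content: 'Summarize the build errors.' },
    { role: 'assistant', content: 'Three test files fail to compile.' },
  ],
  'gpt-4o'
);

// Trigger rule from shouldTriggerCompaction: compact once K + X >= threshold * C.
// With the table values above for 'gpt-4o' (X = 8192, C = 128000) and the documented default
// threshold of 0.8, compaction triggers once K >= 0.8 * 128000 - 8192 = 94208 tokens.
const needsCompaction = tokenCounter.shouldTriggerCompaction(
  conversationTokens,
  tokenCounter.getModelMaxOutputTokens('gpt-4o'),
  tokenCounter.getModelContextWindow('gpt-4o')
);

if (needsCompaction) {
  // A compaction pass would aim at or below the post-compaction target for this context window.
  console.log('Target after compaction:', tokenCounter.calculateTargetTokenCount(128000));
}

await tokenCounter.cleanup(); // frees cached tiktoken encoders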