@softerist/heuristic-mcp 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,246 @@
+ /**
+  * Tests for CodebaseIndexer feature
+  *
+  * Tests the indexing functionality including:
+  * - File discovery and filtering
+  * - Chunk generation and embedding
+  * - Concurrent indexing protection
+  * - Force reindex behavior
+  * - Progress notifications
+  */
+
+ import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
+ import {
+   createTestFixtures,
+   cleanupFixtures,
+   clearTestCache,
+   createMockRequest,
+   measureTime
+ } from './helpers.js';
+ import * as IndexCodebaseFeature from '../features/index-codebase.js';
+ import { CodebaseIndexer } from '../features/index-codebase.js';
+
+ describe('CodebaseIndexer', () => {
+   let fixtures;
+
+   beforeAll(async () => {
+     fixtures = await createTestFixtures({ workerThreads: 2 });
+   });
+
+   afterAll(async () => {
+     await cleanupFixtures(fixtures);
+   });
+
+   beforeEach(async () => {
+     // Reset state
+     fixtures.indexer.isIndexing = false;
+     fixtures.indexer.terminateWorkers();
+   });
+
+   describe('Basic Indexing', () => {
+     it('should index files and create embeddings', async () => {
+       // Clear cache first
+       await clearTestCache(fixtures.config);
+       fixtures.cache.setVectorStore([]);
+       fixtures.cache.fileHashes = new Map();
+
+       // Run indexing
+       const result = await fixtures.indexer.indexAll(true);
+
+       // Should have processed files
+       expect(result.skipped).toBe(false);
+       expect(result.filesProcessed).toBeGreaterThan(0);
+       expect(result.chunksCreated).toBeGreaterThan(0);
+       expect(result.totalFiles).toBeGreaterThan(0);
+       expect(result.totalChunks).toBeGreaterThan(0);
+     });
+
+     it('should skip unchanged files on subsequent indexing', async () => {
+       // First index
+       await fixtures.indexer.indexAll(true);
+
+       // Second index without force
+       const result = await fixtures.indexer.indexAll(false);
+
+       // Run completes (not skipped), but no files need reprocessing
+       expect(result.skipped).toBe(false);
+       expect(result.filesProcessed).toBe(0);
+       expect(result.message).toContain('up to date');
+     });
+
+     it('should reindex all files when force is true', async () => {
+       // First index
+       await fixtures.indexer.indexAll(true);
+       const firstChunks = fixtures.cache.getVectorStore().length;
+
+       // Force reindex
+       const result = await fixtures.indexer.indexAll(true);
+
+       // Should have processed all files again
+       expect(result.filesProcessed).toBeGreaterThan(0);
+       expect(result.chunksCreated).toBeGreaterThan(0);
+     });
+   });
+
+   describe('Concurrent Indexing Protection', () => {
+     it('should prevent concurrent indexing', async () => {
+       // Clear for clean state
+       await clearTestCache(fixtures.config);
+       fixtures.cache.setVectorStore([]);
+       fixtures.cache.fileHashes = new Map();
+
+       // Start first indexing
+       const promise1 = fixtures.indexer.indexAll(true);
+
+       // Wait for it to start
+       await new Promise(resolve => setTimeout(resolve, 100));
+       expect(fixtures.indexer.isIndexing).toBe(true);
+
+       // Second call should be skipped
+       const result2 = await fixtures.indexer.indexAll(false);
+
+       expect(result2.skipped).toBe(true);
+       expect(result2.reason).toContain('already in progress');
+
+       await promise1;
+     });
+
+     it('should set and clear isIndexing flag correctly', async () => {
+       // Clear cache to ensure indexing actually runs
+       await clearTestCache(fixtures.config);
+       fixtures.cache.setVectorStore([]);
+       fixtures.cache.fileHashes = new Map();
+
+       expect(fixtures.indexer.isIndexing).toBe(false);
+
+       const promise = fixtures.indexer.indexAll(true);
+
+       // Should be set during indexing
+       await new Promise(resolve => setTimeout(resolve, 100));
+       expect(fixtures.indexer.isIndexing).toBe(true);
+
+       await promise;
+
+       // Should be cleared after indexing
+       expect(fixtures.indexer.isIndexing).toBe(false);
+     });
+   });
+
+   describe('File Discovery', () => {
+     it('should discover files matching configured extensions', async () => {
+       const files = await fixtures.indexer.discoverFiles();
+
+       expect(files.length).toBeGreaterThan(0);
+
+       // All files should have valid extensions
+       const extensions = fixtures.config.fileExtensions.map(ext => `.${ext}`);
+       for (const file of files) {
+         const ext = file.substring(file.lastIndexOf('.'));
+         expect(extensions).toContain(ext);
+       }
+     });
+
+     it('should exclude files in excluded directories', async () => {
+       const files = await fixtures.indexer.discoverFiles();
+
+       // No files from node_modules
+       const nodeModulesFiles = files.filter(f => f.includes('node_modules'));
+       expect(nodeModulesFiles.length).toBe(0);
+
+       // No files from .smart-coding-cache
+       const cacheFiles = files.filter(f => f.includes('.smart-coding-cache'));
+       expect(cacheFiles.length).toBe(0);
+     });
+   });
+
+   describe('Worker Thread Management', () => {
+     it('should initialize workers when CPU count > 1', async () => {
+       await fixtures.indexer.initializeWorkers();
+
+       // Worker count depends on available CPU cores (may be 0 on single-core systems)
+       expect(fixtures.indexer.workers.length).toBeGreaterThanOrEqual(0);
+
+       fixtures.indexer.terminateWorkers();
+       expect(fixtures.indexer.workers.length).toBe(0);
+     });
+   });
+ });
+
+ describe('Index Codebase Tool Handler', () => {
+   let fixtures;
+
+   beforeAll(async () => {
+     fixtures = await createTestFixtures({ workerThreads: 2 });
+   });
+
+   afterAll(async () => {
+     await cleanupFixtures(fixtures);
+   });
+
+   beforeEach(async () => {
+     fixtures.indexer.isIndexing = false;
+   });
+
+   describe('Tool Definition', () => {
+     it('should have correct tool definition', () => {
+       const toolDef = IndexCodebaseFeature.getToolDefinition();
+
+       expect(toolDef.name).toBe('b_index_codebase');
+       expect(toolDef.description).toContain('reindex');
+       expect(toolDef.inputSchema.properties.force).toBeDefined();
+       expect(toolDef.inputSchema.properties.force.type).toBe('boolean');
+     });
+   });
+
+   describe('Tool Handler', () => {
+     it('should return success message on completed indexing', async () => {
+       const request = createMockRequest('b_index_codebase', { force: false });
+       const result = await IndexCodebaseFeature.handleToolCall(request, fixtures.indexer);
+
+       expect(result.content[0].text).toContain('reindexed successfully');
+       expect(result.content[0].text).toContain('Total files in index');
+       expect(result.content[0].text).toContain('Total code chunks');
+     });
+
+     it('should return skipped message on concurrent calls', async () => {
+       // Start first indexing
+       await clearTestCache(fixtures.config);
+       fixtures.cache.setVectorStore([]);
+       fixtures.cache.fileHashes = new Map();
+
+       const promise1 = IndexCodebaseFeature.handleToolCall(
+         createMockRequest('b_index_codebase', { force: true }),
+         fixtures.indexer
+       );
+
+       await new Promise(resolve => setTimeout(resolve, 100));
+
+       // Second concurrent call
+       const result2 = await IndexCodebaseFeature.handleToolCall(
+         createMockRequest('b_index_codebase', { force: false }),
+         fixtures.indexer
+       );
+
+       expect(result2.content[0].text).toContain('Indexing skipped');
+       expect(result2.content[0].text).toContain('already in progress');
+
+       await promise1;
+     });
+
+     it('should handle force parameter correctly', async () => {
+       // First index
+       await IndexCodebaseFeature.handleToolCall(
+         createMockRequest('b_index_codebase', { force: true }),
+         fixtures.indexer
+       );
+
+       // Non-force should skip unchanged
+       const result = await IndexCodebaseFeature.handleToolCall(
+         createMockRequest('b_index_codebase', { force: false }),
+         fixtures.indexer
+       );
+
+       expect(result.content[0].text).toContain('up to date');
+     });
+   });
+ });
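
The tool-handler tests above assume an MCP-style tool call: createMockRequest builds the request object and handleToolCall answers with a text content block. The sketch below is illustrative only, inferred from the assertions; the request shape, handler body, and response wording are assumptions, not the package's published source.

// Illustrative sketch, not the package's source. Assumes an MCP
// CallToolRequest-like shape and a { content: [{ type: 'text', text }] } reply.
export function createMockRequest(name, args = {}) {
  return { params: { name, arguments: args } };
}

export async function handleToolCall(request, indexer) {
  const force = Boolean(request.params.arguments?.force);
  const result = await indexer.indexAll(force);

  if (result.skipped) {
    return {
      content: [{
        type: 'text',
        text: 'Indexing skipped: indexing is already in progress. Please wait for it to finish.'
      }]
    };
  }

  const lines = ['Codebase reindexed successfully.'];
  if (result.message) lines.push(result.message); // e.g. an "up to date" notice
  lines.push(`Total files in index: ${result.totalFiles}`);
  lines.push(`Total code chunks: ${result.totalChunks}`);
  return { content: [{ type: 'text', text: lines.join('\n') }] };
}
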
@@ -0,0 +1,223 @@
+ /**
+  * Integration tests for cross-feature interactions
+  *
+  * Tests scenarios that involve multiple features working together:
+  * 1. Concurrent indexing protection across MCP tool calls
+  * 2. Clear cache interaction with indexing
+  * 3. Tool handler response quality
+  */
+
+ import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
+ import {
+   createTestFixtures,
+   cleanupFixtures,
+   clearTestCache,
+   createMockRequest,
+   measureTime
+ } from './helpers.js';
+ import * as IndexCodebaseFeature from '../features/index-codebase.js';
+ import * as ClearCacheFeature from '../features/clear-cache.js';
+
+ describe('Concurrent Indexing', () => {
+   let fixtures;
+
+   beforeAll(async () => {
+     fixtures = await createTestFixtures({ workerThreads: 2 });
+   });
+
+   afterAll(async () => {
+     await cleanupFixtures(fixtures);
+   });
+
+   beforeEach(async () => {
+     // Reset indexing state
+     fixtures.indexer.isIndexing = false;
+     // Clear cache for clean state
+     await clearTestCache(fixtures.config);
+     fixtures.cache.setVectorStore([]);
+     fixtures.cache.fileHashes = new Map();
+   });
+
+   it('should only run one indexer at a time', async () => {
+     const request1 = createMockRequest('b_index_codebase', { force: true });
+     const request2 = createMockRequest('b_index_codebase', { force: false });
+
+     // Start first indexing
+     const promise1 = IndexCodebaseFeature.handleToolCall(request1, fixtures.indexer);
+
+     // Wait a bit for first to start
+     await new Promise(resolve => setTimeout(resolve, 100));
+
+     // Verify first is running
+     expect(fixtures.indexer.isIndexing).toBe(true);
+
+     // Start second indexing while first is running
+     const promise2 = IndexCodebaseFeature.handleToolCall(request2, fixtures.indexer);
+
+     // Wait for both to complete
+     const [result1, result2] = await Promise.all([promise1, promise2]);
+
+     // First should complete with stats
+     expect(result1.content[0].text).toContain('reindexed successfully');
+     expect(result1.content[0].text).toContain('Total files in index');
+
+     // Second should clearly indicate it was skipped
+     expect(result2.content[0].text).toContain('Indexing skipped');
+     expect(result2.content[0].text).toContain('already in progress');
+   });
+
+   it('should set isIndexing flag during indexing', async () => {
+     // Check initial state
+     expect(fixtures.indexer.isIndexing).toBe(false);
+
+     // Start indexing
+     const promise = fixtures.indexer.indexAll(true);
+
+     // Wait for it to start
+     await new Promise(resolve => setTimeout(resolve, 50));
+
+     // Check flag is set
+     expect(fixtures.indexer.isIndexing).toBe(true);
+
+     // Wait for completion
+     await promise;
+
+     // Check flag is cleared
+     expect(fixtures.indexer.isIndexing).toBe(false);
+   });
+
+   it('should skip concurrent indexing calls gracefully', async () => {
+     // Start first indexing
+     const promise1 = fixtures.indexer.indexAll(true);
+
+     await new Promise(resolve => setTimeout(resolve, 50));
+
+     // Second call should return immediately with skipped status
+     const { result, duration } = await measureTime(() => fixtures.indexer.indexAll(false));
+
+     // Second call should return very quickly (not run full indexing)
+     expect(duration).toBeLessThan(100);
+
+     // Should indicate it was skipped
+     expect(result.skipped).toBe(true);
+     expect(result.reason).toContain('already in progress');
+
+     await promise1;
+   });
+ });
+
+ describe('Clear Cache Operations', () => {
+   let fixtures;
+
+   beforeAll(async () => {
+     fixtures = await createTestFixtures({ workerThreads: 2 });
+   });
+
+   afterAll(async () => {
+     await cleanupFixtures(fixtures);
+   });
+
+   beforeEach(async () => {
+     fixtures.indexer.isIndexing = false;
+   });
+
+   it('should prevent clear cache while indexing', async () => {
+     // Start indexing
+     const indexPromise = fixtures.indexer.indexAll(true);
+
+     await new Promise(resolve => setTimeout(resolve, 50));
+
+     // Try to clear cache
+     const request = createMockRequest('c_clear_cache', {});
+     const result = await ClearCacheFeature.handleToolCall(request, fixtures.cacheClearer);
+
+     // Should fail with appropriate message
+     expect(result.content[0].text).toContain('indexing is in progress');
+
+     await indexPromise;
+   });
+
+   it('should allow clear cache after indexing completes', async () => {
+     // First index
+     await fixtures.indexer.indexAll(true);
+
+     // Verify indexing is done
+     expect(fixtures.indexer.isIndexing).toBe(false);
+
+     // Now clear cache
+     const request = createMockRequest('c_clear_cache', {});
+     const result = await ClearCacheFeature.handleToolCall(request, fixtures.cacheClearer);
+
+     // Should succeed
+     expect(result.content[0].text).toContain('Cache cleared successfully');
+   });
+
+   it('should clear cache immediately after indexing without crash', async () => {
+     // This tests the race condition scenario
+     await fixtures.indexer.indexAll(true);
+
+     // Immediately clear (potential race with cache.save())
+     const result = await fixtures.cacheClearer.execute();
+
+     expect(result.success).toBe(true);
+     expect(result.message).toContain('Cache cleared successfully');
+   });
+
+   it('should handle multiple concurrent clear cache calls', async () => {
+     // First index to have something to clear
+     await fixtures.indexer.indexAll(true);
+
+     // Reset the isClearing flag
+     fixtures.cacheClearer.isClearing = false;
+
+     // Multiple concurrent clears - with new mutex, only first should succeed
+     const promises = [
+       fixtures.cacheClearer.execute(),
+       fixtures.cacheClearer.execute(),
+       fixtures.cacheClearer.execute()
+     ];
+
+     const results = await Promise.allSettled(promises);
+
+     // First should succeed, others should fail with "already in progress"
+     const successes = results.filter(r => r.status === 'fulfilled');
+     const failures = results.filter(r => r.status === 'rejected');
+
+     expect(successes.length).toBe(1);
+     expect(failures.length).toBe(2);
+
+     // Verify failure message
+     for (const failure of failures) {
+       expect(failure.reason.message).toContain('already in progress');
+     }
+   });
+ });
+
+ describe('Tool Handler Response Quality', () => {
+   let fixtures;
+
+   beforeAll(async () => {
+     fixtures = await createTestFixtures({ workerThreads: 2 });
+   });
+
+   afterAll(async () => {
+     await cleanupFixtures(fixtures);
+   });
+
+   it('should return meaningful response when indexing is skipped', async () => {
+     // Start first indexing
+     const promise1 = fixtures.indexer.indexAll(true);
+     await new Promise(resolve => setTimeout(resolve, 50));
+
+     // Second call via handler
+     const request = createMockRequest('b_index_codebase', { force: false });
+     const result = await IndexCodebaseFeature.handleToolCall(request, fixtures.indexer);
+
+     await promise1;
+
+     // The response should clearly indicate the indexing was skipped
+     expect(result.content[0].text).toContain('Indexing skipped');
+     expect(result.content[0].text).toContain('already in progress');
+     expect(result.content[0].text).toContain('Please wait');
+   });
+ });
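
These integration tests pin down two different guard behaviours: indexer.indexAll resolves with a skipped result when a run is already in progress, while cacheClearer.execute rejects outright, which is why the concurrent-clear test uses Promise.allSettled. A minimal sketch of that pattern, using the field names from the tests but with assumed method bodies, not the package's actual implementation:

// Illustrative sketch of the in-flight guard the tests exercise.
class GuardedIndexer {
  constructor() {
    this.isIndexing = false;
  }

  async indexAll(force = false) {
    if (this.isIndexing) {
      // Concurrent callers resolve immediately with a skipped result.
      return { skipped: true, reason: 'Indexing is already in progress' };
    }
    this.isIndexing = true;
    try {
      return await this.runIndexingPass(force); // hypothetical placeholder for the real work
    } finally {
      this.isIndexing = false; // cleared even if the pass throws
    }
  }

  async runIndexingPass(force) {
    // Placeholder for file discovery, chunking, and embedding.
    return { skipped: false, filesProcessed: 0, chunksCreated: 0, message: 'Index is up to date' };
  }
}

class CacheClearer {
  constructor() {
    this.isClearing = false;
  }

  async execute() {
    if (this.isClearing) {
      // Concurrent clears reject, so Promise.allSettled reports them as 'rejected'.
      throw new Error('Cache clear is already in progress');
    }
    this.isClearing = true;
    try {
      await this.removeCacheFiles(); // hypothetical async on-disk cleanup step
      return { success: true, message: 'Cache cleared successfully' };
    } finally {
      this.isClearing = false;
    }
  }

  async removeCacheFiles() {
    // Placeholder for deleting the cache directory.
  }
}
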
@@ -0,0 +1,225 @@
+ /**
+  * Tests for Tokenizer utilities
+  *
+  * Tests the token estimation and model-specific limits including:
+  * - Token estimation for various text types
+  * - Model token limits lookup
+  * - Chunking parameters calculation
+  * - Token limit checking
+  */
+
+ import { describe, it, expect } from 'vitest';
+ import {
+   estimateTokens,
+   getModelTokenLimit,
+   getChunkingParams,
+   exceedsTokenLimit,
+   MODEL_TOKEN_LIMITS
+ } from '../lib/tokenizer.js';
+
+ describe('Token Estimation', () => {
+   describe('estimateTokens', () => {
+     it('should return 0 for empty string', () => {
+       expect(estimateTokens('')).toBe(0);
+       expect(estimateTokens(null)).toBe(0);
+       expect(estimateTokens(undefined)).toBe(0);
+     });
+
+     it('should count simple words correctly', () => {
+       // Simple words get ~1 token each + 2 for CLS/SEP
+       const result = estimateTokens('hello world');
+       expect(result).toBeGreaterThanOrEqual(4); // 2 words + 2 special tokens
+       expect(result).toBeLessThanOrEqual(6);
+     });
+
+     it('should add extra tokens for long words', () => {
+       const shortWord = estimateTokens('cat');
+       const longWord = estimateTokens('internationalization');
+
+       // Long words should have more tokens due to subword splitting
+       expect(longWord).toBeGreaterThan(shortWord);
+     });
+
+     it('should count special characters', () => {
+       const withoutSpecial = estimateTokens('hello world');
+       const withSpecial = estimateTokens('hello(); world{}');
+
+       // Special characters add to token count
+       expect(withSpecial).toBeGreaterThan(withoutSpecial);
+     });
+
+     it('should handle code snippets', () => {
+       const code = `
+         function test() {
+           const x = 10;
+           return x * 2;
+         }
+       `;
+
+       const tokens = estimateTokens(code);
+
+       // Code has many special chars, should have reasonable token count
+       expect(tokens).toBeGreaterThan(10);
+       expect(tokens).toBeLessThan(100);
+     });
+
+     it('should handle multiline text', () => {
+       const multiline = 'line one\nline two\nline three';
+       const tokens = estimateTokens(multiline);
+
+       expect(tokens).toBeGreaterThan(5);
+     });
+   });
+ });
+
+ describe('Model Token Limits', () => {
+   describe('MODEL_TOKEN_LIMITS', () => {
+     it('should have default limit', () => {
+       expect(MODEL_TOKEN_LIMITS['default']).toBeDefined();
+       expect(MODEL_TOKEN_LIMITS['default']).toBe(256);
+     });
+
+     it('should have limits for MiniLM models', () => {
+       expect(MODEL_TOKEN_LIMITS['Xenova/all-MiniLM-L6-v2']).toBe(256);
+       expect(MODEL_TOKEN_LIMITS['Xenova/all-MiniLM-L12-v2']).toBe(256);
+     });
+
+     it('should have limits for code-specific models', () => {
+       expect(MODEL_TOKEN_LIMITS['Xenova/codebert-base']).toBe(512);
+       expect(MODEL_TOKEN_LIMITS['Xenova/graphcodebert-base']).toBe(512);
+     });
+
+     it('should have limits for E5 and BGE models', () => {
+       expect(MODEL_TOKEN_LIMITS['Xenova/e5-small-v2']).toBe(512);
+       expect(MODEL_TOKEN_LIMITS['Xenova/bge-base-en-v1.5']).toBe(512);
+     });
+   });
+
+   describe('getModelTokenLimit', () => {
+     it('should return correct limit for known models', () => {
+       expect(getModelTokenLimit('Xenova/all-MiniLM-L6-v2')).toBe(256);
+       expect(getModelTokenLimit('Xenova/codebert-base')).toBe(512);
+     });
+
+     it('should return default for unknown models', () => {
+       expect(getModelTokenLimit('unknown/model-name')).toBe(256);
+     });
+
+     it('should return default for null/undefined', () => {
+       expect(getModelTokenLimit(null)).toBe(256);
+       expect(getModelTokenLimit(undefined)).toBe(256);
+     });
+
+     it('should be case-insensitive', () => {
+       const normalCase = getModelTokenLimit('Xenova/all-MiniLM-L6-v2');
+       const lowerCase = getModelTokenLimit('xenova/all-minilm-l6-v2');
+
+       expect(lowerCase).toBe(normalCase);
+     });
+   });
+ });
+
+ describe('Chunking Parameters', () => {
+   describe('getChunkingParams', () => {
+     it('should return correct params for default model', () => {
+       const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
+
+       expect(params.maxTokens).toBe(256);
+       expect(params.targetTokens).toBeLessThan(256); // 85% of max
+       expect(params.targetTokens).toBeGreaterThan(200);
+       expect(params.overlapTokens).toBeLessThan(params.targetTokens);
+     });
+
+     it('should calculate ~85% for target tokens', () => {
+       const params = getChunkingParams('Xenova/codebert-base'); // 512 limit
+
+       // 85% of 512 = 435.2 -> floor = 435
+       expect(params.targetTokens).toBe(Math.floor(512 * 0.85));
+     });
+
+     it('should calculate ~18% overlap', () => {
+       const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
+
+       const expectedOverlap = Math.floor(params.targetTokens * 0.18);
+       expect(params.overlapTokens).toBe(expectedOverlap);
+     });
+
+     it('should return all three parameters', () => {
+       const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
+
+       expect(params).toHaveProperty('maxTokens');
+       expect(params).toHaveProperty('targetTokens');
+       expect(params).toHaveProperty('overlapTokens');
+     });
+
+     it('should handle unknown models with defaults', () => {
+       const params = getChunkingParams('unknown/model');
+
+       expect(params.maxTokens).toBe(256);
+       expect(params.targetTokens).toBeLessThan(256);
+     });
+   });
+ });
+
+ describe('Token Limit Checking', () => {
+   describe('exceedsTokenLimit', () => {
+     it('should return false for short text', () => {
+       const shortText = 'hello world';
+       expect(exceedsTokenLimit(shortText, 'Xenova/all-MiniLM-L6-v2')).toBe(false);
+     });
+
+     it('should return true for very long text', () => {
+       // Create text that definitely exceeds 256 tokens
+       const longText = 'word '.repeat(500);
+       expect(exceedsTokenLimit(longText, 'Xenova/all-MiniLM-L6-v2')).toBe(true);
+     });
+
+     it('should consider different model limits', () => {
+       // Create text that exceeds 256 but not 512
+       const mediumText = 'word '.repeat(300);
+
+       // Should exceed small model limit
+       expect(exceedsTokenLimit(mediumText, 'Xenova/all-MiniLM-L6-v2')).toBe(true);
+
+       // Should not exceed large model limit
+       expect(exceedsTokenLimit(mediumText, 'Xenova/codebert-base')).toBe(false);
+     });
+
+     it('should handle empty text', () => {
+       expect(exceedsTokenLimit('', 'Xenova/all-MiniLM-L6-v2')).toBe(false);
+     });
+   });
+ });
+
+ describe('Integration: Token Estimation Accuracy', () => {
+   it('should estimate reasonable tokens for typical code chunks', () => {
+     const typicalCodeChunk = `
+       import { pipeline } from '@xenova/transformers';
+
+       export class MyClass {
+         constructor(config) {
+           this.config = config;
+           this.data = [];
+         }
+
+         async process(input) {
+           const result = await this.transform(input);
+           return result.map(item => item.value);
+         }
+       }
+     `;
+
+     const tokens = estimateTokens(typicalCodeChunk);
+
+     // Should be within typical chunk size
+     expect(tokens).toBeGreaterThan(30);
+     expect(tokens).toBeLessThan(200);
+   });
+
+   it('should keep small code chunks under model limits', () => {
+     // A small chunk should definitely be under the limit
+     const safeChunk = 'const x = 1;\n'.repeat(10);
+
+     expect(exceedsTokenLimit(safeChunk, 'Xenova/all-MiniLM-L6-v2')).toBe(false);
+   });
+ });
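
Taken together, the assertions above largely determine the tokenizer heuristics: roughly one token per whitespace-separated word plus two CLS/SEP tokens, extra tokens for long words and punctuation, a default limit of 256 tokens, a chunk target of about 85% of the model limit, and about 18% overlap between chunks. The sketch below is consistent with these tests; the exact constants and heuristics in the package's lib/tokenizer.js may differ.

// Minimal sketch inferred from the test assertions, not the package's source.
const MODEL_TOKEN_LIMITS = {
  'default': 256,
  'Xenova/all-MiniLM-L6-v2': 256,
  'Xenova/all-MiniLM-L12-v2': 256,
  'Xenova/codebert-base': 512,
  'Xenova/graphcodebert-base': 512,
  'Xenova/e5-small-v2': 512,
  'Xenova/bge-base-en-v1.5': 512
};

function estimateTokens(text) {
  if (!text) return 0;
  const words = text.split(/\s+/).filter(Boolean);
  const specialChars = (text.match(/[^\w\s]/g) || []).length;
  // ~1 token per word, extra tokens for long words (subword splitting),
  // plus punctuation and the CLS/SEP pair.
  const subwordExtra = words.reduce(
    (sum, w) => sum + Math.floor(Math.max(0, w.length - 6) / 4), 0);
  return words.length + subwordExtra + specialChars + 2;
}

function getModelTokenLimit(model) {
  if (!model) return MODEL_TOKEN_LIMITS.default;
  const hit = Object.keys(MODEL_TOKEN_LIMITS)
    .find(key => key.toLowerCase() === model.toLowerCase());
  return hit ? MODEL_TOKEN_LIMITS[hit] : MODEL_TOKEN_LIMITS.default;
}

function getChunkingParams(model) {
  const maxTokens = getModelTokenLimit(model);
  const targetTokens = Math.floor(maxTokens * 0.85);   // leave headroom below the hard limit
  const overlapTokens = Math.floor(targetTokens * 0.18); // overlap between neighbouring chunks
  return { maxTokens, targetTokens, overlapTokens };
}

function exceedsTokenLimit(text, model) {
  return estimateTokens(text) > getModelTokenLimit(model);
}
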