npm - smart-coding-mcp - Versions diffs - 1.2.4 → 1.3.1 - Mend

smart-coding-mcp 1.2.4 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/README.md +28 -168
package/config.json +4 -3
package/example.png +0 -0
package/features/clear-cache.js +30 -7
package/features/index-codebase.js +507 -37
package/how-its-works.png +0 -0
package/index.js +2 -2
package/lib/cache.js +5 -0
package/lib/config.js +29 -4
package/lib/embedding-worker.js +67 -0
package/lib/tokenizer.js +142 -0
package/lib/utils.js +113 -25
package/package.json +9 -3
package/test/clear-cache.test.js +288 -0
package/test/embedding-model.test.js +230 -0
package/test/helpers.js +128 -0
package/test/hybrid-search.test.js +243 -0
package/test/index-codebase.test.js +246 -0
package/test/integration.test.js +223 -0
package/test/tokenizer.test.js +225 -0
package/vitest.config.js +29 -0

package/test/integration.test.js ADDED Viewed

@@ -0,0 +1,223 @@
+/**
+ * Integration tests for cross-feature interactions
+ *
+ * Tests scenarios that involve multiple features working together:
+ * 1. Concurrent indexing protection across MCP tool calls
+ * 2. Clear cache interaction with indexing
+ * 3. Tool handler response quality
+ */
+import { describe, it, expect, beforeAll, afterAll, beforeEach } from 'vitest';
+import {
+  createTestFixtures,
+  cleanupFixtures,
+  clearTestCache,
+  createMockRequest,
+  measureTime
+} from './helpers.js';
+import * as IndexCodebaseFeature from '../features/index-codebase.js';
+import * as ClearCacheFeature from '../features/clear-cache.js';
+describe('Concurrent Indexing', () => {
+  let fixtures;
+  beforeAll(async () => {
+    fixtures = await createTestFixtures({ workerThreads: 2 });
+  });
+  afterAll(async () => {
+    await cleanupFixtures(fixtures);
+  });
+  beforeEach(async () => {
+    // Reset indexing state
+    fixtures.indexer.isIndexing = false;
+    // Clear cache for clean state
+    await clearTestCache(fixtures.config);
+    fixtures.cache.setVectorStore([]);
+    fixtures.cache.fileHashes = new Map();
+  });
+  it('should only run one indexer at a time', async () => {
+    const request1 = createMockRequest('b_index_codebase', { force: true });
+    const request2 = createMockRequest('b_index_codebase', { force: false });
+    // Start first indexing
+    const promise1 = IndexCodebaseFeature.handleToolCall(request1, fixtures.indexer);
+    // Wait a bit for first to start
+    await new Promise(resolve => setTimeout(resolve, 100));
+    // Verify first is running
+    expect(fixtures.indexer.isIndexing).toBe(true);
+    // Start second indexing while first is running
+    const promise2 = IndexCodebaseFeature.handleToolCall(request2, fixtures.indexer);
+    // Wait for both to complete
+    const [result1, result2] = await Promise.all([promise1, promise2]);
+    // First should complete with stats
+    expect(result1.content[0].text).toContain('reindexed successfully');
+    expect(result1.content[0].text).toContain('Total files in index');
+    // Second should clearly indicate it was skipped
+    expect(result2.content[0].text).toContain('Indexing skipped');
+    expect(result2.content[0].text).toContain('already in progress');
+  });
+  it('should set isIndexing flag during indexing', async () => {
+    // Check initial state
+    expect(fixtures.indexer.isIndexing).toBe(false);
+    // Start indexing
+    const promise = fixtures.indexer.indexAll(true);
+    // Wait for it to start
+    await new Promise(resolve => setTimeout(resolve, 50));
+    // Check flag is set
+    expect(fixtures.indexer.isIndexing).toBe(true);
+    // Wait for completion
+    await promise;
+    // Check flag is cleared
+    expect(fixtures.indexer.isIndexing).toBe(false);
+  });
+  it('should skip concurrent indexing calls gracefully', async () => {
+    // Start first indexing
+    const promise1 = fixtures.indexer.indexAll(true);
+    await new Promise(resolve => setTimeout(resolve, 50));
+    // Second call should return immediately with skipped status
+    const { result, duration } = await measureTime(() => fixtures.indexer.indexAll(false));
+    // Second call should return very quickly (not run full indexing)
+    expect(duration).toBeLessThan(100);
+    // Should indicate it was skipped
+    expect(result.skipped).toBe(true);
+    expect(result.reason).toContain('already in progress');
+    await promise1;
+  });
+});
+describe('Clear Cache Operations', () => {
+  let fixtures;
+  beforeAll(async () => {
+    fixtures = await createTestFixtures({ workerThreads: 2 });
+  });
+  afterAll(async () => {
+    await cleanupFixtures(fixtures);
+  });
+  beforeEach(async () => {
+    fixtures.indexer.isIndexing = false;
+  });
+  it('should prevent clear cache while indexing', async () => {
+    // Start indexing
+    const indexPromise = fixtures.indexer.indexAll(true);
+    await new Promise(resolve => setTimeout(resolve, 50));
+    // Try to clear cache
+    const request = createMockRequest('c_clear_cache', {});
+    const result = await ClearCacheFeature.handleToolCall(request, fixtures.cacheClearer);
+    // Should fail with appropriate message
+    expect(result.content[0].text).toContain('indexing is in progress');
+    await indexPromise;
+  });
+  it('should allow clear cache after indexing completes', async () => {
+    // First index
+    await fixtures.indexer.indexAll(true);
+    // Verify indexing is done
+    expect(fixtures.indexer.isIndexing).toBe(false);
+    // Now clear cache
+    const request = createMockRequest('c_clear_cache', {});
+    const result = await ClearCacheFeature.handleToolCall(request, fixtures.cacheClearer);
+    // Should succeed
+    expect(result.content[0].text).toContain('Cache cleared successfully');
+  });
+  it('should clear cache immediately after indexing without crash', async () => {
+    // This tests the race condition scenario
+    await fixtures.indexer.indexAll(true);
+    // Immediately clear (potential race with cache.save())
+    const result = await fixtures.cacheClearer.execute();
+    expect(result.success).toBe(true);
+    expect(result.message).toContain('Cache cleared successfully');
+  });
+  it('should handle multiple concurrent clear cache calls', async () => {
+    // First index to have something to clear
+    await fixtures.indexer.indexAll(true);
+    // Reset the isClearing flag
+    fixtures.cacheClearer.isClearing = false;
+    // Multiple concurrent clears - with new mutex, only first should succeed
+    const promises = [
+      fixtures.cacheClearer.execute(),
+      fixtures.cacheClearer.execute(),
+      fixtures.cacheClearer.execute()
+    ];
+    const results = await Promise.allSettled(promises);
+    // First should succeed, others should fail with "already in progress"
+    const successes = results.filter(r => r.status === 'fulfilled');
+    const failures = results.filter(r => r.status === 'rejected');
+    expect(successes.length).toBe(1);
+    expect(failures.length).toBe(2);
+    // Verify failure message
+    for (const failure of failures) {
+      expect(failure.reason.message).toContain('already in progress');
+    }
+  });
+});
+describe('Tool Handler Response Quality', () => {
+  let fixtures;
+  beforeAll(async () => {
+    fixtures = await createTestFixtures({ workerThreads: 2 });
+  });
+  afterAll(async () => {
+    await cleanupFixtures(fixtures);
+  });
+  it('should return meaningful response when indexing is skipped', async () => {
+    // Start first indexing
+    const promise1 = fixtures.indexer.indexAll(true);
+    await new Promise(resolve => setTimeout(resolve, 50));
+    // Second call via handler
+    const request = createMockRequest('b_index_codebase', { force: false });
+    const result = await IndexCodebaseFeature.handleToolCall(request, fixtures.indexer);
+    await promise1;
+    // The response should clearly indicate the indexing was skipped
+    expect(result.content[0].text).toContain('Indexing skipped');
+    expect(result.content[0].text).toContain('already in progress');
+    expect(result.content[0].text).toContain('Please wait');
+  });
+});

package/test/tokenizer.test.js ADDED Viewed

@@ -0,0 +1,225 @@
+/**
+ * Tests for Tokenizer utilities
+ *
+ * Tests the token estimation and model-specific limits including:
+ * - Token estimation for various text types
+ * - Model token limits lookup
+ * - Chunking parameters calculation
+ * - Token limit checking
+ */
+import { describe, it, expect } from 'vitest';
+import {
+  estimateTokens,
+  getModelTokenLimit,
+  getChunkingParams,
+  exceedsTokenLimit,
+  MODEL_TOKEN_LIMITS
+} from '../lib/tokenizer.js';
+describe('Token Estimation', () => {
+  describe('estimateTokens', () => {
+    it('should return 0 for empty string', () => {
+      expect(estimateTokens('')).toBe(0);
+      expect(estimateTokens(null)).toBe(0);
+      expect(estimateTokens(undefined)).toBe(0);
+    });
+    it('should count simple words correctly', () => {
+      // Simple words get ~1 token each + 2 for CLS/SEP
+      const result = estimateTokens('hello world');
+      expect(result).toBeGreaterThanOrEqual(4); // 2 words + 2 special tokens
+      expect(result).toBeLessThanOrEqual(6);
+    });
+    it('should add extra tokens for long words', () => {
+      const shortWord = estimateTokens('cat');
+      const longWord = estimateTokens('internationalization');
+      // Long words should have more tokens due to subword splitting
+      expect(longWord).toBeGreaterThan(shortWord);
+    });
+    it('should count special characters', () => {
+      const withoutSpecial = estimateTokens('hello world');
+      const withSpecial = estimateTokens('hello(); world{}');
+      // Special characters add to token count
+      expect(withSpecial).toBeGreaterThan(withoutSpecial);
+    });
+    it('should handle code snippets', () => {
+      const code = `
+        function test() {
+          const x = 10;
+          return x * 2;
+        }
+      `;
+      const tokens = estimateTokens(code);
+      // Code has many special chars, should have reasonable token count
+      expect(tokens).toBeGreaterThan(10);
+      expect(tokens).toBeLessThan(100);
+    });
+    it('should handle multiline text', () => {
+      const multiline = 'line one\nline two\nline three';
+      const tokens = estimateTokens(multiline);
+      expect(tokens).toBeGreaterThan(5);
+    });
+  });
+});
+describe('Model Token Limits', () => {
+  describe('MODEL_TOKEN_LIMITS', () => {
+    it('should have default limit', () => {
+      expect(MODEL_TOKEN_LIMITS['default']).toBeDefined();
+      expect(MODEL_TOKEN_LIMITS['default']).toBe(256);
+    });
+    it('should have limits for MiniLM models', () => {
+      expect(MODEL_TOKEN_LIMITS['Xenova/all-MiniLM-L6-v2']).toBe(256);
+      expect(MODEL_TOKEN_LIMITS['Xenova/all-MiniLM-L12-v2']).toBe(256);
+    });
+    it('should have limits for code-specific models', () => {
+      expect(MODEL_TOKEN_LIMITS['Xenova/codebert-base']).toBe(512);
+      expect(MODEL_TOKEN_LIMITS['Xenova/graphcodebert-base']).toBe(512);
+    });
+    it('should have limits for E5 and BGE models', () => {
+      expect(MODEL_TOKEN_LIMITS['Xenova/e5-small-v2']).toBe(512);
+      expect(MODEL_TOKEN_LIMITS['Xenova/bge-base-en-v1.5']).toBe(512);
+    });
+  });
+  describe('getModelTokenLimit', () => {
+    it('should return correct limit for known models', () => {
+      expect(getModelTokenLimit('Xenova/all-MiniLM-L6-v2')).toBe(256);
+      expect(getModelTokenLimit('Xenova/codebert-base')).toBe(512);
+    });
+    it('should return default for unknown models', () => {
+      expect(getModelTokenLimit('unknown/model-name')).toBe(256);
+    });
+    it('should return default for null/undefined', () => {
+      expect(getModelTokenLimit(null)).toBe(256);
+      expect(getModelTokenLimit(undefined)).toBe(256);
+    });
+    it('should be case-insensitive', () => {
+      const normalCase = getModelTokenLimit('Xenova/all-MiniLM-L6-v2');
+      const lowerCase = getModelTokenLimit('xenova/all-minilm-l6-v2');
+      expect(lowerCase).toBe(normalCase);
+    });
+  });
+});
+describe('Chunking Parameters', () => {
+  describe('getChunkingParams', () => {
+    it('should return correct params for default model', () => {
+      const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
+      expect(params.maxTokens).toBe(256);
+      expect(params.targetTokens).toBeLessThan(256); // 85% of max
+      expect(params.targetTokens).toBeGreaterThan(200);
+      expect(params.overlapTokens).toBeLessThan(params.targetTokens);
+    });
+    it('should calculate ~85% for target tokens', () => {
+      const params = getChunkingParams('Xenova/codebert-base'); // 512 limit
+      // 85% of 512 = 435.2 -> floor = 435
+      expect(params.targetTokens).toBe(Math.floor(512 * 0.85));
+    });
+    it('should calculate ~18% overlap', () => {
+      const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
+      const expectedOverlap = Math.floor(params.targetTokens * 0.18);
+      expect(params.overlapTokens).toBe(expectedOverlap);
+    });
+    it('should return all three parameters', () => {
+      const params = getChunkingParams('Xenova/all-MiniLM-L6-v2');
+      expect(params).toHaveProperty('maxTokens');
+      expect(params).toHaveProperty('targetTokens');
+      expect(params).toHaveProperty('overlapTokens');
+    });
+    it('should handle unknown models with defaults', () => {
+      const params = getChunkingParams('unknown/model');
+      expect(params.maxTokens).toBe(256);
+      expect(params.targetTokens).toBeLessThan(256);
+    });
+  });
+});
+describe('Token Limit Checking', () => {
+  describe('exceedsTokenLimit', () => {
+    it('should return false for short text', () => {
+      const shortText = 'hello world';
+      expect(exceedsTokenLimit(shortText, 'Xenova/all-MiniLM-L6-v2')).toBe(false);
+    });
+    it('should return true for very long text', () => {
+      // Create text that definitely exceeds 256 tokens
+      const longText = 'word '.repeat(500);
+      expect(exceedsTokenLimit(longText, 'Xenova/all-MiniLM-L6-v2')).toBe(true);
+    });
+    it('should consider different model limits', () => {
+      // Create text that exceeds 256 but not 512
+      const mediumText = 'word '.repeat(300);
+      // Should exceed small model limit
+      expect(exceedsTokenLimit(mediumText, 'Xenova/all-MiniLM-L6-v2')).toBe(true);
+      // Should not exceed large model limit
+      expect(exceedsTokenLimit(mediumText, 'Xenova/codebert-base')).toBe(false);
+    });
+    it('should handle empty text', () => {
+      expect(exceedsTokenLimit('', 'Xenova/all-MiniLM-L6-v2')).toBe(false);
+    });
+  });
+});
+describe('Integration: Token Estimation Accuracy', () => {
+  it('should estimate reasonable tokens for typical code chunks', () => {
+    const typicalCodeChunk = `
+      import { pipeline } from '@xenova/transformers';
+      export class MyClass {
+        constructor(config) {
+          this.config = config;
+          this.data = [];
+        }
+        async process(input) {
+          const result = await this.transform(input);
+          return result.map(item => item.value);
+        }
+      }
+    `;
+    const tokens = estimateTokens(typicalCodeChunk);
+    // Should be within typical chunk size
+    expect(tokens).toBeGreaterThan(30);
+    expect(tokens).toBeLessThan(200);
+  });
+  it('should keep small code chunks under model limits', () => {
+    // A small chunk should definitely be under the limit
+    const safeChunk = 'const x = 1;\n'.repeat(10);
+    expect(exceedsTokenLimit(safeChunk, 'Xenova/all-MiniLM-L6-v2')).toBe(false);
+  });
+});

package/vitest.config.js ADDED Viewed

@@ -0,0 +1,29 @@
+import { defineConfig } from 'vitest/config';
+export default defineConfig({
+  test: {
+    // Test files pattern
+    include: ['test/**/*.test.js'],
+    // Global test timeout (embedding models can be slow)
+    testTimeout: 180000,
+    // Hook timeout for setup/teardown
+    hookTimeout: 180000,
+    // Run test files sequentially to avoid resource conflicts
+    // Each file loads the embedding model which uses significant memory
+    fileParallelism: false,
+    // Run tests within a file sequentially
+    sequence: {
+      concurrent: false
+    },
+    // Verbose output
+    reporters: ['verbose'],
+    // Isolate tests to prevent memory leaks between test files
+    isolate: true
+  }
+});