smart-coding-mcp 1.4.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,273 @@
1
+ /**
2
+ * AST-based Code Chunker
3
+ *
4
+ * Uses Tree-sitter to parse code and chunk at semantic boundaries
5
+ * (functions, classes, methods) instead of arbitrary line splits.
6
+ */
7
+
8
+ import Parser from 'web-tree-sitter';
9
+ import path from 'path';
10
+ import fs from 'fs/promises';
11
+ import { fileURLToPath } from 'url';
12
+ import { smartChunk } from './utils.js'; // Fallback
13
+
14
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
15
+
16
// Maps file extensions (lowercase, without the dot) to Tree-sitter language names.
const LANGUAGE_MAP = {
  js: 'javascript',
  mjs: 'javascript',
  cjs: 'javascript',
  jsx: 'javascript',
  ts: 'typescript',
  tsx: 'typescript',
  py: 'python',
  go: 'go',
  rs: 'rust',
  rb: 'ruby',
  java: 'java',
  c: 'c',
  cpp: 'cpp',
  h: 'c',
  hpp: 'cpp'
};

// Per-language Tree-sitter node types treated as semantic chunk boundaries
// (function/class/method definitions and similar top-level constructs).
const SEMANTIC_NODES = {
  javascript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement'],
  typescript: ['function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'export_statement'],
  python: ['function_definition', 'class_definition', 'decorated_definition'],
  go: ['function_declaration', 'method_declaration', 'type_declaration'],
  rust: ['function_item', 'impl_item', 'struct_item', 'enum_item'],
  ruby: ['method', 'class', 'module'],
  java: ['method_declaration', 'class_declaration', 'interface_declaration'],
  c: ['function_definition', 'struct_specifier'],
  cpp: ['function_definition', 'class_specifier', 'struct_specifier']
};
47
+
48
/**
 * AST-based code chunker.
 *
 * Parses source with Tree-sitter and emits chunks at semantic boundaries
 * (functions, classes, methods). Falls back to smartChunk() whenever the
 * language is unsupported, no grammar WASM is found, parsing fails, or no
 * semantic nodes are detected.
 */
export class ASTChunker {
  /**
   * @param {object} config - Server config; reads `chunkSize` (line window
   *   for oversized nodes) and `verbose`, and forwards the whole object to
   *   smartChunk() on fallback.
   */
  constructor(config) {
    this.config = config;
    this.parser = null;          // Tree-sitter Parser, created lazily in init()
    this.languages = new Map();  // langName -> loaded grammar (cache)
    this.initialized = false;
  }

  /**
   * Initialize the Tree-sitter WASM runtime and create a parser.
   * Idempotent; chunk() calls this automatically.
   * @throws re-throws any Tree-sitter initialization failure.
   */
  async init() {
    if (this.initialized) return;

    try {
      await Parser.init();
      this.parser = new Parser();
      this.initialized = true;
      console.error('[AST] Tree-sitter parser initialized');
    } catch (error) {
      console.error('[AST] Failed to initialize Tree-sitter:', error.message);
      throw error;
    }
  }

  /**
   * Load (and cache) the grammar for a language, probing a few known
   * locations for the grammar's .wasm file.
   * @param {string} langName - Tree-sitter language name (e.g. 'javascript').
   * @returns {Promise<object|null>} the loaded grammar, or null if unavailable.
   */
  async loadLanguage(langName) {
    if (this.languages.has(langName)) {
      return this.languages.get(langName);
    }

    try {
      // Candidate locations for the WASM grammar, checked in order.
      const possiblePaths = [
        path.join(__dirname, '..', 'node_modules', `tree-sitter-${langName}`, `tree-sitter-${langName}.wasm`),
        path.join(__dirname, '..', 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${langName}.wasm`),
        path.join(__dirname, '..', 'grammars', `tree-sitter-${langName}.wasm`)
      ];

      for (const wasmPath of possiblePaths) {
        try {
          await fs.access(wasmPath);
          const language = await Parser.Language.load(wasmPath);
          this.languages.set(langName, language);
          if (this.config.verbose) {
            console.error(`[AST] Loaded ${langName} grammar from ${wasmPath}`);
          }
          return language;
        } catch {
          continue; // this candidate missing/unloadable; try the next one
        }
      }

      console.error(`[AST] No grammar found for ${langName}`);
      return null;
    } catch (error) {
      console.error(`[AST] Failed to load ${langName}:`, error.message);
      return null;
    }
  }

  /**
   * Map a file path to a Tree-sitter language name via its extension.
   * @returns {string|null} language name, or null when unsupported.
   */
  getLanguageForFile(file) {
    const ext = path.extname(file).slice(1).toLowerCase();
    return LANGUAGE_MAP[ext] || null;
  }

  /**
   * Chunk code using AST analysis.
   * @param {string} content - File contents.
   * @param {string} file - File path (used for language detection/logging).
   * @returns {Promise<Array<{text:string,startLine:number,endLine:number,nodeType?:string}>>}
   *   1-indexed, non-overlapping chunks sorted by startLine, or whatever
   *   smartChunk() returns on fallback.
   */
  async chunk(content, file) {
    if (!this.initialized) {
      await this.init();
    }

    const langName = this.getLanguageForFile(file);

    // Unsupported extension -> smart chunking.
    if (!langName) {
      if (this.config.verbose) {
        console.error(`[AST] No AST support for ${path.extname(file)}, using smart chunking`);
      }
      return smartChunk(content, file, this.config);
    }

    const language = await this.loadLanguage(langName);

    // Grammar unavailable -> smart chunking.
    if (!language) {
      return smartChunk(content, file, this.config);
    }

    try {
      this.parser.setLanguage(language);
      const tree = this.parser.parse(content);
      const chunks = [];
      const lines = content.split('\n');
      const semanticNodes = SEMANTIC_NODES[langName] || [];

      // Walk the AST and collect chunks at semantic boundaries.
      this.walkTree(tree.rootNode, (node) => {
        if (!semanticNodes.includes(node.type)) return;

        const startLine = node.startPosition.row;
        const endLine = node.endPosition.row;

        // Skip very small nodes (< 3 lines).
        if (endLine - startLine < 2) return;

        const text = lines.slice(startLine, endLine + 1).join('\n');

        // Character budget: chunkSize (lines) * 4 (~tokens/line heuristic)
        // * 4 (~chars/token). Nodes above the budget get split line-wise.
        const targetTokens = this.config.chunkSize * 4;
        if (text.length > targetTokens * 4) {
          this.splitLargeNode(node, lines, chunks);
        } else {
          chunks.push({
            text,
            startLine: startLine + 1, // convert from 0- to 1-indexed
            endLine: endLine + 1,
            nodeType: node.type
          });
        }
      });

      // Nothing semantic found (e.g. flat scripts) -> smart chunking.
      if (chunks.length === 0) {
        return smartChunk(content, file, this.config);
      }

      chunks.sort((a, b) => a.startLine - b.startLine);

      // Nested semantic nodes produce overlapping spans; merge them.
      return this.mergeAndCleanChunks(chunks, lines);
    } catch (error) {
      console.error(`[AST] Parse error for ${file}:`, error.message);
      return smartChunk(content, file, this.config);
    }
  }

  /**
   * Pre-order traversal of the AST, invoking callback on every node.
   * Uses an explicit stack instead of recursion so very deep trees
   * (minified or generated code) cannot overflow the call stack.
   */
  walkTree(node, callback) {
    const stack = [node];
    while (stack.length > 0) {
      const current = stack.pop();
      callback(current);
      // Push children in reverse so they are visited left-to-right.
      for (let i = current.childCount - 1; i >= 0; i--) {
        stack.push(current.child(i));
      }
    }
  }

  /**
   * Split an oversized AST node into fixed-size line windows.
   * Appends parts (nodeType suffixed with '_part') to `chunks`.
   */
  splitLargeNode(node, lines, chunks) {
    const chunkSize = this.config.chunkSize || 25;
    const startLine = node.startPosition.row;
    const endLine = node.endPosition.row;

    for (let i = startLine; i <= endLine; i += chunkSize) {
      const chunkEnd = Math.min(i + chunkSize - 1, endLine);
      chunks.push({
        text: lines.slice(i, chunkEnd + 1).join('\n'),
        startLine: i + 1,
        endLine: chunkEnd + 1,
        nodeType: node.type + '_part'
      });
    }
  }

  /**
   * Remove overlapping chunks, extending the previous chunk when a later
   * one reaches further down the file. Input must be sorted by startLine.
   * (Removed an unused `minSize` local from the original — it was declared
   * but never applied.)
   */
  mergeAndCleanChunks(chunks, lines) {
    const cleaned = [];

    for (const chunk of chunks) {
      const prev = cleaned[cleaned.length - 1];
      if (prev && chunk.startLine <= prev.endLine) {
        // Overlap: grow the previous chunk if this one extends past it.
        if (chunk.endLine > prev.endLine) {
          prev.endLine = chunk.endLine;
          prev.text = lines.slice(prev.startLine - 1, prev.endLine).join('\n');
        }
        continue;
      }
      cleaned.push(chunk);
    }

    return cleaned;
  }
}
258
+
259
/**
 * Factory: select the chunker implementation from config.chunkingMode.
 * 'ast' yields an ASTChunker; any other mode yields a minimal wrapper
 * whose chunk() delegates to smartChunk().
 */
export function getChunker(config) {
  if (config.chunkingMode === 'ast') {
    return new ASTChunker(config);
  }

  const smartWrapper = {
    async chunk(content, file) {
      return smartChunk(content, file, config);
    }
  };
  return smartWrapper;
}
package/lib/config.js CHANGED
@@ -62,7 +62,10 @@ const DEFAULT_CONFIG = {
62
62
  watchFiles: false,
63
63
  verbose: false,
64
64
  workerThreads: "auto", // "auto" = CPU cores - 1, or set a number
65
- embeddingModel: "Xenova/all-MiniLM-L6-v2",
65
+ embeddingModel: "nomic-ai/nomic-embed-text-v1.5",
66
+ embeddingDimension: 256, // MRL dimension: 64, 128, 256, 512, 768
67
+ device: "auto", // "cpu", "webgpu", or "auto"
68
+ chunkingMode: "smart", // "smart", "ast", or "line"
66
69
  semanticWeight: 0.7,
67
70
  exactMatchBoost: 1.5,
68
71
  smartIndexing: true
@@ -237,6 +240,42 @@ export async function loadConfig(workspaceDir = null) {
237
240
  }
238
241
  }
239
242
 
243
+ // MRL embedding dimension
244
+ if (process.env.SMART_CODING_EMBEDDING_DIMENSION !== undefined) {
245
+ const value = parseInt(process.env.SMART_CODING_EMBEDDING_DIMENSION, 10);
246
+ const validDims = [64, 128, 256, 512, 768];
247
+ if (validDims.includes(value)) {
248
+ config.embeddingDimension = value;
249
+ console.error(`[Config] Using embedding dimension: ${value}`);
250
+ } else {
251
+ console.error(`[Config] Invalid SMART_CODING_EMBEDDING_DIMENSION: ${value}, using default (must be 64, 128, 256, 512, or 768)`);
252
+ }
253
+ }
254
+
255
+ // Device selection
256
+ if (process.env.SMART_CODING_DEVICE !== undefined) {
257
+ const value = process.env.SMART_CODING_DEVICE.trim().toLowerCase();
258
+ const validDevices = ['cpu', 'webgpu', 'auto'];
259
+ if (validDevices.includes(value)) {
260
+ config.device = value;
261
+ console.error(`[Config] Using device: ${value}`);
262
+ } else {
263
+ console.error(`[Config] Invalid SMART_CODING_DEVICE: ${value}, using default (must be 'cpu', 'webgpu', or 'auto')`);
264
+ }
265
+ }
266
+
267
+ // Chunking mode
268
+ if (process.env.SMART_CODING_CHUNKING_MODE !== undefined) {
269
+ const value = process.env.SMART_CODING_CHUNKING_MODE.trim().toLowerCase();
270
+ const validModes = ['smart', 'ast', 'line'];
271
+ if (validModes.includes(value)) {
272
+ config.chunkingMode = value;
273
+ console.error(`[Config] Using chunking mode: ${value}`);
274
+ } else {
275
+ console.error(`[Config] Invalid SMART_CODING_CHUNKING_MODE: ${value}, using default (must be 'smart', 'ast', or 'line')`);
276
+ }
277
+ }
278
+
240
279
  return config;
241
280
  }
242
281
 
@@ -1,12 +1,38 @@
1
1
  import { parentPort, workerData } from "worker_threads";
2
- import { pipeline } from "@xenova/transformers";
2
+ import { pipeline, layer_norm } from "@huggingface/transformers";
3
3
 
4
4
  let embedder = null;
5
+ const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
5
6
 
6
7
  // Initialize the embedding model once when worker starts
7
8
/**
 * Lazily build this worker's embedding function (created once per worker).
 * Nomic models get MRL post-processing (layer_norm -> slice -> normalize to
 * workerData.embeddingDimension); other models use plain normalized mean
 * pooling. NOTE(review): the non-nomic branch hard-codes dimension 384
 * (MiniLM-style) — confirm if other legacy models are ever configured.
 */
async function initializeEmbedder() {
  if (embedder) return embedder; // already built for this worker

  const modelName = workerData.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
  const dimension = workerData.embeddingDimension || 256;
  const targetDim = VALID_DIMENSIONS.includes(dimension) ? dimension : 256;
  const isNomic = modelName.includes('nomic');

  const extractor = await pipeline("feature-extraction", modelName);

  if (isNomic) {
    // MRL embedder: truncate full-width embeddings down to targetDim.
    embedder = async function (text, options = {}) {
      let embeddings = await extractor(text, { pooling: 'mean' });
      embeddings = layer_norm(embeddings, [embeddings.dims[1]])
        .slice(null, [0, targetDim])
        .normalize(2, -1);
      return { data: embeddings.data };
    };
    embedder.dimension = targetDim;
  } else {
    // Legacy embedder (MiniLM etc.): normalized mean pooling, no slicing.
    embedder = async function (text, options = {}) {
      return await extractor(text, { pooling: 'mean', normalize: true });
    };
    embedder.dimension = 384;
  }

  embedder.modelName = modelName;
  return embedder;
}
@@ -65,3 +91,4 @@ initializeEmbedder().then(() => {
65
91
  }).catch((error) => {
66
92
  parentPort.postMessage({ type: "error", error: error.message });
67
93
  });
94
+
@@ -0,0 +1,133 @@
1
+ /**
2
+ * MRL (Matryoshka Representation Learning) Embedder
3
+ *
4
+ * Provides flexible embedding dimensions (64, 128, 256, 512, 768) using
5
+ * nomic-embed-text-v1.5 with layer normalization and dimension slicing.
6
+ */
7
+
8
+ import { pipeline, layer_norm } from '@huggingface/transformers';
9
+
10
// Matryoshka (MRL) truncation points supported by nomic-embed-text-v1.5.
const VALID_DIMENSIONS = [64, 128, 256, 512, 768];
12
+
13
/**
 * Create an MRL-enabled embedder with a configurable output dimension.
 *
 * The model emits full-width embeddings; MRL post-processing is
 * layer_norm -> slice to the target dimension -> L2-normalize, following
 * the nomic-embed-text-v1.5 usage recipe.
 *
 * @param {string} modelName - Model identifier (e.g. 'nomic-ai/nomic-embed-text-v1.5')
 * @param {object} [options]
 * @param {number} [options.dimension=256] - One of 64, 128, 256, 512, 768.
 * @param {string} [options.device='cpu'] - 'cpu', 'webgpu', or 'auto'.
 * @returns {Promise<Function>} async embed(text) -> { data, dims }; the
 *   function also carries .modelName, .dimension, and .device metadata.
 */
export async function createMRLEmbedder(modelName, options = {}) {
  const requestedDim = options.dimension || 256;
  const requestedDevice = options.device || 'cpu';

  // Invalid dimensions are reported and silently replaced by 256.
  if (!VALID_DIMENSIONS.includes(requestedDim)) {
    console.error(`[MRL] Invalid dimension ${requestedDim}, using 256. Valid: ${VALID_DIMENSIONS.join(', ')}`);
  }
  const targetDim = VALID_DIMENSIONS.includes(requestedDim) ? requestedDim : 256;

  console.error(`[MRL] Loading ${modelName} (output: ${targetDim}d, device: ${requestedDevice})`);

  const finalDevice = requestedDevice === 'auto' ? detectBestDevice() : requestedDevice;

  // Only webgpu needs an explicit pipeline option; cpu is the default.
  const pipelineOptions = {};
  if (finalDevice === 'webgpu') {
    pipelineOptions.device = 'webgpu';
  }

  const extractor = await pipeline('feature-extraction', modelName, pipelineOptions);

  console.error(`[MRL] Model loaded on ${finalDevice}`);

  /**
   * Embed text with MRL dimension slicing. Signature-compatible with the
   * existing embedder(text, options) callers; returns { data, dims }.
   */
  async function embed(text, embedOptions = {}) {
    // Mean-pool, then MRL: layer_norm over the hidden dim, truncate, renormalize.
    let embeddings = await extractor(text, { pooling: 'mean' });
    embeddings = layer_norm(embeddings, [embeddings.dims[1]])
      .slice(null, [0, targetDim])
      .normalize(2, -1);
    return {
      data: embeddings.data,
      dims: [embeddings.dims[0], targetDim]
    };
  }

  embed.modelName = modelName;
  embed.dimension = targetDim;
  embed.device = finalDevice;

  return embed;
}
75
+
76
/**
 * Pick the best available inference device.
 * Returns 'webgpu' only when a WebGPU-capable `navigator` is present
 * (browser, or a runtime exposing navigator.gpu); otherwise 'cpu'.
 * Node.js WebGPU is experimental (requires a flag), so Node defaults to cpu.
 */
function detectBestDevice() {
  const hasWebGPU = typeof navigator !== 'undefined' && Boolean(navigator.gpu);
  return hasWebGPU ? 'webgpu' : 'cpu';
}
90
+
91
/**
 * Create a legacy-compatible embedder (MiniLM-style, normalized mean pooling).
 * Used as the fallback when the MRL model fails to load.
 * NOTE(review): dimension is hard-coded to 384 — correct for MiniLM-L6/L12,
 * but not verified for arbitrary model names passed in.
 *
 * @param {string} [modelName='Xenova/all-MiniLM-L6-v2']
 * @returns {Promise<Function>} async embed(text) with .modelName/.dimension/.device
 */
export async function createLegacyEmbedder(modelName = 'Xenova/all-MiniLM-L6-v2') {
  console.error(`[Embedder] Loading legacy model: ${modelName}`);
  const extractor = await pipeline('feature-extraction', modelName);

  const embed = async (text, options = {}) => {
    return await extractor(text, { pooling: 'mean', normalize: true });
  };

  embed.modelName = modelName;
  embed.dimension = 384;
  embed.device = 'cpu';

  return embed;
}
110
+
111
+ /**
112
+ * Smart embedder factory - picks MRL or legacy based on config
113
+ */
114
+ export async function createEmbedder(config) {
115
+ const model = config.embeddingModel || 'nomic-ai/nomic-embed-text-v1.5';
116
+ const dimension = config.embeddingDimension || 256;
117
+ const device = config.device || 'cpu';
118
+
119
+ // Use MRL for nomic models
120
+ if (model.includes('nomic')) {
121
+ try {
122
+ return await createMRLEmbedder(model, { dimension, device });
123
+ } catch (err) {
124
+ console.error(`[Embedder] MRL model failed: ${err.message}, falling back to legacy`);
125
+ return await createLegacyEmbedder();
126
+ }
127
+ }
128
+
129
+ // Use legacy for MiniLM and other models
130
+ return await createLegacyEmbedder(model);
131
+ }
132
+
133
+ export { VALID_DIMENSIONS };
package/lib/tokenizer.js CHANGED
@@ -10,6 +10,10 @@
10
10
  * Each model has its own maximum sequence length
11
11
  */
12
12
  export const MODEL_TOKEN_LIMITS = {
13
+ // MRL / Nomic models (longer context)
14
+ "nomic-ai/nomic-embed-text-v1.5": 8192,
15
+ "nomic-ai/nomic-embed-text-v1": 2048,
16
+
13
17
  // Sentence Transformers / MiniLM family
14
18
  "Xenova/all-MiniLM-L6-v2": 256,
15
19
  "Xenova/all-MiniLM-L12-v2": 256,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-coding-mcp",
3
- "version": "1.4.1",
3
+ "version": "2.0.0",
4
4
  "description": "An extensible MCP server that enhances coding productivity with AI-powered features including semantic code search, intelligent indexing, and more, using local LLMs",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -45,11 +45,13 @@
45
45
  "homepage": "https://github.com/omar-haris/smart-coding-mcp#readme",
46
46
  "license": "MIT",
47
47
  "dependencies": {
48
+ "@huggingface/transformers": "^3.8.1",
48
49
  "@modelcontextprotocol/sdk": "^1.0.4",
49
- "@xenova/transformers": "^2.17.2",
50
50
  "chokidar": "^3.5.3",
51
+ "fastembed": "^2.1.0",
51
52
  "fdir": "^6.5.0",
52
- "glob": "^10.3.10"
53
+ "glob": "^10.3.10",
54
+ "web-tree-sitter": "^0.24.6"
53
55
  },
54
56
  "engines": {
55
57
  "node": ">=18.0.0"
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Tests for AST Chunker
3
+ *
4
+ * Tests the AST-based code chunking functionality:
5
+ * - Tree-sitter initialization
6
+ * - Language detection
7
+ * - Semantic chunking vs smart chunking fallback
8
+ */
9
+
10
+ import { describe, it, expect, beforeAll } from 'vitest';
11
+ import { ASTChunker, getChunker } from '../lib/ast-chunker.js';
12
+ import { loadConfig } from '../lib/config.js';
13
+
14
describe('AST Chunker', () => {
  let config;

  // One shared config for every suite below.
  beforeAll(async () => {
    config = await loadConfig();
  });

  // Fresh chunker per assertion keeps the tests independent.
  const makeChunker = () => new ASTChunker(config);

  describe('Chunker Factory', () => {
    it('should return AST chunker when mode is ast', () => {
      const chunker = getChunker({ ...config, chunkingMode: 'ast' });
      expect(chunker).toBeInstanceOf(ASTChunker);
    });

    it('should return smart chunker wrapper when mode is smart', () => {
      const chunker = getChunker({ ...config, chunkingMode: 'smart' });
      expect(typeof chunker.chunk).toBe('function');
      expect(chunker).not.toBeInstanceOf(ASTChunker);
    });
  });

  describe('Language Detection', () => {
    it('should detect JavaScript files', () => {
      const chunker = makeChunker();
      expect(chunker.getLanguageForFile('test.js')).toBe('javascript');
      expect(chunker.getLanguageForFile('test.mjs')).toBe('javascript');
      expect(chunker.getLanguageForFile('test.jsx')).toBe('javascript');
    });

    it('should detect TypeScript files', () => {
      const chunker = makeChunker();
      expect(chunker.getLanguageForFile('test.ts')).toBe('typescript');
      expect(chunker.getLanguageForFile('test.tsx')).toBe('typescript');
    });

    it('should detect Python files', () => {
      expect(makeChunker().getLanguageForFile('test.py')).toBe('python');
    });

    it('should return null for unsupported files', () => {
      const chunker = makeChunker();
      expect(chunker.getLanguageForFile('test.sql')).toBeNull();
      expect(chunker.getLanguageForFile('test.md')).toBeNull();
    });
  });

  describe('Fallback Behavior', () => {
    it('should fall back to smart chunking for unsupported languages', async () => {
      const sqlContent = 'SELECT * FROM users WHERE id = 1;';
      const chunks = await makeChunker().chunk(sqlContent, 'query.sql');
      expect(Array.isArray(chunks)).toBe(true);
    });

    it('should handle empty content', async () => {
      const chunks = await makeChunker().chunk('', 'empty.js');
      expect(Array.isArray(chunks)).toBe(true);
    });
  });

  describe('JavaScript Chunking', () => {
    it('should chunk JavaScript functions', async () => {
      const jsCode = `
function add(a, b) {
  return a + b;
}

function multiply(a, b) {
  return a * b;
}

class Calculator {
  constructor() {
    this.result = 0;
  }

  add(n) {
    this.result += n;
    return this;
  }
}
`;

      const chunks = await makeChunker().chunk(jsCode, 'calc.js');
      expect(Array.isArray(chunks)).toBe(true);
      // Exact chunk count depends on Tree-sitter grammar availability.
    });
  });
});