cntx-ui 3.0.8 → 3.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/bin/cntx-ui.js +70 -0
  2. package/dist/lib/agent-runtime.js +269 -0
  3. package/dist/lib/agent-tools.js +162 -0
  4. package/dist/lib/api-router.js +387 -0
  5. package/dist/lib/bundle-manager.js +236 -0
  6. package/dist/lib/configuration-manager.js +230 -0
  7. package/dist/lib/database-manager.js +277 -0
  8. package/dist/lib/file-system-manager.js +305 -0
  9. package/dist/lib/function-level-chunker.js +144 -0
  10. package/dist/lib/heuristics-manager.js +491 -0
  11. package/dist/lib/mcp-server.js +159 -0
  12. package/dist/lib/mcp-transport.js +10 -0
  13. package/dist/lib/semantic-splitter.js +335 -0
  14. package/dist/lib/simple-vector-store.js +98 -0
  15. package/dist/lib/treesitter-semantic-chunker.js +277 -0
  16. package/dist/lib/websocket-manager.js +268 -0
  17. package/dist/server.js +225 -0
  18. package/package.json +17 -8
  19. package/bin/cntx-ui-mcp.sh +0 -3
  20. package/bin/cntx-ui.js +0 -123
  21. package/lib/agent-runtime.js +0 -371
  22. package/lib/agent-tools.js +0 -370
  23. package/lib/api-router.js +0 -1026
  24. package/lib/bundle-manager.js +0 -326
  25. package/lib/configuration-manager.js +0 -760
  26. package/lib/database-manager.js +0 -397
  27. package/lib/file-system-manager.js +0 -489
  28. package/lib/function-level-chunker.js +0 -406
  29. package/lib/heuristics-manager.js +0 -529
  30. package/lib/mcp-server.js +0 -1380
  31. package/lib/mcp-transport.js +0 -97
  32. package/lib/semantic-splitter.js +0 -347
  33. package/lib/simple-vector-store.js +0 -108
  34. package/lib/treesitter-semantic-chunker.js +0 -1557
  35. package/lib/websocket-manager.js +0 -470
  36. package/server.js +0 -687
@@ -1,1557 +0,0 @@
1
- /**
2
- * Treesitter-based Semantic Chunker for JavaScript/TypeScript and Rust Files
3
- * Uses tree-sitter for true AST-based code analysis and semantic chunking
4
- * Supports JS/TS/JSX/TSX and Rust with equal treatment
5
- * Node ecosystem focus: React components, Express APIs, CLI tools, utilities
6
- * Rust ecosystem focus: standalone functions, structs, enums, traits
7
- */
8
-
9
- import { readFileSync, existsSync } from 'fs'
10
- import { extname, basename, dirname, relative, join } from 'path'
11
- import { glob } from 'glob'
12
- import Parser from 'tree-sitter'
13
- import JavaScript from 'tree-sitter-javascript'
14
- import TypeScript from 'tree-sitter-typescript'
15
- import Rust from 'tree-sitter-rust'
16
-
17
- class TreesitterSemanticChunker {
18
- constructor(options = {}) {
19
- this.options = {
20
- includeImports: true,
21
- includeExports: true,
22
- detectComponentTypes: true,
23
- groupRelatedFiles: true,
24
- minChunkSize: 100,
25
- maxChunkSize: 50000,
26
- namingStrategy: 'domain-based', // domain-based, pattern-based, graph-based
27
- ...options
28
- }
29
-
30
- // Initialize parsers for different languages
31
- this.parsers = {}
32
- this.initializeParsers()
33
-
34
- // Semantic patterns for Node/Rust ecosystem
35
- this.semanticPatterns = {
36
- reactComponent: this.isReactComponent.bind(this),
37
- reactHook: this.isReactHook.bind(this),
38
- expressRoute: this.isExpressRoute.bind(this),
39
- expressMiddleware: this.isExpressMiddleware.bind(this),
40
- cliCommand: this.isCliCommand.bind(this),
41
- utilityFunction: this.isUtilityFunction.bind(this),
42
- apiHandler: this.isApiHandler.bind(this),
43
- typeDefinition: this.isTypeDefinition.bind(this),
44
- configModule: this.isConfigModule.bind(this)
45
- }
46
- }
47
-
48
- /**
49
- * Initialize tree-sitter parsers for different languages
50
- */
51
- initializeParsers() {
52
- // JavaScript parser
53
- this.parsers.javascript = new Parser()
54
- this.parsers.javascript.setLanguage(JavaScript)
55
-
56
- // TypeScript parser
57
- this.parsers.typescript = new Parser()
58
- this.parsers.typescript.setLanguage(TypeScript.typescript)
59
-
60
- // TSX parser
61
- this.parsers.tsx = new Parser()
62
- this.parsers.tsx.setLanguage(TypeScript.tsx)
63
-
64
- // Rust parser
65
- this.parsers.rust = new Parser()
66
- this.parsers.rust.setLanguage(Rust)
67
- }
68
-
69
- /**
70
- * Get appropriate parser for file extension
71
- */
72
- getParser(filePath) {
73
- const ext = extname(filePath)
74
- switch (ext) {
75
- case '.ts': return this.parsers.typescript
76
- case '.tsx': return this.parsers.tsx
77
- case '.rs': return this.parsers.rust
78
- case '.js':
79
- case '.jsx':
80
- default: return this.parsers.javascript
81
- }
82
- }
83
-
84
- /**
85
- * Main entry point - analyze files and create semantic chunks
86
- */
87
- async analyzeProject(projectPath, patterns = ['**/*.{js,jsx,ts,tsx,rs}']) {
88
- console.log('🔍 Starting treesitter-based semantic analysis...')
89
-
90
- const files = await this.findFiles(projectPath, patterns)
91
- console.log(`📁 Found ${files.length} files to analyze`)
92
-
93
- const analysis = await this.analyzeFiles(files, projectPath)
94
- const successfulFiles = Object.keys(analysis).filter(f => !analysis[f].error)
95
- console.log(`✅ Analyzed ${Object.keys(analysis).length} files (${successfulFiles.length} successful)`)
96
- if (successfulFiles.length > 0) {
97
- console.log('📝 Sample successful files:', successfulFiles.slice(0, 5))
98
- }
99
-
100
- const relationshipGraph = this.buildRelationshipGraph(analysis)
101
- console.log(`🔗 Built relationship graph with ${Object.keys(relationshipGraph).length} nodes`)
102
-
103
- const chunks = await this.createSmartChunks(analysis, relationshipGraph)
104
- console.log(`📦 Created ${chunks.length} semantic chunks`)
105
-
106
- return {
107
- summary: this.generateSummary(analysis, chunks),
108
- files: analysis,
109
- chunks: chunks,
110
- relationshipGraph,
111
- recommendations: this.generateRecommendations(analysis, chunks)
112
- }
113
- }
114
-
115
- /**
116
- * Find files matching patterns
117
- */
118
- async findFiles(projectPath, patterns) {
119
- const files = []
120
-
121
- for (const pattern of patterns) {
122
- const matches = await glob(pattern, {
123
- cwd: projectPath,
124
- ignore: [
125
- 'node_modules/**', 'dist/**', 'build/**', '.git/**',
126
- '*.test.*', '*.spec.*', '**/test/**', '**/tests/**',
127
- '**/*.min.js', '**/*.bundle.js', '**/coverage/**',
128
- '**/.next/**', '**/.cache/**', '**/tmp/**', '**/temp/**'
129
- ]
130
- })
131
-
132
- // Extra filter to ensure no node_modules files get through
133
- const filteredMatches = matches.filter(file =>
134
- !file.includes('node_modules') &&
135
- !file.includes('dist/') &&
136
- !file.includes('.min.') &&
137
- !file.includes('.bundle.')
138
- )
139
-
140
- files.push(...filteredMatches)
141
- }
142
-
143
- return [...new Set(files)] // Remove duplicates
144
- }
145
-
146
- /**
147
- * Analyze all files using treesitter
148
- */
149
- async analyzeFiles(filePaths, projectPath) {
150
- const analysis = {}
151
-
152
- for (const relativePath of filePaths) {
153
- // Bulletproof check to skip node_modules
154
- if (relativePath.includes('node_modules')) {
155
- console.log(`Skipping node_modules file: ${relativePath}`);
156
- continue;
157
- }
158
-
159
- const fullPath = join(projectPath, relativePath)
160
- if (!existsSync(fullPath)) continue
161
-
162
- try {
163
- const content = readFileSync(fullPath, 'utf8')
164
- const fileAnalysis = await this.analyzeFile(fullPath, content)
165
- fileAnalysis.path = relativePath // Store relative path
166
- analysis[relativePath] = fileAnalysis
167
- } catch (error) {
168
- // Silently skip files that can't be parsed - they won't be included in semantic analysis
169
- // This is normal for complex files or unsupported syntax patterns
170
- analysis[relativePath] = { error: error.message, path: relativePath }
171
- }
172
- }
173
-
174
- return analysis
175
- }
176
-
177
- /**
178
- * Analyze a single file using treesitter AST
179
- */
180
- async analyzeFile(filePath, content) {
181
- const parser = this.getParser(filePath)
182
-
183
- // Skip files that are too large or have syntax errors
184
- if (content.length > 500000) { // Skip files > 500KB
185
- throw new Error('File too large')
186
- }
187
-
188
- let tree, rootNode
189
- try {
190
- // Use simple string parsing (confirmed working in tests)
191
- tree = parser.parse(content)
192
- rootNode = tree.rootNode
193
- } catch (error) {
194
- throw new Error(`Tree-sitter parse failed: ${error.message}`)
195
- }
196
-
197
- const analysis = {
198
- path: filePath,
199
- fileName: basename(filePath),
200
- dirName: basename(dirname(filePath)),
201
- extension: extname(filePath),
202
- size: content.length,
203
- lines: content.split('\n').length,
204
-
205
- // AST-based analysis
206
- ast: {
207
- functions: this.extractFunctions(rootNode, content),
208
- classes: this.extractClasses(rootNode, content),
209
- imports: this.extractImports(rootNode, content),
210
- exports: this.extractExports(rootNode, content),
211
- variables: this.extractVariables(rootNode, content),
212
- jsxElements: this.extractJsxElements(rootNode, content),
213
- typeDefinitions: this.extractTypeDefinitions(rootNode, content)
214
- },
215
-
216
- // Semantic classification
217
- semanticType: this.classifyFileSemantics(rootNode, content, filePath),
218
- businessDomain: this.extractBusinessDomain(rootNode, content, filePath),
219
- technicalPatterns: this.identifyTechnicalPatterns(rootNode, content),
220
-
221
- // Relationships
222
- dependencies: this.analyzeDependencies(rootNode, content),
223
- complexity: this.calculateAstComplexity(rootNode),
224
-
225
- // Metadata
226
- codeSignature: this.generateCodeSignature(rootNode, content)
227
- }
228
-
229
- // Generate semantic tags based on AST analysis
230
- analysis.semanticTags = this.generateSemanticTags(analysis)
231
-
232
- return analysis
233
- }
234
-
235
- /**
236
- * Extract function declarations from AST
237
- */
238
- extractFunctions(rootNode, content) {
239
- const functions = []
240
-
241
- // JS/TS Function declarations
242
- const functionDeclarations = this.queryNode(rootNode, '(function_declaration name: (identifier) @name)')
243
- functions.push(...functionDeclarations.map(capture => ({
244
- name: this.getNodeText(capture.node, content),
245
- type: 'function_declaration',
246
- startPosition: capture.node.startPosition,
247
- endPosition: capture.node.endPosition,
248
- isExported: this.isNodeExported(capture.node)
249
- })))
250
-
251
- // JS/TS Arrow functions
252
- const arrowFunctions = this.queryNode(rootNode, '(variable_declarator name: (identifier) @name value: (arrow_function))')
253
- functions.push(...arrowFunctions.map(capture => ({
254
- name: this.getNodeText(capture.node, content),
255
- type: 'arrow_function',
256
- startPosition: capture.node.startPosition,
257
- endPosition: capture.node.endPosition,
258
- isExported: this.isNodeExported(capture.node.parent.parent)
259
- })))
260
-
261
- // JS/TS Method definitions
262
- const methods = this.queryNode(rootNode, '(method_definition name: (property_name) @name)')
263
- functions.push(...methods.map(capture => ({
264
- name: this.getNodeText(capture.node, content),
265
- type: 'method',
266
- startPosition: capture.node.startPosition,
267
- endPosition: capture.node.endPosition,
268
- isExported: false // methods are part of classes
269
- })))
270
-
271
- // Rust function items
272
- const rustFunctions = this.queryNode(rootNode, '(function_item name: (identifier) @name)')
273
- functions.push(...rustFunctions.map(capture => ({
274
- name: this.getNodeText(capture.node, content),
275
- type: 'function_item',
276
- startPosition: capture.node.startPosition,
277
- endPosition: capture.node.endPosition,
278
- isExported: this.isNodeExported(capture.node)
279
- })))
280
-
281
- return functions
282
- }
283
-
284
- /**
285
- * Extract class declarations from AST
286
- */
287
- extractClasses(rootNode, content) {
288
- const classes = []
289
-
290
- const classDeclarations = this.queryNode(rootNode, '(class_declaration name: (identifier) @name)')
291
- classes.push(...classDeclarations.map(capture => ({
292
- name: this.getNodeText(capture.node, content),
293
- type: 'class',
294
- startPosition: capture.node.startPosition,
295
- endPosition: capture.node.endPosition,
296
- isExported: this.isNodeExported(capture.node.parent),
297
- methods: this.extractClassMethods(capture.node.parent, content)
298
- })))
299
-
300
- return classes
301
- }
302
-
303
- /**
304
- * Extract class methods
305
- */
306
- extractClassMethods(classNode, content) {
307
- const methods = []
308
-
309
- try {
310
- const methodNodes = this.queryNode(classNode, '(method_definition)')
311
- methods.push(...methodNodes.map(capture => ({
312
- name: this.getNodeText(capture.node, content),
313
- type: 'method',
314
- startPosition: capture.node.startPosition,
315
- endPosition: capture.node.endPosition
316
- })))
317
- } catch (error) {
318
- // Handle case where method extraction fails
319
- }
320
-
321
- return methods
322
- }
323
-
324
- /**
325
- * Extract import statements from AST
326
- */
327
- extractImports(rootNode, content) {
328
- const imports = []
329
-
330
- // JS/TS imports
331
- const importStatements = this.queryNode(rootNode, '(import_statement source: (string) @source)')
332
- imports.push(...importStatements.map(capture => {
333
- const source = this.getNodeText(capture.node, content).replace(/['"]/g, '')
334
- return {
335
- source,
336
- statement: this.getNodeText(capture.node.parent, content),
337
- isRelative: source.startsWith('.'),
338
- isExternal: !source.startsWith('.') && !source.startsWith('/'),
339
- importedNames: this.extractImportedNames(capture.node.parent, content)
340
- }
341
- }))
342
-
343
- // Rust use declarations
344
- const rustUseStatements = this.queryNode(rootNode, '(use_declaration)')
345
- imports.push(...rustUseStatements.map(capture => {
346
- const statement = this.getNodeText(capture.node, content)
347
- return {
348
- source: 'rust',
349
- statement,
350
- isRelative: statement.includes('self::') || statement.includes('super::'),
351
- isExternal: !statement.includes('crate::') && !statement.includes('self::') && !statement.includes('super::'),
352
- importedNames: [] // Complexity of parsing Rust use paths is high for this simple chunker
353
- }
354
- }))
355
-
356
- return imports
357
- }
358
-
359
- /**
360
- * Extract export statements from AST
361
- */
362
- extractExports(rootNode, content) {
363
- const exports = []
364
-
365
- // Export declarations (JS/TS)
366
- const exportDeclarations = this.queryNode(rootNode, '(export_statement)')
367
- exports.push(...exportDeclarations.map(capture => {
368
- const exportNode = capture.node
369
- const declaration = exportNode.namedChild(0)
370
-
371
- if (declaration) {
372
- return {
373
- type: declaration.type === 'export_clause' ? 'named' : 'declaration',
374
- name: this.extractExportName(declaration, content),
375
- statement: this.getNodeText(exportNode, content),
376
- isDefault: this.getNodeText(exportNode, content).includes('default')
377
- }
378
- }
379
- return null
380
- }).filter(Boolean))
381
-
382
- return exports
383
- }
384
-
385
- /**
386
- * Extract variable declarations from AST
387
- */
388
- extractVariables(rootNode, content) {
389
- const variables = []
390
-
391
- const variableDeclarations = this.queryNode(rootNode, '(variable_declarator name: (identifier) @name)')
392
- variables.push(...variableDeclarations.map(capture => ({
393
- name: this.getNodeText(capture.node, content),
394
- type: 'variable',
395
- startPosition: capture.node.startPosition,
396
- endPosition: capture.node.endPosition,
397
- isExported: this.isNodeExported(capture.node.parent.parent),
398
- declarationType: capture.node.parent.parent.type // const, let, var
399
- })))
400
-
401
- return variables
402
- }
403
-
404
- /**
405
- * Extract JSX elements from AST (for React components)
406
- */
407
- extractJsxElements(rootNode, content) {
408
- const jsxElements = []
409
-
410
- try {
411
- const jsxNodes = this.queryNode(rootNode, '(jsx_element)')
412
- jsxElements.push(...jsxNodes.map(capture => ({
413
- elementName: this.extractJsxElementName(capture.node, content),
414
- startPosition: capture.node.startPosition,
415
- endPosition: capture.node.endPosition
416
- })))
417
- } catch (error) {
418
- // JSX might not be available in JavaScript parser
419
- }
420
-
421
- return jsxElements
422
- }
423
-
424
- /**
425
- * Extract TypeScript type definitions from AST
426
- */
427
- extractTypeDefinitions(rootNode, content) {
428
- const types = []
429
-
430
- try {
431
- // JS/TS Interface declarations
432
- const interfaces = this.queryNode(rootNode, '(interface_declaration name: (type_identifier) @name)')
433
- types.push(...interfaces.map(capture => ({
434
- name: this.getNodeText(capture.node, content),
435
- type: 'interface',
436
- startPosition: capture.node.startPosition,
437
- endPosition: capture.node.endPosition,
438
- isExported: this.isNodeExported(capture.node.parent)
439
- })))
440
-
441
- // JS/TS Type alias declarations
442
- const typeAliases = this.queryNode(rootNode, '(type_alias_declaration name: (type_identifier) @name)')
443
- types.push(...typeAliases.map(capture => ({
444
- name: this.getNodeText(capture.node, content),
445
- type: 'type_alias',
446
- startPosition: capture.node.startPosition,
447
- endPosition: capture.node.endPosition,
448
- isExported: this.isNodeExported(capture.node.parent)
449
- })))
450
-
451
- // Rust struct definitions
452
- const structs = this.queryNode(rootNode, '(struct_item name: (type_identifier) @name)')
453
- types.push(...structs.map(capture => ({
454
- name: this.getNodeText(capture.node, content),
455
- type: 'struct',
456
- startPosition: capture.node.startPosition,
457
- endPosition: capture.node.endPosition,
458
- isExported: this.isNodeExported(capture.node)
459
- })))
460
-
461
- // Rust enum definitions
462
- const enums = this.queryNode(rootNode, '(enum_item name: (type_identifier) @name)')
463
- types.push(...enums.map(capture => ({
464
- name: this.getNodeText(capture.node, content),
465
- type: 'enum',
466
- startPosition: capture.node.startPosition,
467
- endPosition: capture.node.endPosition,
468
- isExported: this.isNodeExported(capture.node)
469
- })))
470
-
471
- // Rust trait definitions
472
- const traits = this.queryNode(rootNode, '(trait_item name: (type_identifier) @name)')
473
- types.push(...traits.map(capture => ({
474
- name: this.getNodeText(capture.node, content),
475
- type: 'trait',
476
- startPosition: capture.node.startPosition,
477
- endPosition: capture.node.endPosition,
478
- isExported: this.isNodeExported(capture.node)
479
- })))
480
- } catch (error) {
481
- // TypeScript/Rust types might not be available
482
- }
483
-
484
- return types
485
- }
486
-
487
- /**
488
- * Classify file semantics based on AST patterns
489
- */
490
- classifyFileSemantics(rootNode, content, filePath) {
491
- const classifications = []
492
-
493
- // Test each semantic pattern
494
- for (const [patternName, patternFn] of Object.entries(this.semanticPatterns)) {
495
- if (patternFn(rootNode, content, filePath)) {
496
- classifications.push(patternName)
497
- }
498
- }
499
-
500
- // Return primary classification (most specific first)
501
- const priority = ['reactComponent', 'reactHook', 'expressRoute', 'expressMiddleware',
502
- 'cliCommand', 'apiHandler', 'typeDefinition', 'configModule', 'utilityFunction']
503
-
504
- for (const pattern of priority) {
505
- if (classifications.includes(pattern)) {
506
- return pattern
507
- }
508
- }
509
-
510
- return 'module'
511
- }
512
-
513
- /**
514
- * Semantic pattern: React Component
515
- */
516
- isReactComponent(rootNode, content, filePath) {
517
- // Check for JSX elements
518
- const hasJsx = this.queryNode(rootNode, '(jsx_element)').length > 0
519
-
520
- // Check for React imports
521
- const hasReactImport = content.includes("import React") || content.includes("from 'react'")
522
-
523
- // Check for component naming pattern
524
- const fileName = basename(filePath, extname(filePath))
525
- const hasComponentName = fileName[0] === fileName[0].toUpperCase()
526
-
527
- // Check for function that returns JSX
528
- const functions = this.extractFunctions(rootNode, content)
529
- const hasComponentFunction = functions.some(fn =>
530
- fn.isExported && fn.name[0] === fn.name[0].toUpperCase()
531
- )
532
-
533
- return (hasJsx && (hasReactImport || hasComponentName)) ||
534
- (hasComponentFunction && hasReactImport)
535
- }
536
-
537
- /**
538
- * Semantic pattern: React Hook
539
- */
540
- isReactHook(rootNode, content, filePath) {
541
- const fileName = basename(filePath, extname(filePath))
542
- const hasHookName = fileName.startsWith('use') && fileName[3] === fileName[3].toUpperCase()
543
-
544
- const functions = this.extractFunctions(rootNode, content)
545
- const hasHookFunction = functions.some(fn =>
546
- fn.name.startsWith('use') && fn.name[3] === fn.name[3].toUpperCase() && fn.isExported
547
- )
548
-
549
- const hasReactHookImports = content.includes("from 'react'") &&
550
- (content.includes('useState') || content.includes('useEffect'))
551
-
552
- return hasHookName || (hasHookFunction && hasReactHookImports)
553
- }
554
-
555
- /**
556
- * Semantic pattern: Express Route
557
- */
558
- isExpressRoute(rootNode, content, filePath) {
559
- const hasExpressImport = content.includes("from 'express'") || content.includes("require('express')")
560
- const hasRouterMethods = /\.(get|post|put|delete|patch)\s*\(/.test(content)
561
- const hasRoutePattern = /['"`]\/[^'"`]*['"`]/.test(content)
562
-
563
- return hasExpressImport && hasRouterMethods && hasRoutePattern
564
- }
565
-
566
- /**
567
- * Semantic pattern: Express Middleware
568
- */
569
- isExpressMiddleware(rootNode, content, filePath) {
570
- const hasMiddlewarePattern = /\(req,\s*res,\s*next\)|function\s*\([^)]*req[^)]*res[^)]*next/.test(content)
571
- const hasExpressImport = content.includes("from 'express'") || content.includes("require('express')")
572
- const fileName = basename(filePath).toLowerCase()
573
-
574
- return (hasMiddlewarePattern && hasExpressImport) || fileName.includes('middleware')
575
- }
576
-
577
- /**
578
- * Semantic pattern: CLI Command
579
- */
580
- isCliCommand(rootNode, content, filePath) {
581
- const hasCommanderImport = content.includes('commander') || content.includes('yargs')
582
- const hasProcessArgv = content.includes('process.argv')
583
- const hasCliPatterns = content.includes('.command(') || content.includes('.option(')
584
- const fileName = basename(filePath).toLowerCase()
585
-
586
- return hasCommanderImport || (hasProcessArgv && hasCliPatterns) || fileName.includes('cli')
587
- }
588
-
589
- /**
590
- * Semantic pattern: Utility Function
591
- */
592
- isUtilityFunction(rootNode, content, filePath) {
593
- const functions = this.extractFunctions(rootNode, content)
594
- const hasMultipleExportedFunctions = functions.filter(fn => fn.isExported).length > 1
595
-
596
- const fileName = basename(filePath).toLowerCase()
597
- const hasUtilityName = fileName.includes('util') || fileName.includes('helper') || fileName.includes('lib')
598
-
599
- const hasNoDomSpecificImports = !content.includes('react') && !content.includes('express')
600
-
601
- return hasUtilityName || (hasMultipleExportedFunctions && hasNoDomSpecificImports)
602
- }
603
-
604
- /**
605
- * Semantic pattern: API Handler
606
- */
607
- isApiHandler(rootNode, content, filePath) {
608
- const hasApiPattern = /api|handler|controller/i.test(filePath)
609
- const hasFetchPattern = content.includes('fetch(') || content.includes('axios')
610
- const hasHttpMethods = /\b(GET|POST|PUT|DELETE|PATCH)\b/.test(content)
611
-
612
- return hasApiPattern || (hasFetchPattern && hasHttpMethods)
613
- }
614
-
615
- /**
616
- * Semantic pattern: Type Definition
617
- */
618
- isTypeDefinition(rootNode, content, filePath) {
619
- const types = this.extractTypeDefinitions(rootNode, content)
620
- const hasTypeDefinitions = types.length > 0
621
-
622
- const fileName = basename(filePath).toLowerCase()
623
- const hasTypeFileName = fileName.includes('type') || fileName.includes('.d.ts')
624
-
625
- const hasOnlyTypes = hasTypeDefinitions &&
626
- this.extractFunctions(rootNode, content).length === 0 &&
627
- this.extractClasses(rootNode, content).length === 0
628
-
629
- return hasTypeFileName || hasOnlyTypes
630
- }
631
-
632
- /**
633
- * Semantic pattern: UI Component (generic for any language)
634
- */
635
- isUiComponent(rootNode, content, filePath) {
636
- // JS/TS logic
637
- if (extname(filePath).match(/\.(jsx|tsx|js|ts)$/)) {
638
- return this.isReactComponent(rootNode, content, filePath)
639
- }
640
- // TODO: Add generic patterns for other languages (Rust templates, etc.)
641
- return false
642
- }
643
-
644
- /**
645
- * Semantic pattern: Config Module
646
- */
647
- isConfigModule(rootNode, content, filePath) {
648
- const fileName = basename(filePath).toLowerCase()
649
- const hasConfigName = fileName.includes('config') || fileName.includes('setting')
650
-
651
- const hasConfigPatterns = content.includes('module.exports') || content.includes('export default') || content.includes('Cargo.toml')
652
- const hasConfigObject = /\{[\s\S]*\}/.test(content) && !/function|class/.test(content)
653
-
654
- return hasConfigName && (hasConfigPatterns || hasConfigObject)
655
- }
656
-
657
- /**
658
- * Extract business domain terms from code
659
- */
660
- extractBusinessDomain(rootNode, content, filePath) {
661
- const domains = []
662
-
663
- // Focus on meaningful path segments instead of generic business terms
664
- const pathSegments = filePath.split('/').filter(s => s && s !== 'src' && s !== 'lib' && s !== 'components')
665
- const fileName = basename(filePath, extname(filePath))
666
-
667
- // Extract domain from directory structure (more reliable than keywords)
668
- if (pathSegments.length > 0) {
669
- const relevantSegments = pathSegments.slice(-2) // Last 2 directories
670
- domains.push(...relevantSegments.map(s => s.toLowerCase()))
671
- }
672
-
673
- // Add meaningful file-based domains
674
- if (fileName.toLowerCase().includes('config')) domains.push('configuration')
675
- if (fileName.toLowerCase().includes('test')) domains.push('testing')
676
- if (fileName.toLowerCase().includes('util')) domains.push('utilities')
677
- if (fileName.toLowerCase().includes('api')) domains.push('api')
678
- if (fileName.toLowerCase().includes('ui') || fileName.toLowerCase().includes('component')) {
679
- domains.push('user-interface')
680
- }
681
-
682
- // Only return meaningful, non-generic domains
683
- return [...new Set(domains)].filter(domain =>
684
- domain.length > 2 && !['web', 'src', 'ts', 'js', 'tsx', 'jsx'].includes(domain)
685
- )
686
- }
687
-
688
- /**
689
- * Identify technical patterns in the code
690
- */
691
- identifyTechnicalPatterns(rootNode, content) {
692
- const patterns = []
693
-
694
- // Framework patterns
695
- if (content.includes('react')) patterns.push('react')
696
- if (content.includes('express')) patterns.push('express')
697
- if (content.includes('typescript')) patterns.push('typescript')
698
- if (content.includes('cargo') || extname(content) === '.rs') patterns.push('rust')
699
-
700
- // Architecture patterns
701
- if (content.includes('async') && (content.includes('await') || content.includes('.await'))) patterns.push('async-await')
702
- if (content.includes('Promise')) patterns.push('promises')
703
- if (content.includes('class') && content.includes('extends')) patterns.push('inheritance')
704
- if (content.includes('unsafe')) patterns.push('unsafe-code')
705
-
706
- // Design patterns
707
- const functions = this.extractFunctions(rootNode, content)
708
- if (functions.some(f => f.name.includes('Factory'))) patterns.push('factory-pattern')
709
- if (functions.some(f => f.name.includes('Observer'))) patterns.push('observer-pattern')
710
-
711
- return patterns
712
- }
713
-
714
- /**
715
- * Build relationship graph between files
716
- */
717
- buildRelationshipGraph(analysis) {
718
- const graph = {}
719
-
720
- for (const [filePath, fileAnalysis] of Object.entries(analysis)) {
721
- if (fileAnalysis.error) continue
722
-
723
- graph[filePath] = {
724
- imports: [],
725
- importedBy: [],
726
- semanticSimilarity: {},
727
- businessDomainOverlap: {},
728
- technicalPatternOverlap: {}
729
- }
730
- }
731
-
732
- // Build import relationships
733
- for (const [filePath, fileAnalysis] of Object.entries(analysis)) {
734
- if (fileAnalysis.error) continue
735
-
736
- for (const imp of fileAnalysis.ast.imports) {
737
- if (imp.isRelative) {
738
- // Resolve relative import to actual file path
739
- const importPath = this.resolveImportPath(filePath, imp.source)
740
- if (graph[importPath]) {
741
- graph[filePath].imports.push(importPath)
742
- graph[importPath].importedBy.push(filePath)
743
- }
744
- }
745
- }
746
- }
747
-
748
- // Calculate semantic similarities
749
- for (const [fileA, analysisA] of Object.entries(analysis)) {
750
- if (analysisA.error) continue
751
-
752
- for (const [fileB, analysisB] of Object.entries(analysis)) {
753
- if (analysisB.error || fileA === fileB) continue
754
-
755
- // Semantic type similarity
756
- const semanticSimilarity = analysisA.semanticType === analysisB.semanticType ? 1.0 : 0.0
757
-
758
- // Business domain overlap
759
- const domainOverlap = this.calculateOverlap(analysisA.businessDomain, analysisB.businessDomain)
760
-
761
- // Technical pattern overlap
762
- const patternOverlap = this.calculateOverlap(analysisA.technicalPatterns, analysisB.technicalPatterns)
763
-
764
- if (semanticSimilarity > 0 || domainOverlap > 0 || patternOverlap > 0) {
765
- graph[fileA].semanticSimilarity[fileB] = semanticSimilarity
766
- graph[fileA].businessDomainOverlap[fileB] = domainOverlap
767
- graph[fileA].technicalPatternOverlap[fileB] = patternOverlap
768
- }
769
- }
770
- }
771
-
772
- return graph
773
- }
774
-
775
- /**
776
- * Create smart chunks using clustering algorithms
777
- */
778
- async createSmartChunks(analysis, relationshipGraph) {
779
-
780
- // Start with individual files as nodes
781
- const nodes = Object.keys(analysis).filter(path => !analysis[path].error)
782
- console.log(`🧩 Starting with ${nodes.length} nodes for clustering`)
783
-
784
- // Apply different clustering strategies
785
- const strategies = [
786
- this.clusterBySemanticType.bind(this),
787
- this.clusterByBusinessDomain.bind(this),
788
- this.clusterByDependencyGraph.bind(this),
789
- this.clusterByDirectoryStructure.bind(this)
790
- ]
791
-
792
- let clusters = nodes.map(node => [node]) // Start with individual nodes
793
-
794
- // Apply clustering strategies
795
- for (const strategy of strategies) {
796
- clusters = strategy(clusters, analysis, relationshipGraph)
797
- console.log(`📦 After ${strategy.name}: ${clusters.length} clusters`)
798
- }
799
-
800
- // Convert clusters to named chunks
801
- const chunks = []
802
- const usedNames = new Set()
803
-
804
- for (const cluster of clusters) {
805
- if (cluster.length === 0) continue
806
-
807
- let chunkName = await this.generateChunkName(cluster, analysis)
808
-
809
- // Ensure unique names
810
- let uniqueName = chunkName
811
- let counter = 1
812
- while (usedNames.has(uniqueName)) {
813
- uniqueName = `${chunkName}-${counter}`
814
- counter++
815
- }
816
- usedNames.add(uniqueName)
817
-
818
- const chunk = {
819
- name: uniqueName,
820
- type: this.determineChunkType(cluster, analysis),
821
- files: cluster,
822
- size: cluster.reduce((sum, file) => sum + analysis[file].size, 0),
823
- complexity: this.calculateClusterComplexity(cluster, analysis),
824
- dependencies: this.calculateClusterDependencies(cluster, analysis),
825
- businessDomains: this.extractClusterBusinessDomains(cluster, analysis),
826
- technicalPatterns: this.extractClusterTechnicalPatterns(cluster, analysis),
827
- purpose: this.determineClusterPurpose(cluster, analysis),
828
- cohesion: this.calculateClusterCohesion(cluster, relationshipGraph),
829
- recommendations: this.generateClusterRecommendations(cluster, analysis),
830
- tags: this.generateTags(cluster, analysis)
831
- }
832
-
833
- chunks.push(chunk)
834
- }
835
-
836
- return chunks.sort((a, b) => b.cohesion - a.cohesion) // Sort by cohesion (best chunks first)
837
- }
838
-
839
- /**
840
- * Cluster files by semantic type
841
- */
842
- clusterBySemanticType(clusters, analysis, relationshipGraph) {
843
- const semanticGroups = {}
844
-
845
- for (const cluster of clusters) {
846
- for (const file of cluster) {
847
- const semanticType = analysis[file].semanticType
848
- if (!semanticGroups[semanticType]) {
849
- semanticGroups[semanticType] = []
850
- }
851
- semanticGroups[semanticType].push(file)
852
- }
853
- }
854
-
855
- return Object.values(semanticGroups).filter(group => group.length > 0)
856
- }
857
-
858
- /**
859
- * Cluster files by business domain
860
- */
861
- clusterByBusinessDomain(clusters, analysis, relationshipGraph) {
862
- const domainGroups = {}
863
-
864
- for (const cluster of clusters) {
865
- for (const file of cluster) {
866
- const domains = analysis[file].businessDomain
867
-
868
- if (domains.length === 0) {
869
- // Files with no clear domain go to 'general' group
870
- if (!domainGroups.general) domainGroups.general = []
871
- domainGroups.general.push(file)
872
- } else {
873
- // Files go to their primary domain group
874
- const primaryDomain = domains[0]
875
- if (!domainGroups[primaryDomain]) domainGroups[primaryDomain] = []
876
- domainGroups[primaryDomain].push(file)
877
- }
878
- }
879
- }
880
-
881
- return Object.values(domainGroups).filter(group => group.length > 0)
882
- }
883
-
884
- /**
885
- * Cluster files by dependency relationships
886
- */
887
- clusterByDependencyGraph(clusters, analysis, relationshipGraph) {
888
- const dependencyGroups = []
889
- const visited = new Set()
890
-
891
- for (const cluster of clusters) {
892
- for (const file of cluster) {
893
- if (visited.has(file)) continue
894
-
895
- // Find all files connected to this file through imports
896
- const connected = this.findConnectedFiles(file, relationshipGraph, new Set())
897
-
898
- // Filter to only files in current clusters
899
- const relevantConnected = connected.filter(f =>
900
- clusters.some(cluster => cluster.includes(f))
901
- )
902
-
903
- if (relevantConnected.length > 1) {
904
- dependencyGroups.push(relevantConnected)
905
- relevantConnected.forEach(f => visited.add(f))
906
- } else {
907
- // Isolated file becomes its own group
908
- dependencyGroups.push([file])
909
- visited.add(file)
910
- }
911
- }
912
- }
913
-
914
- return dependencyGroups.filter(group => group.length > 0)
915
- }
916
-
917
- /**
918
- * Cluster files by directory structure
919
- */
920
- clusterByDirectoryStructure(clusters, analysis, relationshipGraph) {
921
- const directoryGroups = {}
922
-
923
- for (const cluster of clusters) {
924
- for (const file of cluster) {
925
- const dir = dirname(file)
926
- if (!directoryGroups[dir]) {
927
- directoryGroups[dir] = []
928
- }
929
- directoryGroups[dir].push(file)
930
- }
931
- }
932
-
933
- return Object.values(directoryGroups).filter(group => group.length > 0)
934
- }
935
-
936
- /**
937
- * Generate intelligent chunk name
938
- */
939
- async generateChunkName(files, analysis) {
940
- const namingStrategies = {
941
- domainBased: this.generateDomainBasedName.bind(this),
942
- patternBased: this.generatePatternBasedName.bind(this),
943
- functionalityBased: this.generateFunctionalityBasedName.bind(this)
944
- }
945
-
946
- const names = {}
947
-
948
- for (const [strategy, generator] of Object.entries(namingStrategies)) {
949
- try {
950
- names[strategy] = generator(files, analysis)
951
- } catch (error) {
952
- names[strategy] = 'unnamed-chunk'
953
- }
954
- }
955
-
956
- // Choose best name based on strategy preference - prefer pattern-based for better names
957
- const strategy = 'patternBased' // Force pattern-based naming
958
- return names[strategy] || names.patternBased || names.functionalityBased || names.domainBased || 'unknown-chunk'
959
- }
960
-
961
- /**
962
- * Generate domain-based chunk name
963
- */
964
- generateDomainBasedName(files, analysis) {
965
- // Always fallback to pattern-based naming since domain extraction is unreliable
966
- return this.generatePatternBasedName(files, analysis)
967
- }
968
-
969
- /**
970
- * Generate pattern-based chunk name
971
- */
972
- generatePatternBasedName(files, analysis) {
973
- const semanticTypes = files.map(file => analysis[file].semanticType)
974
- const mostCommon = this.getMostCommon(semanticTypes)
975
-
976
- // Look at actual file names and directories for context
977
- const commonPath = this.findCommonPath(files)
978
- const dirName = commonPath ? basename(dirname(commonPath)) : null
979
-
980
- const typeNames = {
981
- reactComponent: 'ui-components',
982
- reactHook: 'react-hooks',
983
- expressRoute: 'server-routes',
984
- expressMiddleware: 'server-middleware',
985
- utilityFunction: 'utility-functions',
986
- typeDefinition: 'type-definitions',
987
- configModule: 'configuration',
988
- cliCommand: 'cli-tools',
989
- apiHandler: 'api-endpoints',
990
- module: 'shared-modules'
991
- }
992
-
993
- let baseName = typeNames[mostCommon] || 'mixed-files'
994
-
995
- // Add more specific context based on file paths
996
- if (commonPath) {
997
- if (commonPath.includes('/components/ui/')) {
998
- baseName = 'ui-library-components'
999
- } else if (commonPath.includes('/components/')) {
1000
- baseName = 'application-components'
1001
- } else if (commonPath.includes('/hooks/')) {
1002
- baseName = 'custom-hooks'
1003
- } else if (commonPath.includes('/lib/')) {
1004
- baseName = 'core-utilities'
1005
- } else if (commonPath.includes('/utils/')) {
1006
- baseName = 'helper-utilities'
1007
- } else if (dirName && dirName !== 'src' && dirName !== 'components' && dirName !== 'lib') {
1008
- baseName = `${dirName}-${baseName}`
1009
- }
1010
- }
1011
-
1012
- return baseName
1013
- }
1014
-
1015
- /**
1016
- * Generate functionality-based chunk name
1017
- */
1018
- generateFunctionalityBasedName(files, analysis) {
1019
- // Extract function names and find common themes
1020
- const allFunctions = files.flatMap(file =>
1021
- analysis[file].ast.functions.map(fn => fn.name.toLowerCase())
1022
- )
1023
-
1024
- const commonWords = this.extractCommonWords(allFunctions)
1025
-
1026
- if (commonWords.length > 0) {
1027
- return commonWords.slice(0, 2).join('-') + '-logic'
1028
- }
1029
-
1030
- // Fallback to directory-based naming
1031
- const dirs = files.map(file => basename(dirname(file)))
1032
- const commonDir = this.getMostCommon(dirs)
1033
-
1034
- return commonDir + '-module'
1035
- }
1036
-
1037
- /**
1038
- * Helper methods for AST analysis
1039
- */
1040
-
1041
- queryNode(node, query) {
1042
- // Simplified query implementation
1043
- // In a full implementation, you'd use tree-sitter's query language
1044
- const results = []
1045
-
1046
- const traverse = (currentNode) => {
1047
- // Match based on node type for now
1048
- if (query.includes(currentNode.type)) {
1049
- results.push({ node: currentNode })
1050
- }
1051
-
1052
- for (let i = 0; i < currentNode.namedChildCount; i++) {
1053
- traverse(currentNode.namedChild(i))
1054
- }
1055
- }
1056
-
1057
- traverse(node)
1058
- return results
1059
- }
1060
-
1061
- getNodeText(node, content) {
1062
- return content.slice(node.startIndex, node.endIndex)
1063
- }
1064
-
1065
- isNodeExported(node) {
1066
- // Rust: check for visibility_modifier (pub) as direct child
1067
- for (let i = 0; i < node.namedChildCount; i++) {
1068
- if (node.namedChild(i).type === 'visibility_modifier') return true
1069
- }
1070
- // JS/TS: check for export_statement ancestor or parent
1071
- let parent = node.parent
1072
- while (parent) {
1073
- if (parent.type === 'export_statement' || parent.type === 'export_declaration') {
1074
- return true
1075
- }
1076
- parent = parent.parent
1077
- }
1078
- return false
1079
- }
1080
-
1081
- calculateOverlap(arrayA, arrayB) {
1082
- const setA = new Set(arrayA)
1083
- const setB = new Set(arrayB)
1084
- const intersection = new Set([...setA].filter(x => setB.has(x)))
1085
- const union = new Set([...setA, ...setB])
1086
-
1087
- return union.size === 0 ? 0 : intersection.size / union.size
1088
- }
1089
-
1090
- getMostCommon(arr) {
1091
- const counts = {}
1092
- for (const item of arr) {
1093
- counts[item] = (counts[item] || 0) + 1
1094
- }
1095
-
1096
- return Object.entries(counts)
1097
- .sort(([,a], [,b]) => b - a)[0]?.[0] || 'unknown'
1098
- }
1099
-
1100
- generateSemanticTags(analysis) {
1101
- const tags = []
1102
-
1103
- tags.push(analysis.semanticType)
1104
- tags.push(...analysis.businessDomain)
1105
- tags.push(...analysis.technicalPatterns)
1106
-
1107
- if (analysis.complexity.level) {
1108
- tags.push(`complexity-${analysis.complexity.level}`)
1109
- }
1110
-
1111
- if (analysis.lines < 50) tags.push('small')
1112
- else if (analysis.lines < 200) tags.push('medium')
1113
- else tags.push('large')
1114
-
1115
- return [...new Set(tags)]
1116
- }
1117
-
1118
- calculateAstComplexity(rootNode) {
1119
- let complexity = 1
1120
-
1121
- const complexityNodes = ['if_statement', 'while_statement', 'for_statement',
1122
- 'switch_statement', 'try_statement', 'catch_clause',
1123
- 'match_arm', 'loop_expression']
1124
-
1125
- const traverse = (node) => {
1126
- if (complexityNodes.includes(node.type)) {
1127
- complexity++
1128
- }
1129
-
1130
- for (let i = 0; i < node.namedChildCount; i++) {
1131
- traverse(node.namedChild(i))
1132
- }
1133
- }
1134
-
1135
- traverse(rootNode)
1136
-
1137
- return {
1138
- score: complexity,
1139
- level: complexity < 5 ? 'low' : complexity < 15 ? 'medium' : 'high'
1140
- }
1141
- }
1142
-
1143
- /**
1144
- * Analyze dependencies from AST
1145
- */
1146
- analyzeDependencies(rootNode, content) {
1147
- const dependencies = {
1148
- internal: [],
1149
- external: [],
1150
- relative: []
1151
- }
1152
-
1153
- const imports = this.extractImports(rootNode, content)
1154
-
1155
- for (const imp of imports) {
1156
- if (imp.isRelative) {
1157
- dependencies.relative.push(imp.source)
1158
- } else if (imp.isExternal) {
1159
- dependencies.external.push(imp.source)
1160
- } else {
1161
- dependencies.internal.push(imp.source)
1162
- }
1163
- }
1164
-
1165
- return dependencies
1166
- }
1167
-
1168
- /**
1169
- * Resolve relative import path to absolute path
1170
- */
1171
- resolveImportPath(fromFile, importPath) {
1172
- // Simplified path resolution
1173
- const dir = dirname(fromFile)
1174
- return join(dir, importPath)
1175
- }
1176
-
1177
- /**
1178
- * Find all files connected through imports
1179
- */
1180
- findConnectedFiles(startFile, relationshipGraph, visited = new Set()) {
1181
- if (visited.has(startFile)) return []
1182
-
1183
- visited.add(startFile)
1184
- const connected = [startFile]
1185
-
1186
- if (relationshipGraph[startFile]) {
1187
- // Follow imports
1188
- for (const importedFile of relationshipGraph[startFile].imports) {
1189
- connected.push(...this.findConnectedFiles(importedFile, relationshipGraph, visited))
1190
- }
1191
-
1192
- // Follow files that import this one
1193
- for (const importingFile of relationshipGraph[startFile].importedBy) {
1194
- connected.push(...this.findConnectedFiles(importingFile, relationshipGraph, visited))
1195
- }
1196
- }
1197
-
1198
- return [...new Set(connected)]
1199
- }
1200
-
1201
- /**
1202
- * Extract imported names from import statement
1203
- */
1204
- extractImportedNames(importNode, content) {
1205
- const names = []
1206
- // Simplified implementation - would need more complex parsing
1207
- const importText = this.getNodeText(importNode, content)
1208
- const match = importText.match(/import\s+(?:\{([^}]+)\}|(\w+))/i)
1209
- if (match) {
1210
- if (match[1]) {
1211
- // Named imports
1212
- names.push(...match[1].split(',').map(n => n.trim()))
1213
- } else if (match[2]) {
1214
- // Default import
1215
- names.push(match[2])
1216
- }
1217
- }
1218
- return names
1219
- }
1220
-
1221
- /**
1222
- * Extract export name from export declaration
1223
- */
1224
- extractExportName(declaration, content) {
1225
- const text = this.getNodeText(declaration, content)
1226
- const match = text.match(/(?:function|class|const|let|var)\s+(\w+)/)
1227
- return match ? match[1] : 'unnamed'
1228
- }
1229
-
1230
- /**
1231
- * Extract JSX element name
1232
- */
1233
- extractJsxElementName(jsxNode, content) {
1234
- try {
1235
- const openingElement = jsxNode.namedChild(0)
1236
- if (openingElement) {
1237
- const nameNode = openingElement.namedChild(0)
1238
- return nameNode ? this.getNodeText(nameNode, content) : 'unknown'
1239
- }
1240
- } catch (error) {
1241
- return 'unknown'
1242
- }
1243
- return 'unknown'
1244
- }
1245
-
1246
- /**
1247
- * Determine chunk type based on files
1248
- */
1249
- determineChunkType(files, analysis) {
1250
- const semanticTypes = files.map(file => analysis[file].semanticType)
1251
- const mostCommon = this.getMostCommon(semanticTypes)
1252
-
1253
- const typeMapping = {
1254
- reactComponent: 'ui-components',
1255
- reactHook: 'custom-hooks',
1256
- expressRoute: 'api-routes',
1257
- expressMiddleware: 'middleware',
1258
- utilityFunction: 'utilities',
1259
- typeDefinition: 'type-definitions',
1260
- configModule: 'configuration',
1261
- cliCommand: 'cli-commands',
1262
- apiHandler: 'api-handlers'
1263
- }
1264
-
1265
- return typeMapping[mostCommon] || 'mixed-module'
1266
- }
1267
-
1268
- /**
1269
- * Calculate cluster complexity
1270
- */
1271
- calculateClusterComplexity(files, analysis) {
1272
- const complexities = files.map(file => analysis[file].complexity.score)
1273
- const total = complexities.reduce((sum, c) => sum + c, 0)
1274
- const average = total / files.length
1275
-
1276
- return {
1277
- total,
1278
- average,
1279
- level: average < 5 ? 'low' : average < 15 ? 'medium' : 'high'
1280
- }
1281
- }
1282
-
1283
- /**
1284
- * Calculate cluster dependencies
1285
- */
1286
- calculateClusterDependencies(files, analysis) {
1287
- const allDeps = {
1288
- internal: new Set(),
1289
- external: new Set(),
1290
- relative: new Set()
1291
- }
1292
-
1293
- for (const file of files) {
1294
- const deps = analysis[file].dependencies
1295
- deps.internal.forEach(dep => allDeps.internal.add(dep))
1296
- deps.external.forEach(dep => allDeps.external.add(dep))
1297
- deps.relative.forEach(dep => allDeps.relative.add(dep))
1298
- }
1299
-
1300
- return {
1301
- internal: Array.from(allDeps.internal),
1302
- external: Array.from(allDeps.external),
1303
- relative: Array.from(allDeps.relative),
1304
- totalCount: allDeps.internal.size + allDeps.external.size + allDeps.relative.size
1305
- }
1306
- }
1307
-
1308
- /**
1309
- * Extract cluster business domains
1310
- */
1311
- extractClusterBusinessDomains(files, analysis) {
1312
- const allDomains = files.flatMap(file => analysis[file].businessDomain)
1313
- return [...new Set(allDomains)]
1314
- }
1315
-
1316
- /**
1317
- * Extract cluster technical patterns
1318
- */
1319
- extractClusterTechnicalPatterns(files, analysis) {
1320
- const allPatterns = files.flatMap(file => analysis[file].technicalPatterns)
1321
- return [...new Set(allPatterns)]
1322
- }
1323
-
1324
- /**
1325
- * Determine cluster purpose
1326
- */
1327
- determineClusterPurpose(files, analysis) {
1328
- const semanticTypes = files.map(file => analysis[file].semanticType)
1329
- const mostCommon = this.getMostCommon(semanticTypes)
1330
-
1331
- const purposeMapping = {
1332
- reactComponent: 'User interface components and React elements',
1333
- reactHook: 'Custom React hooks for state and logic sharing',
1334
- expressRoute: 'API routes and endpoint handlers',
1335
- expressMiddleware: 'Express middleware and request processing',
1336
- utilityFunction: 'Utility functions and helper libraries',
1337
- typeDefinition: 'TypeScript type definitions and interfaces',
1338
- configModule: 'Configuration files and settings',
1339
- cliCommand: 'Command-line interface and CLI tools',
1340
- apiHandler: 'API client and data fetching logic'
1341
- }
1342
-
1343
- return purposeMapping[mostCommon] || 'Mixed functionality module'
1344
- }
1345
-
1346
- /**
1347
- * Calculate cluster cohesion
1348
- */
1349
- calculateClusterCohesion(files, relationshipGraph) {
1350
- if (files.length <= 1) return 1.0
1351
-
1352
- let connections = 0
1353
- let totalPossible = files.length * (files.length - 1)
1354
-
1355
- for (const fileA of files) {
1356
- for (const fileB of files) {
1357
- if (fileA !== fileB && relationshipGraph[fileA]) {
1358
- if (relationshipGraph[fileA].imports.includes(fileB) ||
1359
- relationshipGraph[fileA].importedBy.includes(fileB) ||
1360
- relationshipGraph[fileA].semanticSimilarity[fileB] > 0.5) {
1361
- connections++
1362
- }
1363
- }
1364
- }
1365
- }
1366
-
1367
- return totalPossible > 0 ? connections / totalPossible : 0
1368
- }
1369
-
1370
- /**
1371
- * Generate cluster recommendations
1372
- */
1373
- generateClusterRecommendations(files, analysis) {
1374
- const recommendations = []
1375
-
1376
- const totalSize = files.reduce((sum, file) => sum + analysis[file].size, 0)
1377
- const avgComplexity = files.reduce((sum, file) => sum + analysis[file].complexity.score, 0) / files.length
1378
-
1379
- if (totalSize > 100000) {
1380
- recommendations.push({
1381
- type: 'warning',
1382
- message: 'Large cluster - consider splitting by functionality'
1383
- })
1384
- }
1385
-
1386
- if (avgComplexity > 20) {
1387
- recommendations.push({
1388
- type: 'warning',
1389
- message: 'High complexity cluster - review for refactoring opportunities'
1390
- })
1391
- }
1392
-
1393
- if (files.length === 1) {
1394
- recommendations.push({
1395
- type: 'info',
1396
- message: 'Single file cluster - consider grouping with related files'
1397
- })
1398
- }
1399
-
1400
- return recommendations
1401
- }
1402
-
1403
- /**
1404
- * Extract common words from function names
1405
- */
1406
- extractCommonWords(functionNames) {
1407
- const words = functionNames.flatMap(name =>
1408
- name.split(/(?=[A-Z])|_|-/).filter(word => word.length > 2)
1409
- )
1410
-
1411
- const wordCounts = {}
1412
- for (const word of words) {
1413
- wordCounts[word] = (wordCounts[word] || 0) + 1
1414
- }
1415
-
1416
- return Object.entries(wordCounts)
1417
- .filter(([, count]) => count > 1)
1418
- .sort(([, a], [, b]) => b - a)
1419
- .slice(0, 3)
1420
- .map(([word]) => word)
1421
- }
1422
-
1423
- /**
1424
- * Generate code signature for caching and similarity comparison
1425
- */
1426
- generateCodeSignature(rootNode, content) {
1427
- const functions = this.extractFunctions(rootNode, content)
1428
- const classes = this.extractClasses(rootNode, content)
1429
- const imports = this.extractImports(rootNode, content)
1430
- const exports = this.extractExports(rootNode, content)
1431
-
1432
- return {
1433
- functionCount: functions.length,
1434
- classCount: classes.length,
1435
- importCount: imports.length,
1436
- exportCount: exports.length,
1437
- exportedFunctions: functions.filter(f => f.isExported).map(f => f.name),
1438
- importSources: imports.map(i => i.source),
1439
- hasJsx: this.extractJsxElements(rootNode, content).length > 0,
1440
- contentHash: this.simpleHash(content)
1441
- }
1442
- }
1443
-
1444
- /**
1445
- * Simple hash function for content comparison
1446
- */
1447
- simpleHash(str) {
1448
- let hash = 0
1449
- for (let i = 0; i < str.length; i++) {
1450
- const char = str.charCodeAt(i)
1451
- hash = ((hash << 5) - hash) + char
1452
- hash = hash & hash // Convert to 32bit integer
1453
- }
1454
- return hash
1455
- }
1456
-
1457
- generateSummary(analysis, chunks) {
1458
- const files = Object.values(analysis).filter(f => !f.error)
1459
-
1460
- return {
1461
- totalFiles: files.length,
1462
- totalSize: files.reduce((sum, f) => sum + f.size, 0),
1463
- totalLines: files.reduce((sum, f) => sum + f.lines, 0),
1464
- semanticTypes: this.countByProperty(files, 'semanticType'),
1465
- businessDomains: this.countDomains(files),
1466
- technicalPatterns: this.countPatterns(files),
1467
- totalChunks: chunks.length,
1468
- averageChunkSize: chunks.reduce((sum, c) => sum + c.size, 0) / chunks.length,
1469
- chunkTypes: this.countByProperty(chunks, 'type')
1470
- }
1471
- }
1472
-
1473
- generateRecommendations(analysis, chunks) {
1474
- const recommendations = []
1475
-
1476
- // Add specific recommendations based on treesitter analysis
1477
-
1478
- return recommendations
1479
- }
1480
-
1481
- countByProperty(items, property) {
1482
- const counts = {}
1483
- for (const item of items) {
1484
- const value = typeof property === 'function' ? property(item) : item[property]
1485
- counts[value] = (counts[value] || 0) + 1
1486
- }
1487
- return counts
1488
- }
1489
-
1490
- countDomains(files) {
1491
- const allDomains = files.flatMap(f => f.businessDomain)
1492
- return this.countByProperty(allDomains, d => d)
1493
- }
1494
-
1495
- countPatterns(files) {
1496
- const allPatterns = files.flatMap(f => f.technicalPatterns)
1497
- return this.countByProperty(allPatterns, p => p)
1498
- }
1499
-
1500
- /**
1501
- * Generate tags for a chunk based on its characteristics
1502
- */
1503
- generateTags(files, analysis) {
1504
- const tags = new Set()
1505
-
1506
- // Add semantic type tags
1507
- const semanticTypes = files.map(file => analysis[file].semanticType)
1508
- for (const type of semanticTypes) {
1509
- if (type === 'reactComponent') tags.add('react-component')
1510
- if (type === 'reactHook') tags.add('react-hook')
1511
- if (type === 'utilityFunction') tags.add('utility')
1512
- if (type === 'expressRoute') tags.add('api')
1513
- if (type === 'configModule') tags.add('config')
1514
- }
1515
-
1516
- // Add directory-based tags
1517
- const commonPath = this.findCommonPath(files)
1518
- if (commonPath) {
1519
- if (commonPath.includes('/components/')) tags.add('component')
1520
- if (commonPath.includes('/hooks/')) tags.add('hook')
1521
- if (commonPath.includes('/lib/')) tags.add('library')
1522
- if (commonPath.includes('/utils/')) tags.add('utility')
1523
- if (commonPath.includes('/ui/')) tags.add('ui-library')
1524
- }
1525
-
1526
- // Add complexity tags
1527
- const avgComplexity = files.reduce((sum, file) => sum + analysis[file].complexity.score, 0) / files.length
1528
- if (avgComplexity > 15) tags.add('complex')
1529
- if (avgComplexity < 5) tags.add('simple')
1530
-
1531
- return Array.from(tags)
1532
- }
1533
-
1534
- /**
1535
- * Find common path prefix for a group of files
1536
- */
1537
- findCommonPath(files) {
1538
- if (files.length === 0) return null
1539
- if (files.length === 1) return files[0]
1540
-
1541
- const pathParts = files.map(file => file.split('/'))
1542
- const commonParts = []
1543
-
1544
- for (let i = 0; i < Math.min(...pathParts.map(p => p.length)); i++) {
1545
- const part = pathParts[0][i]
1546
- if (pathParts.every(p => p[i] === part)) {
1547
- commonParts.push(part)
1548
- } else {
1549
- break
1550
- }
1551
- }
1552
-
1553
- return commonParts.length > 0 ? commonParts.join('/') : null
1554
- }
1555
- }
1556
-
1557
- export default TreesitterSemanticChunker