cntx-ui 2.0.12 → 2.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1485 @@
1
+ /**
2
+ * Treesitter-based Semantic Chunker for JavaScript/TypeScript Files
3
+ * Uses tree-sitter for true AST-based code analysis and semantic chunking
4
+ * Supports JS/TS/JSX/TSX with equal treatment
5
+ * Node ecosystem focus: React components, Express APIs, CLI tools, utilities
6
+ */
7
+
8
+ import { readFileSync, existsSync } from 'fs'
9
+ import { extname, basename, dirname, relative, join } from 'path'
10
+ import glob from 'glob'
11
+ import { promisify } from 'util'
12
+ import Parser from 'tree-sitter'
13
+ import JavaScript from 'tree-sitter-javascript'
14
+ import TypeScript from 'tree-sitter-typescript'
15
+
16
// Promise-returning wrapper around the imported `glob` function (via util.promisify); awaited by findFiles
+ const globAsync = promisify(glob)
17
+
18
+ class TreesitterSemanticChunker {
19
+ constructor(options = {}) {
20
+ this.options = {
21
+ includeImports: true,
22
+ includeExports: true,
23
+ detectComponentTypes: true,
24
+ groupRelatedFiles: true,
25
+ minChunkSize: 100,
26
+ maxChunkSize: 50000,
27
+ namingStrategy: 'domain-based', // domain-based, pattern-based, graph-based
28
+ ...options
29
+ }
30
+
31
+ // Initialize parsers for different languages
32
+ this.parsers = {}
33
+ this.initializeParsers()
34
+
35
+ // Semantic patterns for Node ecosystem
36
+ this.semanticPatterns = {
37
+ reactComponent: this.isReactComponent.bind(this),
38
+ reactHook: this.isReactHook.bind(this),
39
+ expressRoute: this.isExpressRoute.bind(this),
40
+ expressMiddleware: this.isExpressMiddleware.bind(this),
41
+ cliCommand: this.isCliCommand.bind(this),
42
+ utilityFunction: this.isUtilityFunction.bind(this),
43
+ apiHandler: this.isApiHandler.bind(this),
44
+ typeDefinition: this.isTypeDefinition.bind(this),
45
+ configModule: this.isConfigModule.bind(this)
46
+ }
47
+ }
48
+
49
+ /**
50
+ * Initialize tree-sitter parsers for different languages
51
+ */
52
+ initializeParsers() {
53
+ // JavaScript parser
54
+ this.parsers.javascript = new Parser()
55
+ this.parsers.javascript.setLanguage(JavaScript)
56
+
57
+ // TypeScript parser
58
+ this.parsers.typescript = new Parser()
59
+ this.parsers.typescript.setLanguage(TypeScript.typescript)
60
+
61
+ // TSX parser
62
+ this.parsers.tsx = new Parser()
63
+ this.parsers.tsx.setLanguage(TypeScript.tsx)
64
+ }
65
+
66
+ /**
67
+ * Get appropriate parser for file extension
68
+ */
69
+ getParser(filePath) {
70
+ const ext = extname(filePath)
71
+ switch (ext) {
72
+ case '.ts': return this.parsers.typescript
73
+ case '.tsx': return this.parsers.tsx
74
+ case '.js':
75
+ case '.jsx':
76
+ default: return this.parsers.javascript
77
+ }
78
+ }
79
+
80
+ /**
81
+ * Main entry point - analyze files and create semantic chunks
82
+ */
83
+ async analyzeProject(projectPath, patterns = ['**/*.{js,jsx,ts,tsx}']) {
84
+ console.log('🔍 Starting treesitter-based semantic analysis...')
85
+
86
+ const files = await this.findFiles(projectPath, patterns)
87
+ console.log(`📁 Found ${files.length} files to analyze`)
88
+
89
+ const analysis = await this.analyzeFiles(files, projectPath)
90
+ const successfulFiles = Object.keys(analysis).filter(f => !analysis[f].error)
91
+ console.log(`✅ Analyzed ${Object.keys(analysis).length} files (${successfulFiles.length} successful)`)
92
+ if (successfulFiles.length > 0) {
93
+ console.log('📝 Sample successful files:', successfulFiles.slice(0, 5))
94
+ }
95
+
96
+ const relationshipGraph = this.buildRelationshipGraph(analysis)
97
+ console.log(`🔗 Built relationship graph with ${Object.keys(relationshipGraph).length} nodes`)
98
+
99
+ const chunks = await this.createSmartChunks(analysis, relationshipGraph)
100
+ console.log(`📦 Created ${chunks.length} semantic chunks`)
101
+
102
+ return {
103
+ summary: this.generateSummary(analysis, chunks),
104
+ files: analysis,
105
+ chunks: chunks,
106
+ relationshipGraph,
107
+ recommendations: this.generateRecommendations(analysis, chunks)
108
+ }
109
+ }
110
+
111
+ /**
112
+ * Find files matching patterns
113
+ */
114
+ async findFiles(projectPath, patterns) {
115
+ const files = []
116
+
117
+ for (const pattern of patterns) {
118
+ const matches = await globAsync(pattern, {
119
+ cwd: projectPath,
120
+ ignore: [
121
+ 'node_modules/**', 'dist/**', 'build/**', '.git/**',
122
+ '*.test.*', '*.spec.*', '**/test/**', '**/tests/**',
123
+ '**/*.min.js', '**/*.bundle.js', '**/coverage/**',
124
+ '**/.next/**', '**/.cache/**', '**/tmp/**', '**/temp/**'
125
+ ]
126
+ })
127
+
128
+ // Extra filter to ensure no node_modules files get through
129
+ const filteredMatches = matches.filter(file =>
130
+ !file.includes('node_modules') &&
131
+ !file.includes('dist/') &&
132
+ !file.includes('.min.') &&
133
+ !file.includes('.bundle.')
134
+ )
135
+
136
+ files.push(...filteredMatches)
137
+ }
138
+
139
+ return [...new Set(files)] // Remove duplicates
140
+ }
141
+
142
+ /**
143
+ * Analyze all files using treesitter
144
+ */
145
+ async analyzeFiles(filePaths, projectPath) {
146
+ const analysis = {}
147
+
148
+ for (const relativePath of filePaths) {
149
+ // Bulletproof check to skip node_modules
150
+ if (relativePath.includes('node_modules')) {
151
+ console.log(`Skipping node_modules file: ${relativePath}`);
152
+ continue;
153
+ }
154
+
155
+ const fullPath = join(projectPath, relativePath)
156
+ if (!existsSync(fullPath)) continue
157
+
158
+ try {
159
+ const content = readFileSync(fullPath, 'utf8')
160
+ const fileAnalysis = await this.analyzeFile(fullPath, content)
161
+ fileAnalysis.path = relativePath // Store relative path
162
+ analysis[relativePath] = fileAnalysis
163
+ } catch (error) {
164
+ // Silently skip files that can't be parsed - they won't be included in semantic analysis
165
+ // This is normal for complex files or unsupported syntax patterns
166
+ analysis[relativePath] = { error: error.message, path: relativePath }
167
+ }
168
+ }
169
+
170
+ return analysis
171
+ }
172
+
173
+ /**
174
+ * Analyze a single file using treesitter AST
175
+ */
176
+ async analyzeFile(filePath, content) {
177
+ const parser = this.getParser(filePath)
178
+
179
+ // Skip files that are too large or have syntax errors
180
+ if (content.length > 500000) { // Skip files > 500KB
181
+ throw new Error('File too large')
182
+ }
183
+
184
+ let tree, rootNode
185
+ try {
186
+ // Use simple string parsing (confirmed working in tests)
187
+ tree = parser.parse(content)
188
+ rootNode = tree.rootNode
189
+
190
+ // Check for parse errors
191
+ if (rootNode.hasError()) {
192
+ throw new Error('Parse error in file')
193
+ }
194
+ } catch (error) {
195
+ throw new Error(`Tree-sitter parse failed: ${error.message}`)
196
+ }
197
+
198
+ const analysis = {
199
+ path: filePath,
200
+ fileName: basename(filePath),
201
+ dirName: basename(dirname(filePath)),
202
+ extension: extname(filePath),
203
+ size: content.length,
204
+ lines: content.split('\n').length,
205
+
206
+ // AST-based analysis
207
+ ast: {
208
+ functions: this.extractFunctions(rootNode, content),
209
+ classes: this.extractClasses(rootNode, content),
210
+ imports: this.extractImports(rootNode, content),
211
+ exports: this.extractExports(rootNode, content),
212
+ variables: this.extractVariables(rootNode, content),
213
+ jsxElements: this.extractJsxElements(rootNode, content),
214
+ typeDefinitions: this.extractTypeDefinitions(rootNode, content)
215
+ },
216
+
217
+ // Semantic classification
218
+ semanticType: this.classifyFileSemantics(rootNode, content, filePath),
219
+ businessDomain: this.extractBusinessDomain(rootNode, content, filePath),
220
+ technicalPatterns: this.identifyTechnicalPatterns(rootNode, content),
221
+
222
+ // Relationships
223
+ dependencies: this.analyzeDependencies(rootNode, content),
224
+ complexity: this.calculateAstComplexity(rootNode),
225
+
226
+ // Metadata
227
+ codeSignature: this.generateCodeSignature(rootNode, content)
228
+ }
229
+
230
+ // Generate semantic tags based on AST analysis
231
+ analysis.semanticTags = this.generateSemanticTags(analysis)
232
+
233
+ return analysis
234
+ }
235
+
236
+ /**
237
+ * Extract function declarations from AST
238
+ */
239
+ extractFunctions(rootNode, content) {
240
+ const functions = []
241
+
242
+ // Function declarations
243
+ const functionDeclarations = this.queryNode(rootNode, '(function_declaration name: (identifier) @name)')
244
+ functions.push(...functionDeclarations.map(capture => ({
245
+ name: this.getNodeText(capture.node, content),
246
+ type: 'function_declaration',
247
+ startPosition: capture.node.startPosition,
248
+ endPosition: capture.node.endPosition,
249
+ isExported: this.isNodeExported(capture.node)
250
+ })))
251
+
252
+ // Arrow functions
253
+ const arrowFunctions = this.queryNode(rootNode, '(variable_declarator name: (identifier) @name value: (arrow_function))')
254
+ functions.push(...arrowFunctions.map(capture => ({
255
+ name: this.getNodeText(capture.node, content),
256
+ type: 'arrow_function',
257
+ startPosition: capture.node.startPosition,
258
+ endPosition: capture.node.endPosition,
259
+ isExported: this.isNodeExported(capture.node.parent.parent)
260
+ })))
261
+
262
+ // Method definitions
263
+ const methods = this.queryNode(rootNode, '(method_definition name: (property_name) @name)')
264
+ functions.push(...methods.map(capture => ({
265
+ name: this.getNodeText(capture.node, content),
266
+ type: 'method',
267
+ startPosition: capture.node.startPosition,
268
+ endPosition: capture.node.endPosition,
269
+ isExported: false // methods are part of classes
270
+ })))
271
+
272
+ return functions
273
+ }
274
+
275
+ /**
276
+ * Extract class declarations from AST
277
+ */
278
+ extractClasses(rootNode, content) {
279
+ const classes = []
280
+
281
+ const classDeclarations = this.queryNode(rootNode, '(class_declaration name: (identifier) @name)')
282
+ classes.push(...classDeclarations.map(capture => ({
283
+ name: this.getNodeText(capture.node, content),
284
+ type: 'class',
285
+ startPosition: capture.node.startPosition,
286
+ endPosition: capture.node.endPosition,
287
+ isExported: this.isNodeExported(capture.node.parent),
288
+ methods: this.extractClassMethods(capture.node.parent, content)
289
+ })))
290
+
291
+ return classes
292
+ }
293
+
294
+ /**
295
+ * Extract class methods
296
+ */
297
+ extractClassMethods(classNode, content) {
298
+ const methods = []
299
+
300
+ try {
301
+ const methodNodes = this.queryNode(classNode, '(method_definition)')
302
+ methods.push(...methodNodes.map(capture => ({
303
+ name: this.getNodeText(capture.node, content),
304
+ type: 'method',
305
+ startPosition: capture.node.startPosition,
306
+ endPosition: capture.node.endPosition
307
+ })))
308
+ } catch (error) {
309
+ // Handle case where method extraction fails
310
+ }
311
+
312
+ return methods
313
+ }
314
+
315
+ /**
316
+ * Extract import statements from AST
317
+ */
318
+ extractImports(rootNode, content) {
319
+ const imports = []
320
+
321
+ const importStatements = this.queryNode(rootNode, '(import_statement source: (string) @source)')
322
+ imports.push(...importStatements.map(capture => {
323
+ const source = this.getNodeText(capture.node, content).replace(/['"]/g, '')
324
+ return {
325
+ source,
326
+ statement: this.getNodeText(capture.node.parent, content),
327
+ isRelative: source.startsWith('.'),
328
+ isExternal: !source.startsWith('.') && !source.startsWith('/'),
329
+ importedNames: this.extractImportedNames(capture.node.parent, content)
330
+ }
331
+ }))
332
+
333
+ return imports
334
+ }
335
+
336
+ /**
337
+ * Extract export statements from AST
338
+ */
339
+ extractExports(rootNode, content) {
340
+ const exports = []
341
+
342
+ // Export declarations
343
+ const exportDeclarations = this.queryNode(rootNode, '(export_statement)')
344
+ exports.push(...exportDeclarations.map(capture => {
345
+ const exportNode = capture.node
346
+ const declaration = exportNode.namedChild(0)
347
+
348
+ if (declaration) {
349
+ return {
350
+ type: declaration.type === 'export_clause' ? 'named' : 'declaration',
351
+ name: this.extractExportName(declaration, content),
352
+ statement: this.getNodeText(exportNode, content),
353
+ isDefault: this.getNodeText(exportNode, content).includes('default')
354
+ }
355
+ }
356
+ return null
357
+ }).filter(Boolean))
358
+
359
+ return exports
360
+ }
361
+
362
+ /**
363
+ * Extract variable declarations from AST
364
+ */
365
+ extractVariables(rootNode, content) {
366
+ const variables = []
367
+
368
+ const variableDeclarations = this.queryNode(rootNode, '(variable_declarator name: (identifier) @name)')
369
+ variables.push(...variableDeclarations.map(capture => ({
370
+ name: this.getNodeText(capture.node, content),
371
+ type: 'variable',
372
+ startPosition: capture.node.startPosition,
373
+ endPosition: capture.node.endPosition,
374
+ isExported: this.isNodeExported(capture.node.parent.parent),
375
+ declarationType: capture.node.parent.parent.type // const, let, var
376
+ })))
377
+
378
+ return variables
379
+ }
380
+
381
+ /**
382
+ * Extract JSX elements from AST (for React components)
383
+ */
384
+ extractJsxElements(rootNode, content) {
385
+ const jsxElements = []
386
+
387
+ try {
388
+ const jsxNodes = this.queryNode(rootNode, '(jsx_element)')
389
+ jsxElements.push(...jsxNodes.map(capture => ({
390
+ elementName: this.extractJsxElementName(capture.node, content),
391
+ startPosition: capture.node.startPosition,
392
+ endPosition: capture.node.endPosition
393
+ })))
394
+ } catch (error) {
395
+ // JSX might not be available in JavaScript parser
396
+ }
397
+
398
+ return jsxElements
399
+ }
400
+
401
+ /**
402
+ * Extract TypeScript type definitions from AST
403
+ */
404
+ extractTypeDefinitions(rootNode, content) {
405
+ const types = []
406
+
407
+ try {
408
+ // Interface declarations
409
+ const interfaces = this.queryNode(rootNode, '(interface_declaration name: (type_identifier) @name)')
410
+ types.push(...interfaces.map(capture => ({
411
+ name: this.getNodeText(capture.node, content),
412
+ type: 'interface',
413
+ startPosition: capture.node.startPosition,
414
+ endPosition: capture.node.endPosition,
415
+ isExported: this.isNodeExported(capture.node.parent)
416
+ })))
417
+
418
+ // Type alias declarations
419
+ const typeAliases = this.queryNode(rootNode, '(type_alias_declaration name: (type_identifier) @name)')
420
+ types.push(...typeAliases.map(capture => ({
421
+ name: this.getNodeText(capture.node, content),
422
+ type: 'type_alias',
423
+ startPosition: capture.node.startPosition,
424
+ endPosition: capture.node.endPosition,
425
+ isExported: this.isNodeExported(capture.node.parent)
426
+ })))
427
+ } catch (error) {
428
+ // TypeScript types might not be available in JavaScript parser
429
+ }
430
+
431
+ return types
432
+ }
433
+
434
+ /**
435
+ * Classify file semantics based on AST patterns
436
+ */
437
+ classifyFileSemantics(rootNode, content, filePath) {
438
+ const classifications = []
439
+
440
+ // Test each semantic pattern
441
+ for (const [patternName, patternFn] of Object.entries(this.semanticPatterns)) {
442
+ if (patternFn(rootNode, content, filePath)) {
443
+ classifications.push(patternName)
444
+ }
445
+ }
446
+
447
+ // Return primary classification (most specific first)
448
+ const priority = ['reactComponent', 'reactHook', 'expressRoute', 'expressMiddleware',
449
+ 'cliCommand', 'apiHandler', 'typeDefinition', 'configModule', 'utilityFunction']
450
+
451
+ for (const pattern of priority) {
452
+ if (classifications.includes(pattern)) {
453
+ return pattern
454
+ }
455
+ }
456
+
457
+ return 'module'
458
+ }
459
+
460
+ /**
461
+ * Semantic pattern: React Component
462
+ */
463
+ isReactComponent(rootNode, content, filePath) {
464
+ // Check for JSX elements
465
+ const hasJsx = this.queryNode(rootNode, '(jsx_element)').length > 0
466
+
467
+ // Check for React imports
468
+ const hasReactImport = content.includes("import React") || content.includes("from 'react'")
469
+
470
+ // Check for component naming pattern
471
+ const fileName = basename(filePath, extname(filePath))
472
+ const hasComponentName = fileName[0] === fileName[0].toUpperCase()
473
+
474
+ // Check for function that returns JSX
475
+ const functions = this.extractFunctions(rootNode, content)
476
+ const hasComponentFunction = functions.some(fn =>
477
+ fn.isExported && fn.name[0] === fn.name[0].toUpperCase()
478
+ )
479
+
480
+ return (hasJsx && (hasReactImport || hasComponentName)) ||
481
+ (hasComponentFunction && hasReactImport)
482
+ }
483
+
484
+ /**
485
+ * Semantic pattern: React Hook
486
+ */
487
+ isReactHook(rootNode, content, filePath) {
488
+ const fileName = basename(filePath, extname(filePath))
489
+ const hasHookName = fileName.startsWith('use') && fileName[3] === fileName[3].toUpperCase()
490
+
491
+ const functions = this.extractFunctions(rootNode, content)
492
+ const hasHookFunction = functions.some(fn =>
493
+ fn.name.startsWith('use') && fn.name[3] === fn.name[3].toUpperCase() && fn.isExported
494
+ )
495
+
496
+ const hasReactHookImports = content.includes("from 'react'") &&
497
+ (content.includes('useState') || content.includes('useEffect'))
498
+
499
+ return hasHookName || (hasHookFunction && hasReactHookImports)
500
+ }
501
+
502
+ /**
503
+ * Semantic pattern: Express Route
504
+ */
505
+ isExpressRoute(rootNode, content, filePath) {
506
+ const hasExpressImport = content.includes("from 'express'") || content.includes("require('express')")
507
+ const hasRouterMethods = /\.(get|post|put|delete|patch)\s*\(/.test(content)
508
+ const hasRoutePattern = /['"`]\/[^'"`]*['"`]/.test(content)
509
+
510
+ return hasExpressImport && hasRouterMethods && hasRoutePattern
511
+ }
512
+
513
+ /**
514
+ * Semantic pattern: Express Middleware
515
+ */
516
+ isExpressMiddleware(rootNode, content, filePath) {
517
+ const hasMiddlewarePattern = /\(req,\s*res,\s*next\)|function\s*\([^)]*req[^)]*res[^)]*next/.test(content)
518
+ const hasExpressImport = content.includes("from 'express'") || content.includes("require('express')")
519
+ const fileName = basename(filePath).toLowerCase()
520
+
521
+ return (hasMiddlewarePattern && hasExpressImport) || fileName.includes('middleware')
522
+ }
523
+
524
+ /**
525
+ * Semantic pattern: CLI Command
526
+ */
527
+ isCliCommand(rootNode, content, filePath) {
528
+ const hasCommanderImport = content.includes('commander') || content.includes('yargs')
529
+ const hasProcessArgv = content.includes('process.argv')
530
+ const hasCliPatterns = content.includes('.command(') || content.includes('.option(')
531
+ const fileName = basename(filePath).toLowerCase()
532
+
533
+ return hasCommanderImport || (hasProcessArgv && hasCliPatterns) || fileName.includes('cli')
534
+ }
535
+
536
+ /**
537
+ * Semantic pattern: Utility Function
538
+ */
539
+ isUtilityFunction(rootNode, content, filePath) {
540
+ const functions = this.extractFunctions(rootNode, content)
541
+ const hasMultipleExportedFunctions = functions.filter(fn => fn.isExported).length > 1
542
+
543
+ const fileName = basename(filePath).toLowerCase()
544
+ const hasUtilityName = fileName.includes('util') || fileName.includes('helper') || fileName.includes('lib')
545
+
546
+ const hasNoDomSpecificImports = !content.includes('react') && !content.includes('express')
547
+
548
+ return hasUtilityName || (hasMultipleExportedFunctions && hasNoDomSpecificImports)
549
+ }
550
+
551
+ /**
552
+ * Semantic pattern: API Handler
553
+ */
554
+ isApiHandler(rootNode, content, filePath) {
555
+ const hasApiPattern = /api|handler|controller/i.test(filePath)
556
+ const hasFetchPattern = content.includes('fetch(') || content.includes('axios')
557
+ const hasHttpMethods = /\b(GET|POST|PUT|DELETE|PATCH)\b/.test(content)
558
+
559
+ return hasApiPattern || (hasFetchPattern && hasHttpMethods)
560
+ }
561
+
562
+ /**
563
+ * Semantic pattern: Type Definition
564
+ */
565
+ isTypeDefinition(rootNode, content, filePath) {
566
+ const types = this.extractTypeDefinitions(rootNode, content)
567
+ const hasTypeDefinitions = types.length > 0
568
+
569
+ const fileName = basename(filePath).toLowerCase()
570
+ const hasTypeFileName = fileName.includes('type') || fileName.includes('.d.ts')
571
+
572
+ const hasOnlyTypes = hasTypeDefinitions &&
573
+ this.extractFunctions(rootNode, content).length === 0 &&
574
+ this.extractClasses(rootNode, content).length === 0
575
+
576
+ return hasTypeFileName || hasOnlyTypes
577
+ }
578
+
579
+ /**
580
+ * Semantic pattern: Config Module
581
+ */
582
+ isConfigModule(rootNode, content, filePath) {
583
+ const fileName = basename(filePath).toLowerCase()
584
+ const hasConfigName = fileName.includes('config') || fileName.includes('setting')
585
+
586
+ const hasConfigPatterns = content.includes('module.exports') || content.includes('export default')
587
+ const hasConfigObject = /\{[\s\S]*\}/.test(content) && !/function|class/.test(content)
588
+
589
+ return hasConfigName && (hasConfigPatterns || hasConfigObject)
590
+ }
591
+
592
+ /**
593
+ * Extract business domain terms from code
594
+ */
595
+ extractBusinessDomain(rootNode, content, filePath) {
596
+ const domains = []
597
+
598
+ // Focus on meaningful path segments instead of generic business terms
599
+ const pathSegments = filePath.split('/').filter(s => s && s !== 'src' && s !== 'lib' && s !== 'components')
600
+ const fileName = basename(filePath, extname(filePath))
601
+
602
+ // Extract domain from directory structure (more reliable than keywords)
603
+ if (pathSegments.length > 0) {
604
+ const relevantSegments = pathSegments.slice(-2) // Last 2 directories
605
+ domains.push(...relevantSegments.map(s => s.toLowerCase()))
606
+ }
607
+
608
+ // Add meaningful file-based domains
609
+ if (fileName.toLowerCase().includes('config')) domains.push('configuration')
610
+ if (fileName.toLowerCase().includes('test')) domains.push('testing')
611
+ if (fileName.toLowerCase().includes('util')) domains.push('utilities')
612
+ if (fileName.toLowerCase().includes('api')) domains.push('api')
613
+ if (fileName.toLowerCase().includes('ui') || fileName.toLowerCase().includes('component')) {
614
+ domains.push('user-interface')
615
+ }
616
+
617
+ // Only return meaningful, non-generic domains
618
+ return [...new Set(domains)].filter(domain =>
619
+ domain.length > 2 && !['web', 'src', 'ts', 'js', 'tsx', 'jsx'].includes(domain)
620
+ )
621
+ }
622
+
623
+ /**
624
+ * Identify technical patterns in the code
625
+ */
626
+ identifyTechnicalPatterns(rootNode, content) {
627
+ const patterns = []
628
+
629
+ // Framework patterns
630
+ if (content.includes('react')) patterns.push('react')
631
+ if (content.includes('express')) patterns.push('express')
632
+ if (content.includes('typescript')) patterns.push('typescript')
633
+
634
+ // Architecture patterns
635
+ if (content.includes('async') && content.includes('await')) patterns.push('async-await')
636
+ if (content.includes('Promise')) patterns.push('promises')
637
+ if (content.includes('class') && content.includes('extends')) patterns.push('inheritance')
638
+
639
+ // Design patterns
640
+ const functions = this.extractFunctions(rootNode, content)
641
+ if (functions.some(f => f.name.includes('Factory'))) patterns.push('factory-pattern')
642
+ if (functions.some(f => f.name.includes('Observer'))) patterns.push('observer-pattern')
643
+
644
+ return patterns
645
+ }
646
+
647
+ /**
648
+ * Build relationship graph between files
649
+ */
650
+ buildRelationshipGraph(analysis) {
651
+ const graph = {}
652
+
653
+ for (const [filePath, fileAnalysis] of Object.entries(analysis)) {
654
+ if (fileAnalysis.error) continue
655
+
656
+ graph[filePath] = {
657
+ imports: [],
658
+ importedBy: [],
659
+ semanticSimilarity: {},
660
+ businessDomainOverlap: {},
661
+ technicalPatternOverlap: {}
662
+ }
663
+ }
664
+
665
+ // Build import relationships
666
+ for (const [filePath, fileAnalysis] of Object.entries(analysis)) {
667
+ if (fileAnalysis.error) continue
668
+
669
+ for (const imp of fileAnalysis.ast.imports) {
670
+ if (imp.isRelative) {
671
+ // Resolve relative import to actual file path
672
+ const importPath = this.resolveImportPath(filePath, imp.source)
673
+ if (graph[importPath]) {
674
+ graph[filePath].imports.push(importPath)
675
+ graph[importPath].importedBy.push(filePath)
676
+ }
677
+ }
678
+ }
679
+ }
680
+
681
+ // Calculate semantic similarities
682
+ for (const [fileA, analysisA] of Object.entries(analysis)) {
683
+ if (analysisA.error) continue
684
+
685
+ for (const [fileB, analysisB] of Object.entries(analysis)) {
686
+ if (analysisB.error || fileA === fileB) continue
687
+
688
+ // Semantic type similarity
689
+ const semanticSimilarity = analysisA.semanticType === analysisB.semanticType ? 1.0 : 0.0
690
+
691
+ // Business domain overlap
692
+ const domainOverlap = this.calculateOverlap(analysisA.businessDomain, analysisB.businessDomain)
693
+
694
+ // Technical pattern overlap
695
+ const patternOverlap = this.calculateOverlap(analysisA.technicalPatterns, analysisB.technicalPatterns)
696
+
697
+ if (semanticSimilarity > 0 || domainOverlap > 0 || patternOverlap > 0) {
698
+ graph[fileA].semanticSimilarity[fileB] = semanticSimilarity
699
+ graph[fileA].businessDomainOverlap[fileB] = domainOverlap
700
+ graph[fileA].technicalPatternOverlap[fileB] = patternOverlap
701
+ }
702
+ }
703
+ }
704
+
705
+ return graph
706
+ }
707
+
708
+ /**
709
+ * Create smart chunks using clustering algorithms
710
+ */
711
+ async createSmartChunks(analysis, relationshipGraph) {
712
+
713
+ // Start with individual files as nodes
714
+ const nodes = Object.keys(analysis).filter(path => !analysis[path].error)
715
+ console.log(`🧩 Starting with ${nodes.length} nodes for clustering`)
716
+
717
+ // Apply different clustering strategies
718
+ const strategies = [
719
+ this.clusterBySemanticType.bind(this),
720
+ this.clusterByBusinessDomain.bind(this),
721
+ this.clusterByDependencyGraph.bind(this),
722
+ this.clusterByDirectoryStructure.bind(this)
723
+ ]
724
+
725
+ let clusters = nodes.map(node => [node]) // Start with individual nodes
726
+
727
+ // Apply clustering strategies
728
+ for (const strategy of strategies) {
729
+ clusters = strategy(clusters, analysis, relationshipGraph)
730
+ console.log(`📦 After ${strategy.name}: ${clusters.length} clusters`)
731
+ }
732
+
733
+ // Convert clusters to named chunks
734
+ const chunks = []
735
+ const usedNames = new Set()
736
+
737
+ for (const cluster of clusters) {
738
+ if (cluster.length === 0) continue
739
+
740
+ let chunkName = await this.generateChunkName(cluster, analysis)
741
+
742
+ // Ensure unique names
743
+ let uniqueName = chunkName
744
+ let counter = 1
745
+ while (usedNames.has(uniqueName)) {
746
+ uniqueName = `${chunkName}-${counter}`
747
+ counter++
748
+ }
749
+ usedNames.add(uniqueName)
750
+
751
+ const chunk = {
752
+ name: uniqueName,
753
+ type: this.determineChunkType(cluster, analysis),
754
+ files: cluster,
755
+ size: cluster.reduce((sum, file) => sum + analysis[file].size, 0),
756
+ complexity: this.calculateClusterComplexity(cluster, analysis),
757
+ dependencies: this.calculateClusterDependencies(cluster, analysis),
758
+ businessDomains: this.extractClusterBusinessDomains(cluster, analysis),
759
+ technicalPatterns: this.extractClusterTechnicalPatterns(cluster, analysis),
760
+ purpose: this.determineClusterPurpose(cluster, analysis),
761
+ cohesion: this.calculateClusterCohesion(cluster, relationshipGraph),
762
+ recommendations: this.generateClusterRecommendations(cluster, analysis),
763
+ tags: this.generateTags(cluster, analysis)
764
+ }
765
+
766
+ chunks.push(chunk)
767
+ }
768
+
769
+ return chunks.sort((a, b) => b.cohesion - a.cohesion) // Sort by cohesion (best chunks first)
770
+ }
771
+
772
+ /**
773
+ * Cluster files by semantic type
774
+ */
775
+ clusterBySemanticType(clusters, analysis, relationshipGraph) {
776
+ const semanticGroups = {}
777
+
778
+ for (const cluster of clusters) {
779
+ for (const file of cluster) {
780
+ const semanticType = analysis[file].semanticType
781
+ if (!semanticGroups[semanticType]) {
782
+ semanticGroups[semanticType] = []
783
+ }
784
+ semanticGroups[semanticType].push(file)
785
+ }
786
+ }
787
+
788
+ return Object.values(semanticGroups).filter(group => group.length > 0)
789
+ }
790
+
791
+ /**
792
+ * Cluster files by business domain
793
+ */
794
+ clusterByBusinessDomain(clusters, analysis, relationshipGraph) {
795
+ const domainGroups = {}
796
+
797
+ for (const cluster of clusters) {
798
+ for (const file of cluster) {
799
+ const domains = analysis[file].businessDomain
800
+
801
+ if (domains.length === 0) {
802
+ // Files with no clear domain go to 'general' group
803
+ if (!domainGroups.general) domainGroups.general = []
804
+ domainGroups.general.push(file)
805
+ } else {
806
+ // Files go to their primary domain group
807
+ const primaryDomain = domains[0]
808
+ if (!domainGroups[primaryDomain]) domainGroups[primaryDomain] = []
809
+ domainGroups[primaryDomain].push(file)
810
+ }
811
+ }
812
+ }
813
+
814
+ return Object.values(domainGroups).filter(group => group.length > 0)
815
+ }
816
+
817
+ /**
818
+ * Cluster files by dependency relationships
819
+ */
820
+ clusterByDependencyGraph(clusters, analysis, relationshipGraph) {
821
+ const dependencyGroups = []
822
+ const visited = new Set()
823
+
824
+ for (const cluster of clusters) {
825
+ for (const file of cluster) {
826
+ if (visited.has(file)) continue
827
+
828
+ // Find all files connected to this file through imports
829
+ const connected = this.findConnectedFiles(file, relationshipGraph, new Set())
830
+
831
+ // Filter to only files in current clusters
832
+ const relevantConnected = connected.filter(f =>
833
+ clusters.some(cluster => cluster.includes(f))
834
+ )
835
+
836
+ if (relevantConnected.length > 1) {
837
+ dependencyGroups.push(relevantConnected)
838
+ relevantConnected.forEach(f => visited.add(f))
839
+ } else {
840
+ // Isolated file becomes its own group
841
+ dependencyGroups.push([file])
842
+ visited.add(file)
843
+ }
844
+ }
845
+ }
846
+
847
+ return dependencyGroups.filter(group => group.length > 0)
848
+ }
849
+
850
+ /**
851
+ * Cluster files by directory structure
852
+ */
853
+ clusterByDirectoryStructure(clusters, analysis, relationshipGraph) {
854
+ const directoryGroups = {}
855
+
856
+ for (const cluster of clusters) {
857
+ for (const file of cluster) {
858
+ const dir = dirname(file)
859
+ if (!directoryGroups[dir]) {
860
+ directoryGroups[dir] = []
861
+ }
862
+ directoryGroups[dir].push(file)
863
+ }
864
+ }
865
+
866
+ return Object.values(directoryGroups).filter(group => group.length > 0)
867
+ }
868
+
869
+ /**
870
+ * Generate intelligent chunk name
871
+ */
872
+ async generateChunkName(files, analysis) {
873
+ const namingStrategies = {
874
+ domainBased: this.generateDomainBasedName.bind(this),
875
+ patternBased: this.generatePatternBasedName.bind(this),
876
+ functionalityBased: this.generateFunctionalityBasedName.bind(this)
877
+ }
878
+
879
+ const names = {}
880
+
881
+ for (const [strategy, generator] of Object.entries(namingStrategies)) {
882
+ try {
883
+ names[strategy] = generator(files, analysis)
884
+ } catch (error) {
885
+ names[strategy] = 'unnamed-chunk'
886
+ }
887
+ }
888
+
889
+ // Choose best name based on strategy preference - prefer pattern-based for better names
890
+ const strategy = 'patternBased' // Force pattern-based naming
891
+ return names[strategy] || names.patternBased || names.functionalityBased || names.domainBased || 'unknown-chunk'
892
+ }
893
+
894
+ /**
895
+ * Generate domain-based chunk name
896
+ */
897
+ generateDomainBasedName(files, analysis) {
898
+ // Always fallback to pattern-based naming since domain extraction is unreliable
899
+ return this.generatePatternBasedName(files, analysis)
900
+ }
901
+
902
+ /**
903
+ * Generate pattern-based chunk name
904
+ */
905
+ generatePatternBasedName(files, analysis) {
906
+ const semanticTypes = files.map(file => analysis[file].semanticType)
907
+ const mostCommon = this.getMostCommon(semanticTypes)
908
+
909
+ // Look at actual file names and directories for context
910
+ const commonPath = this.findCommonPath(files)
911
+ const dirName = commonPath ? basename(dirname(commonPath)) : null
912
+
913
+ const typeNames = {
914
+ reactComponent: 'ui-components',
915
+ reactHook: 'react-hooks',
916
+ expressRoute: 'server-routes',
917
+ expressMiddleware: 'server-middleware',
918
+ utilityFunction: 'utility-functions',
919
+ typeDefinition: 'type-definitions',
920
+ configModule: 'configuration',
921
+ cliCommand: 'cli-tools',
922
+ apiHandler: 'api-endpoints',
923
+ module: 'shared-modules'
924
+ }
925
+
926
+ let baseName = typeNames[mostCommon] || 'mixed-files'
927
+
928
+ // Add more specific context based on file paths
929
+ if (commonPath) {
930
+ if (commonPath.includes('/components/ui/')) {
931
+ baseName = 'ui-library-components'
932
+ } else if (commonPath.includes('/components/')) {
933
+ baseName = 'application-components'
934
+ } else if (commonPath.includes('/hooks/')) {
935
+ baseName = 'custom-hooks'
936
+ } else if (commonPath.includes('/lib/')) {
937
+ baseName = 'core-utilities'
938
+ } else if (commonPath.includes('/utils/')) {
939
+ baseName = 'helper-utilities'
940
+ } else if (dirName && dirName !== 'src' && dirName !== 'components' && dirName !== 'lib') {
941
+ baseName = `${dirName}-${baseName}`
942
+ }
943
+ }
944
+
945
+ return baseName
946
+ }
947
+
948
+ /**
949
+ * Generate functionality-based chunk name
950
+ */
951
+ generateFunctionalityBasedName(files, analysis) {
952
+ // Extract function names and find common themes
953
+ const allFunctions = files.flatMap(file =>
954
+ analysis[file].ast.functions.map(fn => fn.name.toLowerCase())
955
+ )
956
+
957
+ const commonWords = this.extractCommonWords(allFunctions)
958
+
959
+ if (commonWords.length > 0) {
960
+ return commonWords.slice(0, 2).join('-') + '-logic'
961
+ }
962
+
963
+ // Fallback to directory-based naming
964
+ const dirs = files.map(file => basename(dirname(file)))
965
+ const commonDir = this.getMostCommon(dirs)
966
+
967
+ return commonDir + '-module'
968
+ }
969
+
970
+ /**
971
+ * Helper methods for AST analysis
972
+ */
973
+
974
+ queryNode(node, query) {
975
+ // Simplified query implementation
976
+ // In a full implementation, you'd use tree-sitter's query language
977
+ const results = []
978
+
979
+ const traverse = (currentNode) => {
980
+ // Match based on node type for now
981
+ if (query.includes(currentNode.type)) {
982
+ results.push({ node: currentNode })
983
+ }
984
+
985
+ for (let i = 0; i < currentNode.namedChildCount; i++) {
986
+ traverse(currentNode.namedChild(i))
987
+ }
988
+ }
989
+
990
+ traverse(node)
991
+ return results
992
+ }
993
+
994
+ getNodeText(node, content) {
995
+ return content.slice(node.startIndex, node.endIndex)
996
+ }
997
+
998
+ isNodeExported(node) {
999
+ // Check if node is part of an export statement
1000
+ let parent = node.parent
1001
+ while (parent) {
1002
+ if (parent.type === 'export_statement') {
1003
+ return true
1004
+ }
1005
+ parent = parent.parent
1006
+ }
1007
+ return false
1008
+ }
1009
+
1010
+ calculateOverlap(arrayA, arrayB) {
1011
+ const setA = new Set(arrayA)
1012
+ const setB = new Set(arrayB)
1013
+ const intersection = new Set([...setA].filter(x => setB.has(x)))
1014
+ const union = new Set([...setA, ...setB])
1015
+
1016
+ return union.size === 0 ? 0 : intersection.size / union.size
1017
+ }
1018
+
1019
+ getMostCommon(arr) {
1020
+ const counts = {}
1021
+ for (const item of arr) {
1022
+ counts[item] = (counts[item] || 0) + 1
1023
+ }
1024
+
1025
+ return Object.entries(counts)
1026
+ .sort(([,a], [,b]) => b - a)[0]?.[0] || 'unknown'
1027
+ }
1028
+
1029
+ generateSemanticTags(analysis) {
1030
+ const tags = []
1031
+
1032
+ tags.push(analysis.semanticType)
1033
+ tags.push(...analysis.businessDomain)
1034
+ tags.push(...analysis.technicalPatterns)
1035
+
1036
+ if (analysis.complexity.level) {
1037
+ tags.push(`complexity-${analysis.complexity.level}`)
1038
+ }
1039
+
1040
+ if (analysis.lines < 50) tags.push('small')
1041
+ else if (analysis.lines < 200) tags.push('medium')
1042
+ else tags.push('large')
1043
+
1044
+ return [...new Set(tags)]
1045
+ }
1046
+
1047
+ calculateAstComplexity(rootNode) {
1048
+ let complexity = 1
1049
+
1050
+ const complexityNodes = ['if_statement', 'while_statement', 'for_statement',
1051
+ 'switch_statement', 'try_statement', 'catch_clause']
1052
+
1053
+ const traverse = (node) => {
1054
+ if (complexityNodes.includes(node.type)) {
1055
+ complexity++
1056
+ }
1057
+
1058
+ for (let i = 0; i < node.namedChildCount; i++) {
1059
+ traverse(node.namedChild(i))
1060
+ }
1061
+ }
1062
+
1063
+ traverse(rootNode)
1064
+
1065
+ return {
1066
+ score: complexity,
1067
+ level: complexity < 5 ? 'low' : complexity < 15 ? 'medium' : 'high'
1068
+ }
1069
+ }
1070
+
1071
+ /**
1072
+ * Analyze dependencies from AST
1073
+ */
1074
+ analyzeDependencies(rootNode, content) {
1075
+ const dependencies = {
1076
+ internal: [],
1077
+ external: [],
1078
+ relative: []
1079
+ }
1080
+
1081
+ const imports = this.extractImports(rootNode, content)
1082
+
1083
+ for (const imp of imports) {
1084
+ if (imp.isRelative) {
1085
+ dependencies.relative.push(imp.source)
1086
+ } else if (imp.isExternal) {
1087
+ dependencies.external.push(imp.source)
1088
+ } else {
1089
+ dependencies.internal.push(imp.source)
1090
+ }
1091
+ }
1092
+
1093
+ return dependencies
1094
+ }
1095
+
1096
+ /**
1097
+ * Resolve relative import path to absolute path
1098
+ */
1099
+ resolveImportPath(fromFile, importPath) {
1100
+ // Simplified path resolution
1101
+ const dir = dirname(fromFile)
1102
+ return join(dir, importPath)
1103
+ }
1104
+
1105
+ /**
1106
+ * Find all files connected through imports
1107
+ */
1108
+ findConnectedFiles(startFile, relationshipGraph, visited = new Set()) {
1109
+ if (visited.has(startFile)) return []
1110
+
1111
+ visited.add(startFile)
1112
+ const connected = [startFile]
1113
+
1114
+ if (relationshipGraph[startFile]) {
1115
+ // Follow imports
1116
+ for (const importedFile of relationshipGraph[startFile].imports) {
1117
+ connected.push(...this.findConnectedFiles(importedFile, relationshipGraph, visited))
1118
+ }
1119
+
1120
+ // Follow files that import this one
1121
+ for (const importingFile of relationshipGraph[startFile].importedBy) {
1122
+ connected.push(...this.findConnectedFiles(importingFile, relationshipGraph, visited))
1123
+ }
1124
+ }
1125
+
1126
+ return [...new Set(connected)]
1127
+ }
1128
+
1129
+ /**
1130
+ * Extract imported names from import statement
1131
+ */
1132
+ extractImportedNames(importNode, content) {
1133
+ const names = []
1134
+ // Simplified implementation - would need more complex parsing
1135
+ const importText = this.getNodeText(importNode, content)
1136
+ const match = importText.match(/import\s+(?:\{([^}]+)\}|(\w+))/i)
1137
+ if (match) {
1138
+ if (match[1]) {
1139
+ // Named imports
1140
+ names.push(...match[1].split(',').map(n => n.trim()))
1141
+ } else if (match[2]) {
1142
+ // Default import
1143
+ names.push(match[2])
1144
+ }
1145
+ }
1146
+ return names
1147
+ }
1148
+
1149
+ /**
1150
+ * Extract export name from export declaration
1151
+ */
1152
+ extractExportName(declaration, content) {
1153
+ const text = this.getNodeText(declaration, content)
1154
+ const match = text.match(/(?:function|class|const|let|var)\s+(\w+)/)
1155
+ return match ? match[1] : 'unnamed'
1156
+ }
1157
+
1158
+ /**
1159
+ * Extract JSX element name
1160
+ */
1161
+ extractJsxElementName(jsxNode, content) {
1162
+ try {
1163
+ const openingElement = jsxNode.namedChild(0)
1164
+ if (openingElement) {
1165
+ const nameNode = openingElement.namedChild(0)
1166
+ return nameNode ? this.getNodeText(nameNode, content) : 'unknown'
1167
+ }
1168
+ } catch (error) {
1169
+ return 'unknown'
1170
+ }
1171
+ return 'unknown'
1172
+ }
1173
+
1174
+ /**
1175
+ * Determine chunk type based on files
1176
+ */
1177
+ determineChunkType(files, analysis) {
1178
+ const semanticTypes = files.map(file => analysis[file].semanticType)
1179
+ const mostCommon = this.getMostCommon(semanticTypes)
1180
+
1181
+ const typeMapping = {
1182
+ reactComponent: 'ui-components',
1183
+ reactHook: 'custom-hooks',
1184
+ expressRoute: 'api-routes',
1185
+ expressMiddleware: 'middleware',
1186
+ utilityFunction: 'utilities',
1187
+ typeDefinition: 'type-definitions',
1188
+ configModule: 'configuration',
1189
+ cliCommand: 'cli-commands',
1190
+ apiHandler: 'api-handlers'
1191
+ }
1192
+
1193
+ return typeMapping[mostCommon] || 'mixed-module'
1194
+ }
1195
+
1196
+ /**
1197
+ * Calculate cluster complexity
1198
+ */
1199
+ calculateClusterComplexity(files, analysis) {
1200
+ const complexities = files.map(file => analysis[file].complexity.score)
1201
+ const total = complexities.reduce((sum, c) => sum + c, 0)
1202
+ const average = total / files.length
1203
+
1204
+ return {
1205
+ total,
1206
+ average,
1207
+ level: average < 5 ? 'low' : average < 15 ? 'medium' : 'high'
1208
+ }
1209
+ }
1210
+
1211
+ /**
1212
+ * Calculate cluster dependencies
1213
+ */
1214
+ calculateClusterDependencies(files, analysis) {
1215
+ const allDeps = {
1216
+ internal: new Set(),
1217
+ external: new Set(),
1218
+ relative: new Set()
1219
+ }
1220
+
1221
+ for (const file of files) {
1222
+ const deps = analysis[file].dependencies
1223
+ deps.internal.forEach(dep => allDeps.internal.add(dep))
1224
+ deps.external.forEach(dep => allDeps.external.add(dep))
1225
+ deps.relative.forEach(dep => allDeps.relative.add(dep))
1226
+ }
1227
+
1228
+ return {
1229
+ internal: Array.from(allDeps.internal),
1230
+ external: Array.from(allDeps.external),
1231
+ relative: Array.from(allDeps.relative),
1232
+ totalCount: allDeps.internal.size + allDeps.external.size + allDeps.relative.size
1233
+ }
1234
+ }
1235
+
1236
+ /**
1237
+ * Extract cluster business domains
1238
+ */
1239
+ extractClusterBusinessDomains(files, analysis) {
1240
+ const allDomains = files.flatMap(file => analysis[file].businessDomain)
1241
+ return [...new Set(allDomains)]
1242
+ }
1243
+
1244
+ /**
1245
+ * Extract cluster technical patterns
1246
+ */
1247
+ extractClusterTechnicalPatterns(files, analysis) {
1248
+ const allPatterns = files.flatMap(file => analysis[file].technicalPatterns)
1249
+ return [...new Set(allPatterns)]
1250
+ }
1251
+
1252
+ /**
1253
+ * Determine cluster purpose
1254
+ */
1255
+ determineClusterPurpose(files, analysis) {
1256
+ const semanticTypes = files.map(file => analysis[file].semanticType)
1257
+ const mostCommon = this.getMostCommon(semanticTypes)
1258
+
1259
+ const purposeMapping = {
1260
+ reactComponent: 'User interface components and React elements',
1261
+ reactHook: 'Custom React hooks for state and logic sharing',
1262
+ expressRoute: 'API routes and endpoint handlers',
1263
+ expressMiddleware: 'Express middleware and request processing',
1264
+ utilityFunction: 'Utility functions and helper libraries',
1265
+ typeDefinition: 'TypeScript type definitions and interfaces',
1266
+ configModule: 'Configuration files and settings',
1267
+ cliCommand: 'Command-line interface and CLI tools',
1268
+ apiHandler: 'API client and data fetching logic'
1269
+ }
1270
+
1271
+ return purposeMapping[mostCommon] || 'Mixed functionality module'
1272
+ }
1273
+
1274
+ /**
1275
+ * Calculate cluster cohesion
1276
+ */
1277
+ calculateClusterCohesion(files, relationshipGraph) {
1278
+ if (files.length <= 1) return 1.0
1279
+
1280
+ let connections = 0
1281
+ let totalPossible = files.length * (files.length - 1)
1282
+
1283
+ for (const fileA of files) {
1284
+ for (const fileB of files) {
1285
+ if (fileA !== fileB && relationshipGraph[fileA]) {
1286
+ if (relationshipGraph[fileA].imports.includes(fileB) ||
1287
+ relationshipGraph[fileA].importedBy.includes(fileB) ||
1288
+ relationshipGraph[fileA].semanticSimilarity[fileB] > 0.5) {
1289
+ connections++
1290
+ }
1291
+ }
1292
+ }
1293
+ }
1294
+
1295
+ return totalPossible > 0 ? connections / totalPossible : 0
1296
+ }
1297
+
1298
+ /**
1299
+ * Generate cluster recommendations
1300
+ */
1301
+ generateClusterRecommendations(files, analysis) {
1302
+ const recommendations = []
1303
+
1304
+ const totalSize = files.reduce((sum, file) => sum + analysis[file].size, 0)
1305
+ const avgComplexity = files.reduce((sum, file) => sum + analysis[file].complexity.score, 0) / files.length
1306
+
1307
+ if (totalSize > 100000) {
1308
+ recommendations.push({
1309
+ type: 'warning',
1310
+ message: 'Large cluster - consider splitting by functionality'
1311
+ })
1312
+ }
1313
+
1314
+ if (avgComplexity > 20) {
1315
+ recommendations.push({
1316
+ type: 'warning',
1317
+ message: 'High complexity cluster - review for refactoring opportunities'
1318
+ })
1319
+ }
1320
+
1321
+ if (files.length === 1) {
1322
+ recommendations.push({
1323
+ type: 'info',
1324
+ message: 'Single file cluster - consider grouping with related files'
1325
+ })
1326
+ }
1327
+
1328
+ return recommendations
1329
+ }
1330
+
1331
+ /**
1332
+ * Extract common words from function names
1333
+ */
1334
+ extractCommonWords(functionNames) {
1335
+ const words = functionNames.flatMap(name =>
1336
+ name.split(/(?=[A-Z])|_|-/).filter(word => word.length > 2)
1337
+ )
1338
+
1339
+ const wordCounts = {}
1340
+ for (const word of words) {
1341
+ wordCounts[word] = (wordCounts[word] || 0) + 1
1342
+ }
1343
+
1344
+ return Object.entries(wordCounts)
1345
+ .filter(([, count]) => count > 1)
1346
+ .sort(([, a], [, b]) => b - a)
1347
+ .slice(0, 3)
1348
+ .map(([word]) => word)
1349
+ }
1350
+
1351
+ /**
1352
+ * Generate code signature for caching and similarity comparison
1353
+ */
1354
+ generateCodeSignature(rootNode, content) {
1355
+ const functions = this.extractFunctions(rootNode, content)
1356
+ const classes = this.extractClasses(rootNode, content)
1357
+ const imports = this.extractImports(rootNode, content)
1358
+ const exports = this.extractExports(rootNode, content)
1359
+
1360
+ return {
1361
+ functionCount: functions.length,
1362
+ classCount: classes.length,
1363
+ importCount: imports.length,
1364
+ exportCount: exports.length,
1365
+ exportedFunctions: functions.filter(f => f.isExported).map(f => f.name),
1366
+ importSources: imports.map(i => i.source),
1367
+ hasJsx: this.extractJsxElements(rootNode, content).length > 0,
1368
+ contentHash: this.simpleHash(content)
1369
+ }
1370
+ }
1371
+
1372
+ /**
1373
+ * Simple hash function for content comparison
1374
+ */
1375
+ simpleHash(str) {
1376
+ let hash = 0
1377
+ for (let i = 0; i < str.length; i++) {
1378
+ const char = str.charCodeAt(i)
1379
+ hash = ((hash << 5) - hash) + char
1380
+ hash = hash & hash // Convert to 32bit integer
1381
+ }
1382
+ return hash
1383
+ }
1384
+
1385
+ generateSummary(analysis, chunks) {
1386
+ const files = Object.values(analysis).filter(f => !f.error)
1387
+
1388
+ return {
1389
+ totalFiles: files.length,
1390
+ totalSize: files.reduce((sum, f) => sum + f.size, 0),
1391
+ totalLines: files.reduce((sum, f) => sum + f.lines, 0),
1392
+ semanticTypes: this.countByProperty(files, 'semanticType'),
1393
+ businessDomains: this.countDomains(files),
1394
+ technicalPatterns: this.countPatterns(files),
1395
+ totalChunks: chunks.length,
1396
+ averageChunkSize: chunks.reduce((sum, c) => sum + c.size, 0) / chunks.length,
1397
+ chunkTypes: this.countByProperty(chunks, 'type')
1398
+ }
1399
+ }
1400
+
1401
+ generateRecommendations(analysis, chunks) {
1402
+ const recommendations = []
1403
+
1404
+ // Add specific recommendations based on treesitter analysis
1405
+
1406
+ return recommendations
1407
+ }
1408
+
1409
+ countByProperty(items, property) {
1410
+ const counts = {}
1411
+ for (const item of items) {
1412
+ const value = typeof property === 'function' ? property(item) : item[property]
1413
+ counts[value] = (counts[value] || 0) + 1
1414
+ }
1415
+ return counts
1416
+ }
1417
+
1418
+ countDomains(files) {
1419
+ const allDomains = files.flatMap(f => f.businessDomain)
1420
+ return this.countByProperty(allDomains, d => d)
1421
+ }
1422
+
1423
+ countPatterns(files) {
1424
+ const allPatterns = files.flatMap(f => f.technicalPatterns)
1425
+ return this.countByProperty(allPatterns, p => p)
1426
+ }
1427
+
1428
+ /**
1429
+ * Generate tags for a chunk based on its characteristics
1430
+ */
1431
+ generateTags(files, analysis) {
1432
+ const tags = new Set()
1433
+
1434
+ // Add semantic type tags
1435
+ const semanticTypes = files.map(file => analysis[file].semanticType)
1436
+ for (const type of semanticTypes) {
1437
+ if (type === 'reactComponent') tags.add('react-component')
1438
+ if (type === 'reactHook') tags.add('react-hook')
1439
+ if (type === 'utilityFunction') tags.add('utility')
1440
+ if (type === 'expressRoute') tags.add('api')
1441
+ if (type === 'configModule') tags.add('config')
1442
+ }
1443
+
1444
+ // Add directory-based tags
1445
+ const commonPath = this.findCommonPath(files)
1446
+ if (commonPath) {
1447
+ if (commonPath.includes('/components/')) tags.add('component')
1448
+ if (commonPath.includes('/hooks/')) tags.add('hook')
1449
+ if (commonPath.includes('/lib/')) tags.add('library')
1450
+ if (commonPath.includes('/utils/')) tags.add('utility')
1451
+ if (commonPath.includes('/ui/')) tags.add('ui-library')
1452
+ }
1453
+
1454
+ // Add complexity tags
1455
+ const avgComplexity = files.reduce((sum, file) => sum + analysis[file].complexity.score, 0) / files.length
1456
+ if (avgComplexity > 15) tags.add('complex')
1457
+ if (avgComplexity < 5) tags.add('simple')
1458
+
1459
+ return Array.from(tags)
1460
+ }
1461
+
1462
+ /**
1463
+ * Find common path prefix for a group of files
1464
+ */
1465
+ findCommonPath(files) {
1466
+ if (files.length === 0) return null
1467
+ if (files.length === 1) return files[0]
1468
+
1469
+ const pathParts = files.map(file => file.split('/'))
1470
+ const commonParts = []
1471
+
1472
+ for (let i = 0; i < Math.min(...pathParts.map(p => p.length)); i++) {
1473
+ const part = pathParts[0][i]
1474
+ if (pathParts.every(p => p[i] === part)) {
1475
+ commonParts.push(part)
1476
+ } else {
1477
+ break
1478
+ }
1479
+ }
1480
+
1481
+ return commonParts.length > 0 ? commonParts.join('/') : null
1482
+ }
1483
+ }
1484
+
1485
+ export default TreesitterSemanticChunker