cntx-ui 2.0.12 → 2.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/function-level-chunker.js +406 -0
- package/lib/semantic-integration.js +441 -0
- package/lib/semantic-splitter.js +595 -0
- package/lib/treesitter-semantic-chunker.js +1485 -0
- package/package.json +5 -1
- package/server.js +285 -45
- package/web/dist/assets/index-Ci1Q-YrQ.js +611 -0
- package/web/dist/assets/index-IUp4q_fr.css +1 -0
- package/web/dist/index.html +2 -2
- package/web/dist/vite.svg +21 -1
- package/web/dist/assets/index-8Kli5657.js +0 -541
- package/web/dist/assets/index-C-Ldi33E.css +0 -1
|
@@ -0,0 +1,1485 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Treesitter-based Semantic Chunker for JavaScript/TypeScript Files
|
|
3
|
+
* Uses tree-sitter for true AST-based code analysis and semantic chunking
|
|
4
|
+
* Supports JS/TS/JSX/TSX with equal treatment
|
|
5
|
+
* Node ecosystem focus: React components, Express APIs, CLI tools, utilities
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { readFileSync, existsSync } from 'fs'
|
|
9
|
+
import { extname, basename, dirname, relative, join } from 'path'
|
|
10
|
+
import glob from 'glob'
|
|
11
|
+
import { promisify } from 'util'
|
|
12
|
+
import Parser from 'tree-sitter'
|
|
13
|
+
import JavaScript from 'tree-sitter-javascript'
|
|
14
|
+
import TypeScript from 'tree-sitter-typescript'
|
|
15
|
+
|
|
16
|
+
// Promise-returning wrapper so glob can be awaited in findFiles.
// NOTE(review): assumes the default export of `glob` is the callback-style function — confirm against the installed glob version.
const globAsync = promisify(glob)
|
|
17
|
+
|
|
18
|
+
class TreesitterSemanticChunker {
|
|
19
|
+
constructor(options = {}) {
|
|
20
|
+
this.options = {
|
|
21
|
+
includeImports: true,
|
|
22
|
+
includeExports: true,
|
|
23
|
+
detectComponentTypes: true,
|
|
24
|
+
groupRelatedFiles: true,
|
|
25
|
+
minChunkSize: 100,
|
|
26
|
+
maxChunkSize: 50000,
|
|
27
|
+
namingStrategy: 'domain-based', // domain-based, pattern-based, graph-based
|
|
28
|
+
...options
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Initialize parsers for different languages
|
|
32
|
+
this.parsers = {}
|
|
33
|
+
this.initializeParsers()
|
|
34
|
+
|
|
35
|
+
// Semantic patterns for Node ecosystem
|
|
36
|
+
this.semanticPatterns = {
|
|
37
|
+
reactComponent: this.isReactComponent.bind(this),
|
|
38
|
+
reactHook: this.isReactHook.bind(this),
|
|
39
|
+
expressRoute: this.isExpressRoute.bind(this),
|
|
40
|
+
expressMiddleware: this.isExpressMiddleware.bind(this),
|
|
41
|
+
cliCommand: this.isCliCommand.bind(this),
|
|
42
|
+
utilityFunction: this.isUtilityFunction.bind(this),
|
|
43
|
+
apiHandler: this.isApiHandler.bind(this),
|
|
44
|
+
typeDefinition: this.isTypeDefinition.bind(this),
|
|
45
|
+
configModule: this.isConfigModule.bind(this)
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Initialize tree-sitter parsers for different languages
|
|
51
|
+
*/
|
|
52
|
+
initializeParsers() {
|
|
53
|
+
// JavaScript parser
|
|
54
|
+
this.parsers.javascript = new Parser()
|
|
55
|
+
this.parsers.javascript.setLanguage(JavaScript)
|
|
56
|
+
|
|
57
|
+
// TypeScript parser
|
|
58
|
+
this.parsers.typescript = new Parser()
|
|
59
|
+
this.parsers.typescript.setLanguage(TypeScript.typescript)
|
|
60
|
+
|
|
61
|
+
// TSX parser
|
|
62
|
+
this.parsers.tsx = new Parser()
|
|
63
|
+
this.parsers.tsx.setLanguage(TypeScript.tsx)
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Get appropriate parser for file extension
|
|
68
|
+
*/
|
|
69
|
+
getParser(filePath) {
|
|
70
|
+
const ext = extname(filePath)
|
|
71
|
+
switch (ext) {
|
|
72
|
+
case '.ts': return this.parsers.typescript
|
|
73
|
+
case '.tsx': return this.parsers.tsx
|
|
74
|
+
case '.js':
|
|
75
|
+
case '.jsx':
|
|
76
|
+
default: return this.parsers.javascript
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Main entry point - analyze files and create semantic chunks
|
|
82
|
+
*/
|
|
83
|
+
async analyzeProject(projectPath, patterns = ['**/*.{js,jsx,ts,tsx}']) {
|
|
84
|
+
console.log('🔍 Starting treesitter-based semantic analysis...')
|
|
85
|
+
|
|
86
|
+
const files = await this.findFiles(projectPath, patterns)
|
|
87
|
+
console.log(`📁 Found ${files.length} files to analyze`)
|
|
88
|
+
|
|
89
|
+
const analysis = await this.analyzeFiles(files, projectPath)
|
|
90
|
+
const successfulFiles = Object.keys(analysis).filter(f => !analysis[f].error)
|
|
91
|
+
console.log(`✅ Analyzed ${Object.keys(analysis).length} files (${successfulFiles.length} successful)`)
|
|
92
|
+
if (successfulFiles.length > 0) {
|
|
93
|
+
console.log('📝 Sample successful files:', successfulFiles.slice(0, 5))
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const relationshipGraph = this.buildRelationshipGraph(analysis)
|
|
97
|
+
console.log(`🔗 Built relationship graph with ${Object.keys(relationshipGraph).length} nodes`)
|
|
98
|
+
|
|
99
|
+
const chunks = await this.createSmartChunks(analysis, relationshipGraph)
|
|
100
|
+
console.log(`📦 Created ${chunks.length} semantic chunks`)
|
|
101
|
+
|
|
102
|
+
return {
|
|
103
|
+
summary: this.generateSummary(analysis, chunks),
|
|
104
|
+
files: analysis,
|
|
105
|
+
chunks: chunks,
|
|
106
|
+
relationshipGraph,
|
|
107
|
+
recommendations: this.generateRecommendations(analysis, chunks)
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Find files matching patterns
|
|
113
|
+
*/
|
|
114
|
+
async findFiles(projectPath, patterns) {
|
|
115
|
+
const files = []
|
|
116
|
+
|
|
117
|
+
for (const pattern of patterns) {
|
|
118
|
+
const matches = await globAsync(pattern, {
|
|
119
|
+
cwd: projectPath,
|
|
120
|
+
ignore: [
|
|
121
|
+
'node_modules/**', 'dist/**', 'build/**', '.git/**',
|
|
122
|
+
'*.test.*', '*.spec.*', '**/test/**', '**/tests/**',
|
|
123
|
+
'**/*.min.js', '**/*.bundle.js', '**/coverage/**',
|
|
124
|
+
'**/.next/**', '**/.cache/**', '**/tmp/**', '**/temp/**'
|
|
125
|
+
]
|
|
126
|
+
})
|
|
127
|
+
|
|
128
|
+
// Extra filter to ensure no node_modules files get through
|
|
129
|
+
const filteredMatches = matches.filter(file =>
|
|
130
|
+
!file.includes('node_modules') &&
|
|
131
|
+
!file.includes('dist/') &&
|
|
132
|
+
!file.includes('.min.') &&
|
|
133
|
+
!file.includes('.bundle.')
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
files.push(...filteredMatches)
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
return [...new Set(files)] // Remove duplicates
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Analyze all files using treesitter
|
|
144
|
+
*/
|
|
145
|
+
async analyzeFiles(filePaths, projectPath) {
|
|
146
|
+
const analysis = {}
|
|
147
|
+
|
|
148
|
+
for (const relativePath of filePaths) {
|
|
149
|
+
// Bulletproof check to skip node_modules
|
|
150
|
+
if (relativePath.includes('node_modules')) {
|
|
151
|
+
console.log(`Skipping node_modules file: ${relativePath}`);
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const fullPath = join(projectPath, relativePath)
|
|
156
|
+
if (!existsSync(fullPath)) continue
|
|
157
|
+
|
|
158
|
+
try {
|
|
159
|
+
const content = readFileSync(fullPath, 'utf8')
|
|
160
|
+
const fileAnalysis = await this.analyzeFile(fullPath, content)
|
|
161
|
+
fileAnalysis.path = relativePath // Store relative path
|
|
162
|
+
analysis[relativePath] = fileAnalysis
|
|
163
|
+
} catch (error) {
|
|
164
|
+
// Silently skip files that can't be parsed - they won't be included in semantic analysis
|
|
165
|
+
// This is normal for complex files or unsupported syntax patterns
|
|
166
|
+
analysis[relativePath] = { error: error.message, path: relativePath }
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
return analysis
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Analyze a single file using treesitter AST
|
|
175
|
+
*/
|
|
176
|
+
async analyzeFile(filePath, content) {
|
|
177
|
+
const parser = this.getParser(filePath)
|
|
178
|
+
|
|
179
|
+
// Skip files that are too large or have syntax errors
|
|
180
|
+
if (content.length > 500000) { // Skip files > 500KB
|
|
181
|
+
throw new Error('File too large')
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
let tree, rootNode
|
|
185
|
+
try {
|
|
186
|
+
// Use simple string parsing (confirmed working in tests)
|
|
187
|
+
tree = parser.parse(content)
|
|
188
|
+
rootNode = tree.rootNode
|
|
189
|
+
|
|
190
|
+
// Check for parse errors
|
|
191
|
+
if (rootNode.hasError()) {
|
|
192
|
+
throw new Error('Parse error in file')
|
|
193
|
+
}
|
|
194
|
+
} catch (error) {
|
|
195
|
+
throw new Error(`Tree-sitter parse failed: ${error.message}`)
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
const analysis = {
|
|
199
|
+
path: filePath,
|
|
200
|
+
fileName: basename(filePath),
|
|
201
|
+
dirName: basename(dirname(filePath)),
|
|
202
|
+
extension: extname(filePath),
|
|
203
|
+
size: content.length,
|
|
204
|
+
lines: content.split('\n').length,
|
|
205
|
+
|
|
206
|
+
// AST-based analysis
|
|
207
|
+
ast: {
|
|
208
|
+
functions: this.extractFunctions(rootNode, content),
|
|
209
|
+
classes: this.extractClasses(rootNode, content),
|
|
210
|
+
imports: this.extractImports(rootNode, content),
|
|
211
|
+
exports: this.extractExports(rootNode, content),
|
|
212
|
+
variables: this.extractVariables(rootNode, content),
|
|
213
|
+
jsxElements: this.extractJsxElements(rootNode, content),
|
|
214
|
+
typeDefinitions: this.extractTypeDefinitions(rootNode, content)
|
|
215
|
+
},
|
|
216
|
+
|
|
217
|
+
// Semantic classification
|
|
218
|
+
semanticType: this.classifyFileSemantics(rootNode, content, filePath),
|
|
219
|
+
businessDomain: this.extractBusinessDomain(rootNode, content, filePath),
|
|
220
|
+
technicalPatterns: this.identifyTechnicalPatterns(rootNode, content),
|
|
221
|
+
|
|
222
|
+
// Relationships
|
|
223
|
+
dependencies: this.analyzeDependencies(rootNode, content),
|
|
224
|
+
complexity: this.calculateAstComplexity(rootNode),
|
|
225
|
+
|
|
226
|
+
// Metadata
|
|
227
|
+
codeSignature: this.generateCodeSignature(rootNode, content)
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// Generate semantic tags based on AST analysis
|
|
231
|
+
analysis.semanticTags = this.generateSemanticTags(analysis)
|
|
232
|
+
|
|
233
|
+
return analysis
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* Extract function declarations from AST
|
|
238
|
+
*/
|
|
239
|
+
extractFunctions(rootNode, content) {
|
|
240
|
+
const functions = []
|
|
241
|
+
|
|
242
|
+
// Function declarations
|
|
243
|
+
const functionDeclarations = this.queryNode(rootNode, '(function_declaration name: (identifier) @name)')
|
|
244
|
+
functions.push(...functionDeclarations.map(capture => ({
|
|
245
|
+
name: this.getNodeText(capture.node, content),
|
|
246
|
+
type: 'function_declaration',
|
|
247
|
+
startPosition: capture.node.startPosition,
|
|
248
|
+
endPosition: capture.node.endPosition,
|
|
249
|
+
isExported: this.isNodeExported(capture.node)
|
|
250
|
+
})))
|
|
251
|
+
|
|
252
|
+
// Arrow functions
|
|
253
|
+
const arrowFunctions = this.queryNode(rootNode, '(variable_declarator name: (identifier) @name value: (arrow_function))')
|
|
254
|
+
functions.push(...arrowFunctions.map(capture => ({
|
|
255
|
+
name: this.getNodeText(capture.node, content),
|
|
256
|
+
type: 'arrow_function',
|
|
257
|
+
startPosition: capture.node.startPosition,
|
|
258
|
+
endPosition: capture.node.endPosition,
|
|
259
|
+
isExported: this.isNodeExported(capture.node.parent.parent)
|
|
260
|
+
})))
|
|
261
|
+
|
|
262
|
+
// Method definitions
|
|
263
|
+
const methods = this.queryNode(rootNode, '(method_definition name: (property_name) @name)')
|
|
264
|
+
functions.push(...methods.map(capture => ({
|
|
265
|
+
name: this.getNodeText(capture.node, content),
|
|
266
|
+
type: 'method',
|
|
267
|
+
startPosition: capture.node.startPosition,
|
|
268
|
+
endPosition: capture.node.endPosition,
|
|
269
|
+
isExported: false // methods are part of classes
|
|
270
|
+
})))
|
|
271
|
+
|
|
272
|
+
return functions
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* Extract class declarations from AST
|
|
277
|
+
*/
|
|
278
|
+
extractClasses(rootNode, content) {
|
|
279
|
+
const classes = []
|
|
280
|
+
|
|
281
|
+
const classDeclarations = this.queryNode(rootNode, '(class_declaration name: (identifier) @name)')
|
|
282
|
+
classes.push(...classDeclarations.map(capture => ({
|
|
283
|
+
name: this.getNodeText(capture.node, content),
|
|
284
|
+
type: 'class',
|
|
285
|
+
startPosition: capture.node.startPosition,
|
|
286
|
+
endPosition: capture.node.endPosition,
|
|
287
|
+
isExported: this.isNodeExported(capture.node.parent),
|
|
288
|
+
methods: this.extractClassMethods(capture.node.parent, content)
|
|
289
|
+
})))
|
|
290
|
+
|
|
291
|
+
return classes
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/**
|
|
295
|
+
* Extract class methods
|
|
296
|
+
*/
|
|
297
|
+
extractClassMethods(classNode, content) {
|
|
298
|
+
const methods = []
|
|
299
|
+
|
|
300
|
+
try {
|
|
301
|
+
const methodNodes = this.queryNode(classNode, '(method_definition)')
|
|
302
|
+
methods.push(...methodNodes.map(capture => ({
|
|
303
|
+
name: this.getNodeText(capture.node, content),
|
|
304
|
+
type: 'method',
|
|
305
|
+
startPosition: capture.node.startPosition,
|
|
306
|
+
endPosition: capture.node.endPosition
|
|
307
|
+
})))
|
|
308
|
+
} catch (error) {
|
|
309
|
+
// Handle case where method extraction fails
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
return methods
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* Extract import statements from AST
|
|
317
|
+
*/
|
|
318
|
+
extractImports(rootNode, content) {
|
|
319
|
+
const imports = []
|
|
320
|
+
|
|
321
|
+
const importStatements = this.queryNode(rootNode, '(import_statement source: (string) @source)')
|
|
322
|
+
imports.push(...importStatements.map(capture => {
|
|
323
|
+
const source = this.getNodeText(capture.node, content).replace(/['"]/g, '')
|
|
324
|
+
return {
|
|
325
|
+
source,
|
|
326
|
+
statement: this.getNodeText(capture.node.parent, content),
|
|
327
|
+
isRelative: source.startsWith('.'),
|
|
328
|
+
isExternal: !source.startsWith('.') && !source.startsWith('/'),
|
|
329
|
+
importedNames: this.extractImportedNames(capture.node.parent, content)
|
|
330
|
+
}
|
|
331
|
+
}))
|
|
332
|
+
|
|
333
|
+
return imports
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* Extract export statements from AST
|
|
338
|
+
*/
|
|
339
|
+
extractExports(rootNode, content) {
|
|
340
|
+
const exports = []
|
|
341
|
+
|
|
342
|
+
// Export declarations
|
|
343
|
+
const exportDeclarations = this.queryNode(rootNode, '(export_statement)')
|
|
344
|
+
exports.push(...exportDeclarations.map(capture => {
|
|
345
|
+
const exportNode = capture.node
|
|
346
|
+
const declaration = exportNode.namedChild(0)
|
|
347
|
+
|
|
348
|
+
if (declaration) {
|
|
349
|
+
return {
|
|
350
|
+
type: declaration.type === 'export_clause' ? 'named' : 'declaration',
|
|
351
|
+
name: this.extractExportName(declaration, content),
|
|
352
|
+
statement: this.getNodeText(exportNode, content),
|
|
353
|
+
isDefault: this.getNodeText(exportNode, content).includes('default')
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
return null
|
|
357
|
+
}).filter(Boolean))
|
|
358
|
+
|
|
359
|
+
return exports
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
/**
|
|
363
|
+
* Extract variable declarations from AST
|
|
364
|
+
*/
|
|
365
|
+
extractVariables(rootNode, content) {
|
|
366
|
+
const variables = []
|
|
367
|
+
|
|
368
|
+
const variableDeclarations = this.queryNode(rootNode, '(variable_declarator name: (identifier) @name)')
|
|
369
|
+
variables.push(...variableDeclarations.map(capture => ({
|
|
370
|
+
name: this.getNodeText(capture.node, content),
|
|
371
|
+
type: 'variable',
|
|
372
|
+
startPosition: capture.node.startPosition,
|
|
373
|
+
endPosition: capture.node.endPosition,
|
|
374
|
+
isExported: this.isNodeExported(capture.node.parent.parent),
|
|
375
|
+
declarationType: capture.node.parent.parent.type // const, let, var
|
|
376
|
+
})))
|
|
377
|
+
|
|
378
|
+
return variables
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Extract JSX elements from AST (for React components)
|
|
383
|
+
*/
|
|
384
|
+
extractJsxElements(rootNode, content) {
|
|
385
|
+
const jsxElements = []
|
|
386
|
+
|
|
387
|
+
try {
|
|
388
|
+
const jsxNodes = this.queryNode(rootNode, '(jsx_element)')
|
|
389
|
+
jsxElements.push(...jsxNodes.map(capture => ({
|
|
390
|
+
elementName: this.extractJsxElementName(capture.node, content),
|
|
391
|
+
startPosition: capture.node.startPosition,
|
|
392
|
+
endPosition: capture.node.endPosition
|
|
393
|
+
})))
|
|
394
|
+
} catch (error) {
|
|
395
|
+
// JSX might not be available in JavaScript parser
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
return jsxElements
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
/**
|
|
402
|
+
* Extract TypeScript type definitions from AST
|
|
403
|
+
*/
|
|
404
|
+
extractTypeDefinitions(rootNode, content) {
|
|
405
|
+
const types = []
|
|
406
|
+
|
|
407
|
+
try {
|
|
408
|
+
// Interface declarations
|
|
409
|
+
const interfaces = this.queryNode(rootNode, '(interface_declaration name: (type_identifier) @name)')
|
|
410
|
+
types.push(...interfaces.map(capture => ({
|
|
411
|
+
name: this.getNodeText(capture.node, content),
|
|
412
|
+
type: 'interface',
|
|
413
|
+
startPosition: capture.node.startPosition,
|
|
414
|
+
endPosition: capture.node.endPosition,
|
|
415
|
+
isExported: this.isNodeExported(capture.node.parent)
|
|
416
|
+
})))
|
|
417
|
+
|
|
418
|
+
// Type alias declarations
|
|
419
|
+
const typeAliases = this.queryNode(rootNode, '(type_alias_declaration name: (type_identifier) @name)')
|
|
420
|
+
types.push(...typeAliases.map(capture => ({
|
|
421
|
+
name: this.getNodeText(capture.node, content),
|
|
422
|
+
type: 'type_alias',
|
|
423
|
+
startPosition: capture.node.startPosition,
|
|
424
|
+
endPosition: capture.node.endPosition,
|
|
425
|
+
isExported: this.isNodeExported(capture.node.parent)
|
|
426
|
+
})))
|
|
427
|
+
} catch (error) {
|
|
428
|
+
// TypeScript types might not be available in JavaScript parser
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
return types
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
/**
|
|
435
|
+
* Classify file semantics based on AST patterns
|
|
436
|
+
*/
|
|
437
|
+
classifyFileSemantics(rootNode, content, filePath) {
|
|
438
|
+
const classifications = []
|
|
439
|
+
|
|
440
|
+
// Test each semantic pattern
|
|
441
|
+
for (const [patternName, patternFn] of Object.entries(this.semanticPatterns)) {
|
|
442
|
+
if (patternFn(rootNode, content, filePath)) {
|
|
443
|
+
classifications.push(patternName)
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// Return primary classification (most specific first)
|
|
448
|
+
const priority = ['reactComponent', 'reactHook', 'expressRoute', 'expressMiddleware',
|
|
449
|
+
'cliCommand', 'apiHandler', 'typeDefinition', 'configModule', 'utilityFunction']
|
|
450
|
+
|
|
451
|
+
for (const pattern of priority) {
|
|
452
|
+
if (classifications.includes(pattern)) {
|
|
453
|
+
return pattern
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
return 'module'
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
/**
|
|
461
|
+
* Semantic pattern: React Component
|
|
462
|
+
*/
|
|
463
|
+
isReactComponent(rootNode, content, filePath) {
|
|
464
|
+
// Check for JSX elements
|
|
465
|
+
const hasJsx = this.queryNode(rootNode, '(jsx_element)').length > 0
|
|
466
|
+
|
|
467
|
+
// Check for React imports
|
|
468
|
+
const hasReactImport = content.includes("import React") || content.includes("from 'react'")
|
|
469
|
+
|
|
470
|
+
// Check for component naming pattern
|
|
471
|
+
const fileName = basename(filePath, extname(filePath))
|
|
472
|
+
const hasComponentName = fileName[0] === fileName[0].toUpperCase()
|
|
473
|
+
|
|
474
|
+
// Check for function that returns JSX
|
|
475
|
+
const functions = this.extractFunctions(rootNode, content)
|
|
476
|
+
const hasComponentFunction = functions.some(fn =>
|
|
477
|
+
fn.isExported && fn.name[0] === fn.name[0].toUpperCase()
|
|
478
|
+
)
|
|
479
|
+
|
|
480
|
+
return (hasJsx && (hasReactImport || hasComponentName)) ||
|
|
481
|
+
(hasComponentFunction && hasReactImport)
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
/**
|
|
485
|
+
* Semantic pattern: React Hook
|
|
486
|
+
*/
|
|
487
|
+
isReactHook(rootNode, content, filePath) {
|
|
488
|
+
const fileName = basename(filePath, extname(filePath))
|
|
489
|
+
const hasHookName = fileName.startsWith('use') && fileName[3] === fileName[3].toUpperCase()
|
|
490
|
+
|
|
491
|
+
const functions = this.extractFunctions(rootNode, content)
|
|
492
|
+
const hasHookFunction = functions.some(fn =>
|
|
493
|
+
fn.name.startsWith('use') && fn.name[3] === fn.name[3].toUpperCase() && fn.isExported
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
const hasReactHookImports = content.includes("from 'react'") &&
|
|
497
|
+
(content.includes('useState') || content.includes('useEffect'))
|
|
498
|
+
|
|
499
|
+
return hasHookName || (hasHookFunction && hasReactHookImports)
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
/**
|
|
503
|
+
* Semantic pattern: Express Route
|
|
504
|
+
*/
|
|
505
|
+
isExpressRoute(rootNode, content, filePath) {
|
|
506
|
+
const hasExpressImport = content.includes("from 'express'") || content.includes("require('express')")
|
|
507
|
+
const hasRouterMethods = /\.(get|post|put|delete|patch)\s*\(/.test(content)
|
|
508
|
+
const hasRoutePattern = /['"`]\/[^'"`]*['"`]/.test(content)
|
|
509
|
+
|
|
510
|
+
return hasExpressImport && hasRouterMethods && hasRoutePattern
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
/**
|
|
514
|
+
* Semantic pattern: Express Middleware
|
|
515
|
+
*/
|
|
516
|
+
isExpressMiddleware(rootNode, content, filePath) {
|
|
517
|
+
const hasMiddlewarePattern = /\(req,\s*res,\s*next\)|function\s*\([^)]*req[^)]*res[^)]*next/.test(content)
|
|
518
|
+
const hasExpressImport = content.includes("from 'express'") || content.includes("require('express')")
|
|
519
|
+
const fileName = basename(filePath).toLowerCase()
|
|
520
|
+
|
|
521
|
+
return (hasMiddlewarePattern && hasExpressImport) || fileName.includes('middleware')
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
/**
|
|
525
|
+
* Semantic pattern: CLI Command
|
|
526
|
+
*/
|
|
527
|
+
isCliCommand(rootNode, content, filePath) {
|
|
528
|
+
const hasCommanderImport = content.includes('commander') || content.includes('yargs')
|
|
529
|
+
const hasProcessArgv = content.includes('process.argv')
|
|
530
|
+
const hasCliPatterns = content.includes('.command(') || content.includes('.option(')
|
|
531
|
+
const fileName = basename(filePath).toLowerCase()
|
|
532
|
+
|
|
533
|
+
return hasCommanderImport || (hasProcessArgv && hasCliPatterns) || fileName.includes('cli')
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
/**
|
|
537
|
+
* Semantic pattern: Utility Function
|
|
538
|
+
*/
|
|
539
|
+
isUtilityFunction(rootNode, content, filePath) {
|
|
540
|
+
const functions = this.extractFunctions(rootNode, content)
|
|
541
|
+
const hasMultipleExportedFunctions = functions.filter(fn => fn.isExported).length > 1
|
|
542
|
+
|
|
543
|
+
const fileName = basename(filePath).toLowerCase()
|
|
544
|
+
const hasUtilityName = fileName.includes('util') || fileName.includes('helper') || fileName.includes('lib')
|
|
545
|
+
|
|
546
|
+
const hasNoDomSpecificImports = !content.includes('react') && !content.includes('express')
|
|
547
|
+
|
|
548
|
+
return hasUtilityName || (hasMultipleExportedFunctions && hasNoDomSpecificImports)
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
/**
|
|
552
|
+
* Semantic pattern: API Handler
|
|
553
|
+
*/
|
|
554
|
+
isApiHandler(rootNode, content, filePath) {
|
|
555
|
+
const hasApiPattern = /api|handler|controller/i.test(filePath)
|
|
556
|
+
const hasFetchPattern = content.includes('fetch(') || content.includes('axios')
|
|
557
|
+
const hasHttpMethods = /\b(GET|POST|PUT|DELETE|PATCH)\b/.test(content)
|
|
558
|
+
|
|
559
|
+
return hasApiPattern || (hasFetchPattern && hasHttpMethods)
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
/**
|
|
563
|
+
* Semantic pattern: Type Definition
|
|
564
|
+
*/
|
|
565
|
+
isTypeDefinition(rootNode, content, filePath) {
|
|
566
|
+
const types = this.extractTypeDefinitions(rootNode, content)
|
|
567
|
+
const hasTypeDefinitions = types.length > 0
|
|
568
|
+
|
|
569
|
+
const fileName = basename(filePath).toLowerCase()
|
|
570
|
+
const hasTypeFileName = fileName.includes('type') || fileName.includes('.d.ts')
|
|
571
|
+
|
|
572
|
+
const hasOnlyTypes = hasTypeDefinitions &&
|
|
573
|
+
this.extractFunctions(rootNode, content).length === 0 &&
|
|
574
|
+
this.extractClasses(rootNode, content).length === 0
|
|
575
|
+
|
|
576
|
+
return hasTypeFileName || hasOnlyTypes
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
/**
|
|
580
|
+
* Semantic pattern: Config Module
|
|
581
|
+
*/
|
|
582
|
+
isConfigModule(rootNode, content, filePath) {
|
|
583
|
+
const fileName = basename(filePath).toLowerCase()
|
|
584
|
+
const hasConfigName = fileName.includes('config') || fileName.includes('setting')
|
|
585
|
+
|
|
586
|
+
const hasConfigPatterns = content.includes('module.exports') || content.includes('export default')
|
|
587
|
+
const hasConfigObject = /\{[\s\S]*\}/.test(content) && !/function|class/.test(content)
|
|
588
|
+
|
|
589
|
+
return hasConfigName && (hasConfigPatterns || hasConfigObject)
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* Extract business domain terms from code
|
|
594
|
+
*/
|
|
595
|
+
extractBusinessDomain(rootNode, content, filePath) {
|
|
596
|
+
const domains = []
|
|
597
|
+
|
|
598
|
+
// Focus on meaningful path segments instead of generic business terms
|
|
599
|
+
const pathSegments = filePath.split('/').filter(s => s && s !== 'src' && s !== 'lib' && s !== 'components')
|
|
600
|
+
const fileName = basename(filePath, extname(filePath))
|
|
601
|
+
|
|
602
|
+
// Extract domain from directory structure (more reliable than keywords)
|
|
603
|
+
if (pathSegments.length > 0) {
|
|
604
|
+
const relevantSegments = pathSegments.slice(-2) // Last 2 directories
|
|
605
|
+
domains.push(...relevantSegments.map(s => s.toLowerCase()))
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
// Add meaningful file-based domains
|
|
609
|
+
if (fileName.toLowerCase().includes('config')) domains.push('configuration')
|
|
610
|
+
if (fileName.toLowerCase().includes('test')) domains.push('testing')
|
|
611
|
+
if (fileName.toLowerCase().includes('util')) domains.push('utilities')
|
|
612
|
+
if (fileName.toLowerCase().includes('api')) domains.push('api')
|
|
613
|
+
if (fileName.toLowerCase().includes('ui') || fileName.toLowerCase().includes('component')) {
|
|
614
|
+
domains.push('user-interface')
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
// Only return meaningful, non-generic domains
|
|
618
|
+
return [...new Set(domains)].filter(domain =>
|
|
619
|
+
domain.length > 2 && !['web', 'src', 'ts', 'js', 'tsx', 'jsx'].includes(domain)
|
|
620
|
+
)
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
/**
|
|
624
|
+
* Identify technical patterns in the code
|
|
625
|
+
*/
|
|
626
|
+
identifyTechnicalPatterns(rootNode, content) {
|
|
627
|
+
const patterns = []
|
|
628
|
+
|
|
629
|
+
// Framework patterns
|
|
630
|
+
if (content.includes('react')) patterns.push('react')
|
|
631
|
+
if (content.includes('express')) patterns.push('express')
|
|
632
|
+
if (content.includes('typescript')) patterns.push('typescript')
|
|
633
|
+
|
|
634
|
+
// Architecture patterns
|
|
635
|
+
if (content.includes('async') && content.includes('await')) patterns.push('async-await')
|
|
636
|
+
if (content.includes('Promise')) patterns.push('promises')
|
|
637
|
+
if (content.includes('class') && content.includes('extends')) patterns.push('inheritance')
|
|
638
|
+
|
|
639
|
+
// Design patterns
|
|
640
|
+
const functions = this.extractFunctions(rootNode, content)
|
|
641
|
+
if (functions.some(f => f.name.includes('Factory'))) patterns.push('factory-pattern')
|
|
642
|
+
if (functions.some(f => f.name.includes('Observer'))) patterns.push('observer-pattern')
|
|
643
|
+
|
|
644
|
+
return patterns
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
/**
|
|
648
|
+
* Build relationship graph between files
|
|
649
|
+
*/
|
|
650
|
+
buildRelationshipGraph(analysis) {
|
|
651
|
+
const graph = {}
|
|
652
|
+
|
|
653
|
+
for (const [filePath, fileAnalysis] of Object.entries(analysis)) {
|
|
654
|
+
if (fileAnalysis.error) continue
|
|
655
|
+
|
|
656
|
+
graph[filePath] = {
|
|
657
|
+
imports: [],
|
|
658
|
+
importedBy: [],
|
|
659
|
+
semanticSimilarity: {},
|
|
660
|
+
businessDomainOverlap: {},
|
|
661
|
+
technicalPatternOverlap: {}
|
|
662
|
+
}
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
// Build import relationships
|
|
666
|
+
for (const [filePath, fileAnalysis] of Object.entries(analysis)) {
|
|
667
|
+
if (fileAnalysis.error) continue
|
|
668
|
+
|
|
669
|
+
for (const imp of fileAnalysis.ast.imports) {
|
|
670
|
+
if (imp.isRelative) {
|
|
671
|
+
// Resolve relative import to actual file path
|
|
672
|
+
const importPath = this.resolveImportPath(filePath, imp.source)
|
|
673
|
+
if (graph[importPath]) {
|
|
674
|
+
graph[filePath].imports.push(importPath)
|
|
675
|
+
graph[importPath].importedBy.push(filePath)
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
// Calculate semantic similarities
|
|
682
|
+
for (const [fileA, analysisA] of Object.entries(analysis)) {
|
|
683
|
+
if (analysisA.error) continue
|
|
684
|
+
|
|
685
|
+
for (const [fileB, analysisB] of Object.entries(analysis)) {
|
|
686
|
+
if (analysisB.error || fileA === fileB) continue
|
|
687
|
+
|
|
688
|
+
// Semantic type similarity
|
|
689
|
+
const semanticSimilarity = analysisA.semanticType === analysisB.semanticType ? 1.0 : 0.0
|
|
690
|
+
|
|
691
|
+
// Business domain overlap
|
|
692
|
+
const domainOverlap = this.calculateOverlap(analysisA.businessDomain, analysisB.businessDomain)
|
|
693
|
+
|
|
694
|
+
// Technical pattern overlap
|
|
695
|
+
const patternOverlap = this.calculateOverlap(analysisA.technicalPatterns, analysisB.technicalPatterns)
|
|
696
|
+
|
|
697
|
+
if (semanticSimilarity > 0 || domainOverlap > 0 || patternOverlap > 0) {
|
|
698
|
+
graph[fileA].semanticSimilarity[fileB] = semanticSimilarity
|
|
699
|
+
graph[fileA].businessDomainOverlap[fileB] = domainOverlap
|
|
700
|
+
graph[fileA].technicalPatternOverlap[fileB] = patternOverlap
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
return graph
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
/**
|
|
709
|
+
* Create smart chunks using clustering algorithms
|
|
710
|
+
*/
|
|
711
|
+
async createSmartChunks(analysis, relationshipGraph) {
|
|
712
|
+
|
|
713
|
+
// Start with individual files as nodes
|
|
714
|
+
const nodes = Object.keys(analysis).filter(path => !analysis[path].error)
|
|
715
|
+
console.log(`🧩 Starting with ${nodes.length} nodes for clustering`)
|
|
716
|
+
|
|
717
|
+
// Apply different clustering strategies
|
|
718
|
+
const strategies = [
|
|
719
|
+
this.clusterBySemanticType.bind(this),
|
|
720
|
+
this.clusterByBusinessDomain.bind(this),
|
|
721
|
+
this.clusterByDependencyGraph.bind(this),
|
|
722
|
+
this.clusterByDirectoryStructure.bind(this)
|
|
723
|
+
]
|
|
724
|
+
|
|
725
|
+
let clusters = nodes.map(node => [node]) // Start with individual nodes
|
|
726
|
+
|
|
727
|
+
// Apply clustering strategies
|
|
728
|
+
for (const strategy of strategies) {
|
|
729
|
+
clusters = strategy(clusters, analysis, relationshipGraph)
|
|
730
|
+
console.log(`📦 After ${strategy.name}: ${clusters.length} clusters`)
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
// Convert clusters to named chunks
|
|
734
|
+
const chunks = []
|
|
735
|
+
const usedNames = new Set()
|
|
736
|
+
|
|
737
|
+
for (const cluster of clusters) {
|
|
738
|
+
if (cluster.length === 0) continue
|
|
739
|
+
|
|
740
|
+
let chunkName = await this.generateChunkName(cluster, analysis)
|
|
741
|
+
|
|
742
|
+
// Ensure unique names
|
|
743
|
+
let uniqueName = chunkName
|
|
744
|
+
let counter = 1
|
|
745
|
+
while (usedNames.has(uniqueName)) {
|
|
746
|
+
uniqueName = `${chunkName}-${counter}`
|
|
747
|
+
counter++
|
|
748
|
+
}
|
|
749
|
+
usedNames.add(uniqueName)
|
|
750
|
+
|
|
751
|
+
const chunk = {
|
|
752
|
+
name: uniqueName,
|
|
753
|
+
type: this.determineChunkType(cluster, analysis),
|
|
754
|
+
files: cluster,
|
|
755
|
+
size: cluster.reduce((sum, file) => sum + analysis[file].size, 0),
|
|
756
|
+
complexity: this.calculateClusterComplexity(cluster, analysis),
|
|
757
|
+
dependencies: this.calculateClusterDependencies(cluster, analysis),
|
|
758
|
+
businessDomains: this.extractClusterBusinessDomains(cluster, analysis),
|
|
759
|
+
technicalPatterns: this.extractClusterTechnicalPatterns(cluster, analysis),
|
|
760
|
+
purpose: this.determineClusterPurpose(cluster, analysis),
|
|
761
|
+
cohesion: this.calculateClusterCohesion(cluster, relationshipGraph),
|
|
762
|
+
recommendations: this.generateClusterRecommendations(cluster, analysis),
|
|
763
|
+
tags: this.generateTags(cluster, analysis)
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
chunks.push(chunk)
|
|
767
|
+
}
|
|
768
|
+
|
|
769
|
+
return chunks.sort((a, b) => b.cohesion - a.cohesion) // Sort by cohesion (best chunks first)
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
/**
|
|
773
|
+
* Cluster files by semantic type
|
|
774
|
+
*/
|
|
775
|
+
clusterBySemanticType(clusters, analysis, relationshipGraph) {
|
|
776
|
+
const semanticGroups = {}
|
|
777
|
+
|
|
778
|
+
for (const cluster of clusters) {
|
|
779
|
+
for (const file of cluster) {
|
|
780
|
+
const semanticType = analysis[file].semanticType
|
|
781
|
+
if (!semanticGroups[semanticType]) {
|
|
782
|
+
semanticGroups[semanticType] = []
|
|
783
|
+
}
|
|
784
|
+
semanticGroups[semanticType].push(file)
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
return Object.values(semanticGroups).filter(group => group.length > 0)
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
/**
|
|
792
|
+
* Cluster files by business domain
|
|
793
|
+
*/
|
|
794
|
+
clusterByBusinessDomain(clusters, analysis, relationshipGraph) {
|
|
795
|
+
const domainGroups = {}
|
|
796
|
+
|
|
797
|
+
for (const cluster of clusters) {
|
|
798
|
+
for (const file of cluster) {
|
|
799
|
+
const domains = analysis[file].businessDomain
|
|
800
|
+
|
|
801
|
+
if (domains.length === 0) {
|
|
802
|
+
// Files with no clear domain go to 'general' group
|
|
803
|
+
if (!domainGroups.general) domainGroups.general = []
|
|
804
|
+
domainGroups.general.push(file)
|
|
805
|
+
} else {
|
|
806
|
+
// Files go to their primary domain group
|
|
807
|
+
const primaryDomain = domains[0]
|
|
808
|
+
if (!domainGroups[primaryDomain]) domainGroups[primaryDomain] = []
|
|
809
|
+
domainGroups[primaryDomain].push(file)
|
|
810
|
+
}
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
return Object.values(domainGroups).filter(group => group.length > 0)
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
/**
|
|
818
|
+
* Cluster files by dependency relationships
|
|
819
|
+
*/
|
|
820
|
+
clusterByDependencyGraph(clusters, analysis, relationshipGraph) {
|
|
821
|
+
const dependencyGroups = []
|
|
822
|
+
const visited = new Set()
|
|
823
|
+
|
|
824
|
+
for (const cluster of clusters) {
|
|
825
|
+
for (const file of cluster) {
|
|
826
|
+
if (visited.has(file)) continue
|
|
827
|
+
|
|
828
|
+
// Find all files connected to this file through imports
|
|
829
|
+
const connected = this.findConnectedFiles(file, relationshipGraph, new Set())
|
|
830
|
+
|
|
831
|
+
// Filter to only files in current clusters
|
|
832
|
+
const relevantConnected = connected.filter(f =>
|
|
833
|
+
clusters.some(cluster => cluster.includes(f))
|
|
834
|
+
)
|
|
835
|
+
|
|
836
|
+
if (relevantConnected.length > 1) {
|
|
837
|
+
dependencyGroups.push(relevantConnected)
|
|
838
|
+
relevantConnected.forEach(f => visited.add(f))
|
|
839
|
+
} else {
|
|
840
|
+
// Isolated file becomes its own group
|
|
841
|
+
dependencyGroups.push([file])
|
|
842
|
+
visited.add(file)
|
|
843
|
+
}
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
return dependencyGroups.filter(group => group.length > 0)
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
/**
|
|
851
|
+
* Cluster files by directory structure
|
|
852
|
+
*/
|
|
853
|
+
clusterByDirectoryStructure(clusters, analysis, relationshipGraph) {
|
|
854
|
+
const directoryGroups = {}
|
|
855
|
+
|
|
856
|
+
for (const cluster of clusters) {
|
|
857
|
+
for (const file of cluster) {
|
|
858
|
+
const dir = dirname(file)
|
|
859
|
+
if (!directoryGroups[dir]) {
|
|
860
|
+
directoryGroups[dir] = []
|
|
861
|
+
}
|
|
862
|
+
directoryGroups[dir].push(file)
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
return Object.values(directoryGroups).filter(group => group.length > 0)
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
/**
|
|
870
|
+
* Generate intelligent chunk name
|
|
871
|
+
*/
|
|
872
|
+
async generateChunkName(files, analysis) {
|
|
873
|
+
const namingStrategies = {
|
|
874
|
+
domainBased: this.generateDomainBasedName.bind(this),
|
|
875
|
+
patternBased: this.generatePatternBasedName.bind(this),
|
|
876
|
+
functionalityBased: this.generateFunctionalityBasedName.bind(this)
|
|
877
|
+
}
|
|
878
|
+
|
|
879
|
+
const names = {}
|
|
880
|
+
|
|
881
|
+
for (const [strategy, generator] of Object.entries(namingStrategies)) {
|
|
882
|
+
try {
|
|
883
|
+
names[strategy] = generator(files, analysis)
|
|
884
|
+
} catch (error) {
|
|
885
|
+
names[strategy] = 'unnamed-chunk'
|
|
886
|
+
}
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
// Choose best name based on strategy preference - prefer pattern-based for better names
|
|
890
|
+
const strategy = 'patternBased' // Force pattern-based naming
|
|
891
|
+
return names[strategy] || names.patternBased || names.functionalityBased || names.domainBased || 'unknown-chunk'
|
|
892
|
+
}
|
|
893
|
+
|
|
894
|
+
/**
|
|
895
|
+
* Generate domain-based chunk name
|
|
896
|
+
*/
|
|
897
|
+
generateDomainBasedName(files, analysis) {
|
|
898
|
+
// Always fallback to pattern-based naming since domain extraction is unreliable
|
|
899
|
+
return this.generatePatternBasedName(files, analysis)
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
/**
|
|
903
|
+
* Generate pattern-based chunk name
|
|
904
|
+
*/
|
|
905
|
+
generatePatternBasedName(files, analysis) {
|
|
906
|
+
const semanticTypes = files.map(file => analysis[file].semanticType)
|
|
907
|
+
const mostCommon = this.getMostCommon(semanticTypes)
|
|
908
|
+
|
|
909
|
+
// Look at actual file names and directories for context
|
|
910
|
+
const commonPath = this.findCommonPath(files)
|
|
911
|
+
const dirName = commonPath ? basename(dirname(commonPath)) : null
|
|
912
|
+
|
|
913
|
+
const typeNames = {
|
|
914
|
+
reactComponent: 'ui-components',
|
|
915
|
+
reactHook: 'react-hooks',
|
|
916
|
+
expressRoute: 'server-routes',
|
|
917
|
+
expressMiddleware: 'server-middleware',
|
|
918
|
+
utilityFunction: 'utility-functions',
|
|
919
|
+
typeDefinition: 'type-definitions',
|
|
920
|
+
configModule: 'configuration',
|
|
921
|
+
cliCommand: 'cli-tools',
|
|
922
|
+
apiHandler: 'api-endpoints',
|
|
923
|
+
module: 'shared-modules'
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
let baseName = typeNames[mostCommon] || 'mixed-files'
|
|
927
|
+
|
|
928
|
+
// Add more specific context based on file paths
|
|
929
|
+
if (commonPath) {
|
|
930
|
+
if (commonPath.includes('/components/ui/')) {
|
|
931
|
+
baseName = 'ui-library-components'
|
|
932
|
+
} else if (commonPath.includes('/components/')) {
|
|
933
|
+
baseName = 'application-components'
|
|
934
|
+
} else if (commonPath.includes('/hooks/')) {
|
|
935
|
+
baseName = 'custom-hooks'
|
|
936
|
+
} else if (commonPath.includes('/lib/')) {
|
|
937
|
+
baseName = 'core-utilities'
|
|
938
|
+
} else if (commonPath.includes('/utils/')) {
|
|
939
|
+
baseName = 'helper-utilities'
|
|
940
|
+
} else if (dirName && dirName !== 'src' && dirName !== 'components' && dirName !== 'lib') {
|
|
941
|
+
baseName = `${dirName}-${baseName}`
|
|
942
|
+
}
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
return baseName
|
|
946
|
+
}
|
|
947
|
+
|
|
948
|
+
/**
|
|
949
|
+
* Generate functionality-based chunk name
|
|
950
|
+
*/
|
|
951
|
+
generateFunctionalityBasedName(files, analysis) {
|
|
952
|
+
// Extract function names and find common themes
|
|
953
|
+
const allFunctions = files.flatMap(file =>
|
|
954
|
+
analysis[file].ast.functions.map(fn => fn.name.toLowerCase())
|
|
955
|
+
)
|
|
956
|
+
|
|
957
|
+
const commonWords = this.extractCommonWords(allFunctions)
|
|
958
|
+
|
|
959
|
+
if (commonWords.length > 0) {
|
|
960
|
+
return commonWords.slice(0, 2).join('-') + '-logic'
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
// Fallback to directory-based naming
|
|
964
|
+
const dirs = files.map(file => basename(dirname(file)))
|
|
965
|
+
const commonDir = this.getMostCommon(dirs)
|
|
966
|
+
|
|
967
|
+
return commonDir + '-module'
|
|
968
|
+
}
|
|
969
|
+
|
|
970
|
+
/**
|
|
971
|
+
* Helper methods for AST analysis
|
|
972
|
+
*/
|
|
973
|
+
|
|
974
|
+
queryNode(node, query) {
|
|
975
|
+
// Simplified query implementation
|
|
976
|
+
// In a full implementation, you'd use tree-sitter's query language
|
|
977
|
+
const results = []
|
|
978
|
+
|
|
979
|
+
const traverse = (currentNode) => {
|
|
980
|
+
// Match based on node type for now
|
|
981
|
+
if (query.includes(currentNode.type)) {
|
|
982
|
+
results.push({ node: currentNode })
|
|
983
|
+
}
|
|
984
|
+
|
|
985
|
+
for (let i = 0; i < currentNode.namedChildCount; i++) {
|
|
986
|
+
traverse(currentNode.namedChild(i))
|
|
987
|
+
}
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
traverse(node)
|
|
991
|
+
return results
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
getNodeText(node, content) {
|
|
995
|
+
return content.slice(node.startIndex, node.endIndex)
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
isNodeExported(node) {
|
|
999
|
+
// Check if node is part of an export statement
|
|
1000
|
+
let parent = node.parent
|
|
1001
|
+
while (parent) {
|
|
1002
|
+
if (parent.type === 'export_statement') {
|
|
1003
|
+
return true
|
|
1004
|
+
}
|
|
1005
|
+
parent = parent.parent
|
|
1006
|
+
}
|
|
1007
|
+
return false
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
calculateOverlap(arrayA, arrayB) {
|
|
1011
|
+
const setA = new Set(arrayA)
|
|
1012
|
+
const setB = new Set(arrayB)
|
|
1013
|
+
const intersection = new Set([...setA].filter(x => setB.has(x)))
|
|
1014
|
+
const union = new Set([...setA, ...setB])
|
|
1015
|
+
|
|
1016
|
+
return union.size === 0 ? 0 : intersection.size / union.size
|
|
1017
|
+
}
|
|
1018
|
+
|
|
1019
|
+
getMostCommon(arr) {
|
|
1020
|
+
const counts = {}
|
|
1021
|
+
for (const item of arr) {
|
|
1022
|
+
counts[item] = (counts[item] || 0) + 1
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
return Object.entries(counts)
|
|
1026
|
+
.sort(([,a], [,b]) => b - a)[0]?.[0] || 'unknown'
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
generateSemanticTags(analysis) {
|
|
1030
|
+
const tags = []
|
|
1031
|
+
|
|
1032
|
+
tags.push(analysis.semanticType)
|
|
1033
|
+
tags.push(...analysis.businessDomain)
|
|
1034
|
+
tags.push(...analysis.technicalPatterns)
|
|
1035
|
+
|
|
1036
|
+
if (analysis.complexity.level) {
|
|
1037
|
+
tags.push(`complexity-${analysis.complexity.level}`)
|
|
1038
|
+
}
|
|
1039
|
+
|
|
1040
|
+
if (analysis.lines < 50) tags.push('small')
|
|
1041
|
+
else if (analysis.lines < 200) tags.push('medium')
|
|
1042
|
+
else tags.push('large')
|
|
1043
|
+
|
|
1044
|
+
return [...new Set(tags)]
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
calculateAstComplexity(rootNode) {
|
|
1048
|
+
let complexity = 1
|
|
1049
|
+
|
|
1050
|
+
const complexityNodes = ['if_statement', 'while_statement', 'for_statement',
|
|
1051
|
+
'switch_statement', 'try_statement', 'catch_clause']
|
|
1052
|
+
|
|
1053
|
+
const traverse = (node) => {
|
|
1054
|
+
if (complexityNodes.includes(node.type)) {
|
|
1055
|
+
complexity++
|
|
1056
|
+
}
|
|
1057
|
+
|
|
1058
|
+
for (let i = 0; i < node.namedChildCount; i++) {
|
|
1059
|
+
traverse(node.namedChild(i))
|
|
1060
|
+
}
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1063
|
+
traverse(rootNode)
|
|
1064
|
+
|
|
1065
|
+
return {
|
|
1066
|
+
score: complexity,
|
|
1067
|
+
level: complexity < 5 ? 'low' : complexity < 15 ? 'medium' : 'high'
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
/**
|
|
1072
|
+
* Analyze dependencies from AST
|
|
1073
|
+
*/
|
|
1074
|
+
analyzeDependencies(rootNode, content) {
|
|
1075
|
+
const dependencies = {
|
|
1076
|
+
internal: [],
|
|
1077
|
+
external: [],
|
|
1078
|
+
relative: []
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
const imports = this.extractImports(rootNode, content)
|
|
1082
|
+
|
|
1083
|
+
for (const imp of imports) {
|
|
1084
|
+
if (imp.isRelative) {
|
|
1085
|
+
dependencies.relative.push(imp.source)
|
|
1086
|
+
} else if (imp.isExternal) {
|
|
1087
|
+
dependencies.external.push(imp.source)
|
|
1088
|
+
} else {
|
|
1089
|
+
dependencies.internal.push(imp.source)
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
return dependencies
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
/**
|
|
1097
|
+
* Resolve relative import path to absolute path
|
|
1098
|
+
*/
|
|
1099
|
+
resolveImportPath(fromFile, importPath) {
|
|
1100
|
+
// Simplified path resolution
|
|
1101
|
+
const dir = dirname(fromFile)
|
|
1102
|
+
return join(dir, importPath)
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
/**
|
|
1106
|
+
* Find all files connected through imports
|
|
1107
|
+
*/
|
|
1108
|
+
findConnectedFiles(startFile, relationshipGraph, visited = new Set()) {
|
|
1109
|
+
if (visited.has(startFile)) return []
|
|
1110
|
+
|
|
1111
|
+
visited.add(startFile)
|
|
1112
|
+
const connected = [startFile]
|
|
1113
|
+
|
|
1114
|
+
if (relationshipGraph[startFile]) {
|
|
1115
|
+
// Follow imports
|
|
1116
|
+
for (const importedFile of relationshipGraph[startFile].imports) {
|
|
1117
|
+
connected.push(...this.findConnectedFiles(importedFile, relationshipGraph, visited))
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
// Follow files that import this one
|
|
1121
|
+
for (const importingFile of relationshipGraph[startFile].importedBy) {
|
|
1122
|
+
connected.push(...this.findConnectedFiles(importingFile, relationshipGraph, visited))
|
|
1123
|
+
}
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
return [...new Set(connected)]
|
|
1127
|
+
}
|
|
1128
|
+
|
|
1129
|
+
/**
|
|
1130
|
+
* Extract imported names from import statement
|
|
1131
|
+
*/
|
|
1132
|
+
extractImportedNames(importNode, content) {
|
|
1133
|
+
const names = []
|
|
1134
|
+
// Simplified implementation - would need more complex parsing
|
|
1135
|
+
const importText = this.getNodeText(importNode, content)
|
|
1136
|
+
const match = importText.match(/import\s+(?:\{([^}]+)\}|(\w+))/i)
|
|
1137
|
+
if (match) {
|
|
1138
|
+
if (match[1]) {
|
|
1139
|
+
// Named imports
|
|
1140
|
+
names.push(...match[1].split(',').map(n => n.trim()))
|
|
1141
|
+
} else if (match[2]) {
|
|
1142
|
+
// Default import
|
|
1143
|
+
names.push(match[2])
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
return names
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
/**
|
|
1150
|
+
* Extract export name from export declaration
|
|
1151
|
+
*/
|
|
1152
|
+
extractExportName(declaration, content) {
|
|
1153
|
+
const text = this.getNodeText(declaration, content)
|
|
1154
|
+
const match = text.match(/(?:function|class|const|let|var)\s+(\w+)/)
|
|
1155
|
+
return match ? match[1] : 'unnamed'
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
/**
|
|
1159
|
+
* Extract JSX element name
|
|
1160
|
+
*/
|
|
1161
|
+
extractJsxElementName(jsxNode, content) {
|
|
1162
|
+
try {
|
|
1163
|
+
const openingElement = jsxNode.namedChild(0)
|
|
1164
|
+
if (openingElement) {
|
|
1165
|
+
const nameNode = openingElement.namedChild(0)
|
|
1166
|
+
return nameNode ? this.getNodeText(nameNode, content) : 'unknown'
|
|
1167
|
+
}
|
|
1168
|
+
} catch (error) {
|
|
1169
|
+
return 'unknown'
|
|
1170
|
+
}
|
|
1171
|
+
return 'unknown'
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
/**
|
|
1175
|
+
* Determine chunk type based on files
|
|
1176
|
+
*/
|
|
1177
|
+
determineChunkType(files, analysis) {
|
|
1178
|
+
const semanticTypes = files.map(file => analysis[file].semanticType)
|
|
1179
|
+
const mostCommon = this.getMostCommon(semanticTypes)
|
|
1180
|
+
|
|
1181
|
+
const typeMapping = {
|
|
1182
|
+
reactComponent: 'ui-components',
|
|
1183
|
+
reactHook: 'custom-hooks',
|
|
1184
|
+
expressRoute: 'api-routes',
|
|
1185
|
+
expressMiddleware: 'middleware',
|
|
1186
|
+
utilityFunction: 'utilities',
|
|
1187
|
+
typeDefinition: 'type-definitions',
|
|
1188
|
+
configModule: 'configuration',
|
|
1189
|
+
cliCommand: 'cli-commands',
|
|
1190
|
+
apiHandler: 'api-handlers'
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
return typeMapping[mostCommon] || 'mixed-module'
|
|
1194
|
+
}
|
|
1195
|
+
|
|
1196
|
+
/**
|
|
1197
|
+
* Calculate cluster complexity
|
|
1198
|
+
*/
|
|
1199
|
+
calculateClusterComplexity(files, analysis) {
|
|
1200
|
+
const complexities = files.map(file => analysis[file].complexity.score)
|
|
1201
|
+
const total = complexities.reduce((sum, c) => sum + c, 0)
|
|
1202
|
+
const average = total / files.length
|
|
1203
|
+
|
|
1204
|
+
return {
|
|
1205
|
+
total,
|
|
1206
|
+
average,
|
|
1207
|
+
level: average < 5 ? 'low' : average < 15 ? 'medium' : 'high'
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
|
|
1211
|
+
/**
|
|
1212
|
+
* Calculate cluster dependencies
|
|
1213
|
+
*/
|
|
1214
|
+
calculateClusterDependencies(files, analysis) {
|
|
1215
|
+
const allDeps = {
|
|
1216
|
+
internal: new Set(),
|
|
1217
|
+
external: new Set(),
|
|
1218
|
+
relative: new Set()
|
|
1219
|
+
}
|
|
1220
|
+
|
|
1221
|
+
for (const file of files) {
|
|
1222
|
+
const deps = analysis[file].dependencies
|
|
1223
|
+
deps.internal.forEach(dep => allDeps.internal.add(dep))
|
|
1224
|
+
deps.external.forEach(dep => allDeps.external.add(dep))
|
|
1225
|
+
deps.relative.forEach(dep => allDeps.relative.add(dep))
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
return {
|
|
1229
|
+
internal: Array.from(allDeps.internal),
|
|
1230
|
+
external: Array.from(allDeps.external),
|
|
1231
|
+
relative: Array.from(allDeps.relative),
|
|
1232
|
+
totalCount: allDeps.internal.size + allDeps.external.size + allDeps.relative.size
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
/**
|
|
1237
|
+
* Extract cluster business domains
|
|
1238
|
+
*/
|
|
1239
|
+
extractClusterBusinessDomains(files, analysis) {
|
|
1240
|
+
const allDomains = files.flatMap(file => analysis[file].businessDomain)
|
|
1241
|
+
return [...new Set(allDomains)]
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1244
|
+
/**
|
|
1245
|
+
* Extract cluster technical patterns
|
|
1246
|
+
*/
|
|
1247
|
+
extractClusterTechnicalPatterns(files, analysis) {
|
|
1248
|
+
const allPatterns = files.flatMap(file => analysis[file].technicalPatterns)
|
|
1249
|
+
return [...new Set(allPatterns)]
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
/**
|
|
1253
|
+
* Determine cluster purpose
|
|
1254
|
+
*/
|
|
1255
|
+
determineClusterPurpose(files, analysis) {
|
|
1256
|
+
const semanticTypes = files.map(file => analysis[file].semanticType)
|
|
1257
|
+
const mostCommon = this.getMostCommon(semanticTypes)
|
|
1258
|
+
|
|
1259
|
+
const purposeMapping = {
|
|
1260
|
+
reactComponent: 'User interface components and React elements',
|
|
1261
|
+
reactHook: 'Custom React hooks for state and logic sharing',
|
|
1262
|
+
expressRoute: 'API routes and endpoint handlers',
|
|
1263
|
+
expressMiddleware: 'Express middleware and request processing',
|
|
1264
|
+
utilityFunction: 'Utility functions and helper libraries',
|
|
1265
|
+
typeDefinition: 'TypeScript type definitions and interfaces',
|
|
1266
|
+
configModule: 'Configuration files and settings',
|
|
1267
|
+
cliCommand: 'Command-line interface and CLI tools',
|
|
1268
|
+
apiHandler: 'API client and data fetching logic'
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
return purposeMapping[mostCommon] || 'Mixed functionality module'
|
|
1272
|
+
}
|
|
1273
|
+
|
|
1274
|
+
/**
|
|
1275
|
+
* Calculate cluster cohesion
|
|
1276
|
+
*/
|
|
1277
|
+
calculateClusterCohesion(files, relationshipGraph) {
|
|
1278
|
+
if (files.length <= 1) return 1.0
|
|
1279
|
+
|
|
1280
|
+
let connections = 0
|
|
1281
|
+
let totalPossible = files.length * (files.length - 1)
|
|
1282
|
+
|
|
1283
|
+
for (const fileA of files) {
|
|
1284
|
+
for (const fileB of files) {
|
|
1285
|
+
if (fileA !== fileB && relationshipGraph[fileA]) {
|
|
1286
|
+
if (relationshipGraph[fileA].imports.includes(fileB) ||
|
|
1287
|
+
relationshipGraph[fileA].importedBy.includes(fileB) ||
|
|
1288
|
+
relationshipGraph[fileA].semanticSimilarity[fileB] > 0.5) {
|
|
1289
|
+
connections++
|
|
1290
|
+
}
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
return totalPossible > 0 ? connections / totalPossible : 0
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
/**
|
|
1299
|
+
* Generate cluster recommendations
|
|
1300
|
+
*/
|
|
1301
|
+
generateClusterRecommendations(files, analysis) {
|
|
1302
|
+
const recommendations = []
|
|
1303
|
+
|
|
1304
|
+
const totalSize = files.reduce((sum, file) => sum + analysis[file].size, 0)
|
|
1305
|
+
const avgComplexity = files.reduce((sum, file) => sum + analysis[file].complexity.score, 0) / files.length
|
|
1306
|
+
|
|
1307
|
+
if (totalSize > 100000) {
|
|
1308
|
+
recommendations.push({
|
|
1309
|
+
type: 'warning',
|
|
1310
|
+
message: 'Large cluster - consider splitting by functionality'
|
|
1311
|
+
})
|
|
1312
|
+
}
|
|
1313
|
+
|
|
1314
|
+
if (avgComplexity > 20) {
|
|
1315
|
+
recommendations.push({
|
|
1316
|
+
type: 'warning',
|
|
1317
|
+
message: 'High complexity cluster - review for refactoring opportunities'
|
|
1318
|
+
})
|
|
1319
|
+
}
|
|
1320
|
+
|
|
1321
|
+
if (files.length === 1) {
|
|
1322
|
+
recommendations.push({
|
|
1323
|
+
type: 'info',
|
|
1324
|
+
message: 'Single file cluster - consider grouping with related files'
|
|
1325
|
+
})
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
return recommendations
|
|
1329
|
+
}
|
|
1330
|
+
|
|
1331
|
+
/**
|
|
1332
|
+
* Extract common words from function names
|
|
1333
|
+
*/
|
|
1334
|
+
extractCommonWords(functionNames) {
|
|
1335
|
+
const words = functionNames.flatMap(name =>
|
|
1336
|
+
name.split(/(?=[A-Z])|_|-/).filter(word => word.length > 2)
|
|
1337
|
+
)
|
|
1338
|
+
|
|
1339
|
+
const wordCounts = {}
|
|
1340
|
+
for (const word of words) {
|
|
1341
|
+
wordCounts[word] = (wordCounts[word] || 0) + 1
|
|
1342
|
+
}
|
|
1343
|
+
|
|
1344
|
+
return Object.entries(wordCounts)
|
|
1345
|
+
.filter(([, count]) => count > 1)
|
|
1346
|
+
.sort(([, a], [, b]) => b - a)
|
|
1347
|
+
.slice(0, 3)
|
|
1348
|
+
.map(([word]) => word)
|
|
1349
|
+
}
|
|
1350
|
+
|
|
1351
|
+
/**
|
|
1352
|
+
* Generate code signature for caching and similarity comparison
|
|
1353
|
+
*/
|
|
1354
|
+
generateCodeSignature(rootNode, content) {
|
|
1355
|
+
const functions = this.extractFunctions(rootNode, content)
|
|
1356
|
+
const classes = this.extractClasses(rootNode, content)
|
|
1357
|
+
const imports = this.extractImports(rootNode, content)
|
|
1358
|
+
const exports = this.extractExports(rootNode, content)
|
|
1359
|
+
|
|
1360
|
+
return {
|
|
1361
|
+
functionCount: functions.length,
|
|
1362
|
+
classCount: classes.length,
|
|
1363
|
+
importCount: imports.length,
|
|
1364
|
+
exportCount: exports.length,
|
|
1365
|
+
exportedFunctions: functions.filter(f => f.isExported).map(f => f.name),
|
|
1366
|
+
importSources: imports.map(i => i.source),
|
|
1367
|
+
hasJsx: this.extractJsxElements(rootNode, content).length > 0,
|
|
1368
|
+
contentHash: this.simpleHash(content)
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
|
|
1372
|
+
/**
|
|
1373
|
+
* Simple hash function for content comparison
|
|
1374
|
+
*/
|
|
1375
|
+
simpleHash(str) {
|
|
1376
|
+
let hash = 0
|
|
1377
|
+
for (let i = 0; i < str.length; i++) {
|
|
1378
|
+
const char = str.charCodeAt(i)
|
|
1379
|
+
hash = ((hash << 5) - hash) + char
|
|
1380
|
+
hash = hash & hash // Convert to 32bit integer
|
|
1381
|
+
}
|
|
1382
|
+
return hash
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
generateSummary(analysis, chunks) {
|
|
1386
|
+
const files = Object.values(analysis).filter(f => !f.error)
|
|
1387
|
+
|
|
1388
|
+
return {
|
|
1389
|
+
totalFiles: files.length,
|
|
1390
|
+
totalSize: files.reduce((sum, f) => sum + f.size, 0),
|
|
1391
|
+
totalLines: files.reduce((sum, f) => sum + f.lines, 0),
|
|
1392
|
+
semanticTypes: this.countByProperty(files, 'semanticType'),
|
|
1393
|
+
businessDomains: this.countDomains(files),
|
|
1394
|
+
technicalPatterns: this.countPatterns(files),
|
|
1395
|
+
totalChunks: chunks.length,
|
|
1396
|
+
averageChunkSize: chunks.reduce((sum, c) => sum + c.size, 0) / chunks.length,
|
|
1397
|
+
chunkTypes: this.countByProperty(chunks, 'type')
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
|
|
1401
|
+
generateRecommendations(analysis, chunks) {
|
|
1402
|
+
const recommendations = []
|
|
1403
|
+
|
|
1404
|
+
// Add specific recommendations based on treesitter analysis
|
|
1405
|
+
|
|
1406
|
+
return recommendations
|
|
1407
|
+
}
|
|
1408
|
+
|
|
1409
|
+
countByProperty(items, property) {
|
|
1410
|
+
const counts = {}
|
|
1411
|
+
for (const item of items) {
|
|
1412
|
+
const value = typeof property === 'function' ? property(item) : item[property]
|
|
1413
|
+
counts[value] = (counts[value] || 0) + 1
|
|
1414
|
+
}
|
|
1415
|
+
return counts
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1418
|
+
countDomains(files) {
|
|
1419
|
+
const allDomains = files.flatMap(f => f.businessDomain)
|
|
1420
|
+
return this.countByProperty(allDomains, d => d)
|
|
1421
|
+
}
|
|
1422
|
+
|
|
1423
|
+
countPatterns(files) {
|
|
1424
|
+
const allPatterns = files.flatMap(f => f.technicalPatterns)
|
|
1425
|
+
return this.countByProperty(allPatterns, p => p)
|
|
1426
|
+
}
|
|
1427
|
+
|
|
1428
|
+
/**
|
|
1429
|
+
* Generate tags for a chunk based on its characteristics
|
|
1430
|
+
*/
|
|
1431
|
+
generateTags(files, analysis) {
|
|
1432
|
+
const tags = new Set()
|
|
1433
|
+
|
|
1434
|
+
// Add semantic type tags
|
|
1435
|
+
const semanticTypes = files.map(file => analysis[file].semanticType)
|
|
1436
|
+
for (const type of semanticTypes) {
|
|
1437
|
+
if (type === 'reactComponent') tags.add('react-component')
|
|
1438
|
+
if (type === 'reactHook') tags.add('react-hook')
|
|
1439
|
+
if (type === 'utilityFunction') tags.add('utility')
|
|
1440
|
+
if (type === 'expressRoute') tags.add('api')
|
|
1441
|
+
if (type === 'configModule') tags.add('config')
|
|
1442
|
+
}
|
|
1443
|
+
|
|
1444
|
+
// Add directory-based tags
|
|
1445
|
+
const commonPath = this.findCommonPath(files)
|
|
1446
|
+
if (commonPath) {
|
|
1447
|
+
if (commonPath.includes('/components/')) tags.add('component')
|
|
1448
|
+
if (commonPath.includes('/hooks/')) tags.add('hook')
|
|
1449
|
+
if (commonPath.includes('/lib/')) tags.add('library')
|
|
1450
|
+
if (commonPath.includes('/utils/')) tags.add('utility')
|
|
1451
|
+
if (commonPath.includes('/ui/')) tags.add('ui-library')
|
|
1452
|
+
}
|
|
1453
|
+
|
|
1454
|
+
// Add complexity tags
|
|
1455
|
+
const avgComplexity = files.reduce((sum, file) => sum + analysis[file].complexity.score, 0) / files.length
|
|
1456
|
+
if (avgComplexity > 15) tags.add('complex')
|
|
1457
|
+
if (avgComplexity < 5) tags.add('simple')
|
|
1458
|
+
|
|
1459
|
+
return Array.from(tags)
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
/**
|
|
1463
|
+
* Find common path prefix for a group of files
|
|
1464
|
+
*/
|
|
1465
|
+
findCommonPath(files) {
|
|
1466
|
+
if (files.length === 0) return null
|
|
1467
|
+
if (files.length === 1) return files[0]
|
|
1468
|
+
|
|
1469
|
+
const pathParts = files.map(file => file.split('/'))
|
|
1470
|
+
const commonParts = []
|
|
1471
|
+
|
|
1472
|
+
for (let i = 0; i < Math.min(...pathParts.map(p => p.length)); i++) {
|
|
1473
|
+
const part = pathParts[0][i]
|
|
1474
|
+
if (pathParts.every(p => p[i] === part)) {
|
|
1475
|
+
commonParts.push(part)
|
|
1476
|
+
} else {
|
|
1477
|
+
break
|
|
1478
|
+
}
|
|
1479
|
+
}
|
|
1480
|
+
|
|
1481
|
+
return commonParts.length > 0 ? commonParts.join('/') : null
|
|
1482
|
+
}
|
|
1483
|
+
}
|
|
1484
|
+
|
|
1485
|
+
export default TreesitterSemanticChunker
|